Skip to content

Commit 24e6b2f

Browse files
wetor and Alir3z4 authored
Support tri-backquote style code block and fix ordered list indent (#431)
Co-authored-by: Alireza Savand <[email protected]>
1 parent 2723f84 commit 24e6b2f

File tree

11 files changed

+159
-14
lines changed

11 files changed

+159
-14
lines changed

html2text/__init__.py

Lines changed: 30 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ def __init__(
7878
self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli
7979
self.hide_strikethrough = False # covered in cli
8080
self.mark_code = config.MARK_CODE
81+
self.backquote_code_style = config.BACKQUOTE_CODE_STYLE
8182
self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli
8283
self.wrap_links = config.WRAP_LINKS # covered in cli
8384
self.wrap_tables = config.WRAP_TABLES
@@ -111,6 +112,8 @@ def __init__(
111112
self.blockquote = 0
112113
self.pre = False
113114
self.startpre = False
115+
self.pre_indent = ""
116+
self.list_code_indent = ""
114117
self.code = False
115118
self.quote = False
116119
self.br_toggle = ""
@@ -629,6 +632,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
629632
self.lastWasList = False
630633

631634
if tag == "li":
635+
self.list_code_indent = ""
632636
self.pbr()
633637
if start:
634638
if self.list:
@@ -644,15 +648,16 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
644648
# TODO: line up <ol><li>s > 9 correctly.
645649
parent_list = None
646650
for list in self.list:
647-
self.o(
648-
" " if parent_list == "ol" and list.name == "ul" else " "
649-
)
651+
self.list_code_indent += " " if parent_list == "ol" else " "
650652
parent_list = list.name
653+
self.o(self.list_code_indent)
651654

652655
if li.name == "ul":
656+
self.list_code_indent += " "
653657
self.o(self.ul_item_mark + " ")
654658
elif li.name == "ol":
655659
li.num += 1
660+
self.list_code_indent += " "
656661
self.o(str(li.num) + ". ")
657662
self.start = True
658663

@@ -715,8 +720,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
715720
if start:
716721
self.startpre = True
717722
self.pre = True
723+
self.pre_indent = ""
718724
else:
719725
self.pre = False
726+
if self.backquote_code_style:
727+
self.out("\n" + self.pre_indent + "```")
720728
if self.mark_code:
721729
self.out("\n[/code]")
722730
self.p()
@@ -786,17 +794,23 @@ def o(
786794
bq += " "
787795

788796
if self.pre:
789-
if not self.list:
797+
if self.list:
798+
bq += self.list_code_indent
799+
800+
if not self.backquote_code_style:
790801
bq += " "
791-
# else: list content is already partially indented
792-
bq += " " * len(self.list)
802+
793803
data = data.replace("\n", "\n" + bq)
804+
self.pre_indent = bq
794805

795806
if self.startpre:
796807
self.startpre = False
797-
if self.list:
808+
if self.backquote_code_style:
809+
self.out("\n" + self.pre_indent + "```")
810+
self.p_p = 0
811+
elif self.list:
798812
# use existing initial indentation
799-
data = data.lstrip("\n")
813+
data = data.lstrip("\n" + self.pre_indent)
800814

801815
if self.start:
802816
self.space = False
@@ -952,8 +966,15 @@ def optwrap(self, text: str) -> str:
952966
# because of the presence of a link in it
953967
if not self.wrap_links:
954968
self.inline_links = False
969+
start_code = False
955970
for para in text.split("\n"):
956-
if len(para) > 0:
971+
# If the text is between tri-backquote pairs, it's a code block;
972+
# don't wrap
973+
if self.backquote_code_style and para.lstrip().startswith("```"):
974+
start_code = not start_code
975+
if start_code:
976+
result += para + "\n"
977+
elif len(para) > 0:
957978
if not skipwrap(
958979
para, self.wrap_links, self.wrap_list_items, self.wrap_tables
959980
):

html2text/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,13 @@ class bcolors:
238238
default=config.MARK_CODE,
239239
help="Mark program code blocks with [code]...[/code]",
240240
)
241+
p.add_argument(
242+
"--backquote-code-style",
243+
action="store_true",
244+
dest="backquote_code_style",
245+
default=config.BACKQUOTE_CODE_STYLE,
246+
help="Multi line code block using tri-backquote style",
247+
)
241248
p.add_argument(
242249
"--decode-errors",
243250
dest="decode_errors",
@@ -318,6 +325,7 @@ class bcolors:
318325
h.skip_internal_links = args.skip_internal_links
319326
h.links_each_paragraph = args.links_each_paragraph
320327
h.mark_code = args.mark_code
328+
h.backquote_code_style = args.backquote_code_style
321329
h.wrap_links = args.wrap_links
322330
h.wrap_list_items = args.wrap_list_items
323331
h.wrap_tables = args.wrap_tables

html2text/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
IMAGES_WITH_SIZE = False
4949
IGNORE_EMPHASIS = False
5050
MARK_CODE = False
51+
BACKQUOTE_CODE_STYLE = False
5152
DECODE_ERRORS = "strict"
5253
DEFAULT_IMAGE_ALT = ""
5354
PAD_TABLES = False

test/backquote_code_style.html

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<p><pre>
2+
def func(x):
3+
if x &lt; 1:
4+
return 'a'
5+
return 'b'
6+
</pre></p>
7+
8+
<ul>
9+
<li>unordered</li>
10+
<li>...</li>
11+
<ol>
12+
<li>ordered</li>
13+
<li>code:
14+
<pre>a
15+
b
16+
c</pre>
17+
</li>
18+
<li>...</li>
19+
<ol>
20+
<li>ordered</li>
21+
<li>code:
22+
<pre>d
23+
e
24+
f</pre>
25+
</li>
26+
<li>...</li>
27+
</ol>
28+
<li>end</li>
29+
</ol>
30+
<li>end</li>
31+
</ul>

test/backquote_code_style.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
```
3+
def func(x):
4+
if x < 1:
5+
return 'a'
6+
return 'b'
7+
8+
```
9+
10+
* unordered
11+
* ...
12+
1. ordered
13+
2. code:
14+
```
15+
a
16+
b
17+
c
18+
```
19+
20+
3. ...
21+
1. ordered
22+
2. code:
23+
```
24+
d
25+
e
26+
f
27+
```
28+
29+
3. ...
30+
4. end
31+
* end
32+

test/mixed_nested_lists.html

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,28 @@
1818
<li>end</li>
1919
</ul>
2020

21+
22+
<ul>
23+
<li>unordered</li>
24+
<li>...</li>
25+
<ol>
26+
<li>ordered</li>
27+
<li>code:
28+
<pre>a
29+
b
30+
c</pre>
31+
</li>
32+
<li>...</li>
33+
<ol>
34+
<li>ordered</li>
35+
<li>code:
36+
<pre>d
37+
e
38+
f</pre>
39+
</li>
40+
<li>...</li>
41+
</ol>
42+
<li>end</li>
43+
</ol>
44+
<li>end</li>
45+
</ul>

test/mixed_nested_lists.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,25 @@
99
1. ordered
1010
2. ...
1111
* end
12+
13+
* unordered
14+
* ...
15+
1. ordered
16+
2. code:
17+
18+
a
19+
b
20+
c
21+
22+
3. ...
23+
1. ordered
24+
2. code:
25+
26+
d
27+
e
28+
f
29+
30+
3. ...
31+
4. end
32+
* end
33+

test/normal.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ text to separate lists
1212

1313
1. now with numbers
1414
2. the prisoner
15-
1. not an _italic number_
16-
2. a **bold human** being
15+
1. not an _italic number_
16+
2. a **bold human** being
1717
3. end
1818

1919
**bold**

test/normal_escape_snob.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ text to separate lists
1212

1313
1. now with numbers
1414
2. the prisoner
15-
1. not an _italic number_
16-
2. a **bold human** being
15+
1. not an _italic number_
16+
2. a **bold human** being
1717
3. end
1818

1919
**bold**

test/preformatted_in_list.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
* Run this command:
2-
2+
33
ls -l *.html
44

55
* ?

0 commit comments

Comments
 (0)