Skip to content
This repository was archived by the owner on Dec 17, 2019. It is now read-only.

Commit 49fa23a

Browse files
committed
Preserve order of nested headers in TOC
Fixes Python-Markdown#380. The TOC replacement is now separate from building the TOC, which allows us to iterate through the doc in order rather than with the non-order-preserving iterparent method. This is almost a complete refactor of the run method. Also cleaned up the config stuff. Thanks to @colewerner for the report and test case.
1 parent 86ca8ce commit 49fa23a

File tree

4 files changed

+108
-49
lines changed

4 files changed

+108
-49
lines changed

markdown/extensions/toc.py

Lines changed: 55 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,44 @@ def order_toc_list(toc_list):
8686

8787

8888
class TocTreeprocessor(Treeprocessor):
89+
def __init__(self, md, config):
90+
super(TocTreeprocessor, self).__init__(md)
91+
92+
self.marker = config["marker"]
93+
self.title = config["title"]
94+
self.slugify = config["slugify"]
95+
self.use_anchors = parseBoolValue(config["anchorlink"])
96+
self.use_permalinks = parseBoolValue(config["permalink"], False)
97+
if self.use_permalinks is None:
98+
self.use_permalinks = config["permalink"]
99+
100+
self.header_rgx = re.compile("[Hh][123456]")
89101

90-
# Iterator wrapper to get parent and child all at once
91102
def iterparent(self, root):
92-
for parent in root.getiterator():
103+
''' Iterator wrapper to get parent and child all at once. '''
104+
for parent in root.iter():
93105
for child in parent:
94106
yield parent, child
107+
108+
def replace_marker(self, root, elem):
109+
''' Replace marker with elem. '''
110+
for (p, c) in self.iterparent(root):
111+
text = ''.join(itertext(c)).strip()
112+
if not text:
113+
continue
114+
115+
# To keep the output from screwing up the
116+
# validation by putting a <div> inside of a <p>
117+
# we actually replace the <p> in its entirety.
118+
# We do not allow the marker inside a header as that
119+
# would causes an enless loop of placing a new TOC
120+
# inside previously generated TOC.
121+
if c.text and c.text.strip() == self.marker and \
122+
not self.header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
123+
for i in range(len(p)):
124+
if p[i] == c:
125+
p[i] = elem
126+
break
95127

96128
def add_anchor(self, c, elem_id): # @ReservedAssignment
97129
anchor = etree.Element("a")
@@ -116,10 +148,10 @@ def add_permalink(self, c, elem_id):
116148

117149
def build_toc_etree(self, div, toc_list):
118150
# Add title to the div
119-
if self.config["title"]:
151+
if self.title:
120152
header = etree.SubElement(div, "span")
121153
header.attrib["class"] = "toctitle"
122-
header.text = self.config["title"]
154+
header.text = self.title
123155

124156
def build_etree_ul(toc_list, parent):
125157
ul = etree.SubElement(parent, "ul")
@@ -136,62 +168,37 @@ def build_etree_ul(toc_list, parent):
136168
return build_etree_ul(toc_list, div)
137169

138170
def run(self, doc):
139-
140-
div = etree.Element("div")
141-
div.attrib["class"] = "toc"
142-
header_rgx = re.compile("[Hh][123456]")
143-
144-
self.use_anchors = parseBoolValue(self.config["anchorlink"])
145-
self.use_permalinks = parseBoolValue(self.config["permalink"], False)
146-
if self.use_permalinks is None:
147-
self.use_permalinks = self.config["permalink"]
148-
149171
# Get a list of id attributes
150172
used_ids = set()
151-
for c in doc.getiterator():
152-
if "id" in c.attrib:
153-
used_ids.add(c.attrib["id"])
173+
for el in doc.iter():
174+
if "id" in el.attrib:
175+
used_ids.add(el.attrib["id"])
154176

177+
div = etree.Element("div")
178+
div.attrib["class"] = "toc"
179+
self.replace_marker(doc, div)
180+
155181
toc_list = []
156-
for (p, c) in self.iterparent(doc):
157-
text = ''.join(itertext(c)).strip()
158-
if not text:
159-
continue
160-
161-
# To keep the output from screwing up the
162-
# validation by putting a <div> inside of a <p>
163-
# we actually replace the <p> in its entirety.
164-
# We do not allow the marker inside a header as that
165-
# would causes an enless loop of placing a new TOC
166-
# inside previously generated TOC.
167-
if c.text and c.text.strip() == self.config["marker"] and \
168-
not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
169-
for i in range(len(p)):
170-
if p[i] == c:
171-
p[i] = div
172-
break
173-
174-
if header_rgx.match(c.tag):
175-
182+
for el in doc.iter():
183+
if self.header_rgx.match(el.tag):
184+
text = ''.join(itertext(el)).strip()
185+
176186
# Do not override pre-existing ids
177-
if "id" not in c.attrib:
187+
if "id" not in el.attrib:
178188
elem_id = stashedHTML2text(text, self.markdown)
179-
elem_id = unique(self.config["slugify"](elem_id, '-'),
180-
used_ids)
181-
c.attrib["id"] = elem_id
189+
elem_id = unique(self.slugify(elem_id, '-'), used_ids)
190+
el.attrib["id"] = elem_id
182191
else:
183-
elem_id = c.attrib["id"]
184-
185-
tag_level = int(c.tag[-1])
192+
elem_id = el.attrib["id"]
186193

187-
toc_list.append({'level': tag_level,
194+
toc_list.append({'level': int(el.tag[-1]),
188195
'id': elem_id,
189196
'name': text})
190197

191198
if self.use_anchors:
192-
self.add_anchor(c, elem_id)
199+
self.add_anchor(el, elem_id)
193200
if self.use_permalinks:
194-
self.add_permalink(c, elem_id)
201+
self.add_permalink(el, elem_id)
195202

196203
toc_list_nested = order_toc_list(toc_list)
197204
self.build_toc_etree(div, toc_list_nested)
@@ -235,8 +242,7 @@ def extendMarkdown(self, md, md_globals):
235242
md.registerExtension(self)
236243
self.md = md
237244
self.reset()
238-
tocext = self.TreeProcessorClass(md)
239-
tocext.config = self.getConfigs()
245+
tocext = self.TreeProcessorClass(md, self.getConfigs())
240246
# Headerid ext is set to '>prettify'. With this set to '_end',
241247
# it should always come after headerid ext (and honor ids assinged
242248
# by the header id extension) if both are used. Same goes for

tests/extensions/test.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ toc_nested2:
3636
markdown.extensions.toc:
3737
permalink: "[link]"
3838

39+
toc_nested_list:
40+
extensions:
41+
- markdown.extensions.toc
42+
3943
wikilinks:
4044
extensions:
4145
- markdown.extensions.wikilinks
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<h1 id="title">Title</h1>
2+
<div class="toc">
3+
<ul>
4+
<li><a href="#title">Title</a><ul>
5+
<li><a href="#section-1">Section 1</a><ul>
6+
<li><a href="#subsection-1">Subsection 1</a></li>
7+
<li><a href="#subsection-2">Subsection 2</a></li>
8+
</ul>
9+
</li>
10+
<li><a href="#section-2">Section 2</a></li>
11+
<li><a href="#section-3">Section 3</a></li>
12+
</ul>
13+
</li>
14+
</ul>
15+
</div>
16+
<h2 id="section-1">Section 1</h2>
17+
<ol>
18+
<li>
19+
<p>List Item 1</p>
20+
<h3 id="subsection-1">Subsection 1</h3>
21+
<p>Explanation 1</p>
22+
</li>
23+
<li>
24+
<p>List Item 2</p>
25+
<h3 id="subsection-2">Subsection 2</h3>
26+
<p>Explanation 2</p>
27+
</li>
28+
</ol>
29+
<h2 id="section-2">Section 2</h2>
30+
<h2 id="section-3">Section 3</h2>
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Title
2+
3+
[TOC]
4+
5+
## Section 1
6+
7+
1. List Item 1
8+
9+
### Subsection 1
10+
Explanation 1
11+
12+
2. List Item 2
13+
14+
### Subsection 2
15+
Explanation 2
16+
17+
## Section 2
18+
19+
## Section 3

0 commit comments

Comments
 (0)