Skip to content

Commit a4ceb0b

Browse files
author
Waylan Limberg
committed
HeaderID Ext now handles raw html in ids. Fixes #232
1 parent df8423b commit a4ceb0b

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

markdown/extensions/headerid.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from __future__ import unicode_literals
7979
from . import Extension
8080
from ..treeprocessors import Treeprocessor
81+
from ..util import HTML_PLACEHOLDER_RE
8182
import re
8283
import logging
8384
import unicodedata
@@ -130,13 +131,27 @@ def run(self, doc):
130131
start_level, force_id = self._get_meta()
131132
slugify = self.config['slugify']
132133
sep = self.config['separator']
134+
135+
def _html_sub(m):
136+
""" Substitute raw html with plain text. """
137+
try:
138+
raw, safe = self.md.htmlStash.rawHtmlBlocks[int(m.group(1))]
139+
except (IndexError, TypeError):
140+
return m.group(0)
141+
if self.md.safeMode and not safe:
142+
return ''
143+
# Strip out tags and entities - leaving text
144+
return re.sub(r'(<[^>]+>)|(&[\#a-zA-Z0-9]+;)', '', raw)
145+
133146
for elem in doc.getiterator():
134147
if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
135148
if force_id:
136149
if "id" in elem.attrib:
137150
id = elem.get('id')
138151
else:
139-
id = slugify(''.join(itertext(elem)), sep)
152+
id = HTML_PLACEHOLDER_RE.sub(_html_sub,
153+
''.join(itertext(elem)))
154+
id = slugify(id, sep)
140155
elem.set('id', unique(id, self.IDs))
141156
if start_level:
142157
level = int(elem.tag[-1]) + start_level

markdown/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
3939
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
4040
AMP_SUBSTITUTE = STX+"amp"+ETX
41+
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
42+
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
43+
4144

4245
"""
4346
Constants you probably do not need to change
@@ -132,5 +135,5 @@ def reset(self):
132135
self.rawHtmlBlocks = []
133136

134137
def get_placeholder(self, key):
135-
return "%swzxhzdk:%d%s" % (STX, key, ETX)
138+
return HTML_PLACEHOLDER % key
136139

tests/test_extensions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,18 @@ def testHeaderInlineMarkup(self):
236236
'<h1 id="some-header-with-markup">Some <em>Header</em> with '
237237
'<a href="http://example.com">markup</a>.</h1>')
238238

239+
def testHtmlEntities(self):
240+
""" Test HeaderIDs with HTML Entities. """
241+
text = '# Foo &amp; bar'
242+
self.assertEqual(self.md.convert(text),
243+
'<h1 id="foo-bar">Foo &amp; bar</h1>')
244+
245+
def testRawHtml(self):
246+
""" Test HeaderIDs with raw HTML. """
247+
text = '# Foo <b>Bar</b> Baz.'
248+
self.assertEqual(self.md.convert(text),
249+
'<h1 id="foo-bar-baz">Foo <b>Bar</b> Baz.</h1>')
250+
239251
def testNoAutoIds(self):
240252
""" Test HeaderIDs with no auto generated IDs. """
241253

0 commit comments

Comments
 (0)