Skip to content

Commit ef9a229

Browse files
author
Waylan Limberg
committed
Fixed #47. Improved HRProcessor.

Python's re module does not support atomic grouping, which was slowing the HR regex down if a long HR ended with a non-HR char (causing the regex to backtrack). Therefore, we have to simulate atomic grouping. Fortunately, we only need to match end-of-line or end-of-string after the atomic group here, so it was an easy case to simulate. Just remove the '$' from the end of the regex and manually check using m.end(). The run method was refactored while I was at it, saving us from running the regex twice for each HR.
1 parent c53307a commit ef9a229

File tree

3 files changed

+26
-19
lines changed

3 files changed

+26
-19
lines changed

markdown/blockprocessors.py

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -460,35 +460,36 @@ def run(self, parent, blocks):
460460
class HRProcessor(BlockProcessor):
461461
""" Process Horizontal Rules. """
462462

463-
RE = r'[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
463+
RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
464464
# Detect hr on any line of a block.
465-
SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
466-
# Match a hr on a single line of text.
467-
MATCH_RE = re.compile(r'^%s$' % RE)
465+
SEARCH_RE = re.compile(RE, re.MULTILINE)
468466

469467
def test(self, parent, block):
470-
return bool(self.SEARCH_RE.search(block))
468+
m = self.SEARCH_RE.search(block)
469+
# No atomic grouping in python so we simulate it here for performance.
470+
# The regex only matches what would be in the atomic group - the HR.
471+
# Then check if we are at end of block or if next char is a newline.
472+
if m and (m.end() == len(block) or block[m.end()] == '\n'):
473+
# Save match object on class instance so we can use it later.
474+
self.match = m
475+
return True
476+
return False
471477

472478
def run(self, parent, blocks):
473-
lines = blocks.pop(0).split('\n')
474-
prelines = []
479+
block = blocks.pop(0)
475480
# Check for lines in block before hr.
476-
for line in lines:
477-
m = self.MATCH_RE.match(line)
478-
if m:
479-
break
480-
else:
481-
prelines.append(line)
482-
if len(prelines):
481+
prelines = block[:self.match.start()].rstrip('\n')
482+
if prelines:
483483
# Recursively parse lines before hr so they get parsed first.
484-
self.parser.parseBlocks(parent, ['\n'.join(prelines)])
484+
self.parser.parseBlocks(parent, [prelines])
485485
# create hr
486486
hr = util.etree.SubElement(parent, 'hr')
487487
# check for lines in block after hr.
488-
lines = lines[len(prelines)+1:]
489-
if len(lines):
488+
postlines = block[self.match.end():].lstrip('\n')
489+
if postlines:
490490
# Add lines after hr to master blocks for later parsing.
491-
blocks.insert(0, '\n'.join(lines))
491+
blocks.insert(0, postlines)
492+
492493

493494

494495
class EmptyBlockProcessor(BlockProcessor):

tests/misc/para-with-hr.html

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
<p>Here is a paragraph, followed by a horizontal rule.</p>
22
<hr />
3-
<p>Followed by another paragraph.</p>
3+
<p>Followed by another paragraph.</p>
4+
<p>Here is another paragraph, followed by:
5+
*** not an HR.
6+
Followed by more of the same paragraph.</p>

tests/misc/para-with-hr.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ Here is a paragraph, followed by a horizontal rule.
22
***
33
Followed by another paragraph.
44

5+
Here is another paragraph, followed by:
6+
*** not an HR.
7+
Followed by more of the same paragraph.

0 commit comments

Comments
 (0)