Skip to content

Commit dda3311

Browse files
facelessuser authored and waylan committed
Better handling of backticks in tables (#524)
At some point, the logic of counting backticks and determining whether the count was odd or even was used to parse a row's text into cells. Unfortunately, this approach broke the expected code parsing logic in a table. We essentially traded one bug for another. This fixes table backtick handling and restores sane backtick logic while preserving existing fixes. (issue #449)
1 parent c10cfff commit dda3311

File tree

3 files changed

+102
-41
lines changed

3 files changed

+102
-41
lines changed

markdown/extensions/tables.py

Lines changed: 62 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,15 @@
1919
from __future__ import unicode_literals
2020
from . import Extension
2121
from ..blockprocessors import BlockProcessor
22-
from ..inlinepatterns import BacktickPattern, BACKTICK_RE
2322
from ..util import etree
23+
import re
2424

2525

2626
class TableProcessor(BlockProcessor):
2727
""" Process Tables. """
2828

29+
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
30+
2931
def test(self, parent, block):
3032
rows = block.split('\n')
3133
return (len(rows) > 1 and '|' in rows[0] and
@@ -88,50 +90,70 @@ def _split_row(self, row, border):
8890
row = row[1:]
8991
if row.endswith('|'):
9092
row = row[:-1]
91-
return self._split(row, '|')
93+
return self._split(row)
9294

93-
def _split(self, row, marker):
95+
def _split(self, row):
9496
""" split a row of text with some code into a list of cells. """
95-
if self._row_has_unpaired_backticks(row):
96-
# fallback on old behaviour
97-
return row.split(marker)
98-
# modify the backtick pattern to only match at the beginning of the search string
99-
backtick_pattern = BacktickPattern('^' + BACKTICK_RE)
10097
elements = []
101-
current = ''
102-
i = 0
103-
while i < len(row):
104-
letter = row[i]
105-
if letter == marker:
106-
if current != '' or len(elements) == 0:
107-
# Don't append empty string unless it is the first element
108-
# The border is already removed when we get the row, then the line is strip()'d
109-
# If the first element is a marker, then we have an empty first cell
110-
elements.append(current)
111-
current = ''
112-
else:
113-
match = backtick_pattern.getCompiledRegExp().match(row[i:])
114-
if not match:
115-
current += letter
116-
else:
117-
groups = match.groups()
118-
delim = groups[1] # the code block delimeter (ie 1 or more backticks)
119-
row_contents = groups[2] # the text contained inside the code block
120-
i += match.start(4) - 1 # jump pointer to the beginning of the rest of the text (group #4)
121-
element = delim + row_contents + delim # reinstert backticks
122-
current += element
123-
i += 1
124-
elements.append(current)
98+
pipes = []
99+
tics = []
100+
tic_points = []
101+
tic_region = []
102+
good_pipes = []
103+
104+
# Parse row
105+
# Throw out \\, and \|
106+
for m in self.RE_CODE_PIPES.finditer(row):
107+
# Store ` data (len, start_pos, end_pos)
108+
if m.group(2):
109+
# `+
110+
# Store length of each tic group
111+
tics.append(len(m.group(2)))
112+
# Store start and end of tic group
113+
tic_points.append((m.start(2), m.end(2) - 1))
114+
# Store pipe location
115+
elif m.group(4):
116+
pipes.append(m.start(4))
117+
118+
# Pair up tics according to size if possible
119+
# Walk through tic list and see if tic has a close.
120+
# Store the tic region (start of region, end of region).
121+
pos = 0
122+
tic_len = len(tics)
123+
while pos < tic_len:
124+
try:
125+
index = tics[pos + 1:].index(tics[pos]) + 1
126+
tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
127+
pos += index + 1
128+
except ValueError:
129+
pos += 1
130+
131+
# Resolve pipes. Check if they are within a tic pair region.
132+
# Walk through pipes comparing them to each region.
133+
# - If pipe position is less than a region, it isn't in a region
134+
# - If it is within a region, we don't want it, so throw it out
135+
# - If we didn't throw it out, it must be a table pipe
136+
for pipe in pipes:
137+
throw_out = False
138+
for region in tic_region:
139+
if pipe < region[0]:
140+
# Pipe is not in a region
141+
break
142+
elif region[0] <= pipe <= region[1]:
143+
# Pipe is within a code region. Throw it out.
144+
throw_out = True
145+
break
146+
if not throw_out:
147+
good_pipes.append(pipe)
148+
149+
# Split row according to table delimiters.
150+
pos = 0
151+
for pipe in good_pipes:
152+
elements.append(row[pos:pipe])
153+
pos = pipe + 1
154+
elements.append(row[pos:])
125155
return elements
126156

127-
def _row_has_unpaired_backticks(self, row):
128-
count_total_backtick = row.count('`')
129-
count_escaped_backtick = row.count('\`')
130-
count_backtick = count_total_backtick - count_escaped_backtick
131-
# odd number of backticks,
132-
# we won't be able to build correct code blocks
133-
return count_backtick & 1
134-
135157

136158
class TableExtension(Extension):
137159
""" Add tables to Markdown. """

tests/extensions/extra/tables.html

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -255,4 +255,33 @@ <h2>Table Tests</h2>
255255
<ul>
256256
<li>this | should | not</li>
257257
<li>be | a | table</li>
258-
</ul>
258+
</ul>
259+
<p>Add tests for issue #449</p>
260+
<table>
261+
<thead>
262+
<tr>
263+
<th>Odd backticks</th>
264+
<th>Even backticks</th>
265+
</tr>
266+
</thead>
267+
<tbody>
268+
<tr>
269+
<td><code>[!\"\#$%&amp;'()*+,\-./:;&lt;=&gt;?@\[\\\]^_`{|}~]</code></td>
270+
<td><code>[!\"\#$%&amp;'()*+,\-./:;&lt;=&gt;?@\[\\\]^`_`{|}~]</code></td>
271+
</tr>
272+
</tbody>
273+
</table>
274+
<table>
275+
<thead>
276+
<tr>
277+
<th>Escapes</th>
278+
<th>More Escapes</th>
279+
</tr>
280+
</thead>
281+
<tbody>
282+
<tr>
283+
<td><code>`\</code></td>
284+
<td><code>\</code></td>
285+
</tr>
286+
</tbody>
287+
</table>

tests/extensions/extra/tables.txt

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,3 +80,13 @@ Lists are not tables
8080

8181
- this | should | not
8282
- be | a | table
83+
84+
Add tests for issue #449
85+
86+
Odd backticks | Even backticks
87+
------------ | -------------
88+
``[!\"\#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]`` | ``[!\"\#$%&'()*+,\-./:;<=>?@\[\\\]^`_`{|}~]``
89+
90+
Escapes | More Escapes
91+
------- | ------
92+
`` `\`` | `\`

0 commit comments

Comments
 (0)