|
19 | 19 | from __future__ import unicode_literals
|
20 | 20 | from . import Extension
|
21 | 21 | from ..blockprocessors import BlockProcessor
|
22 |
| -from ..inlinepatterns import BacktickPattern, BACKTICK_RE |
23 | 22 | from ..util import etree
|
| 23 | +import re |
24 | 24 |
|
25 | 25 |
|
26 | 26 | class TableProcessor(BlockProcessor):
|
27 | 27 | """ Process Tables. """
|
28 | 28 |
|
| 29 | + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))') |
| 30 | + |
29 | 31 | def test(self, parent, block):
|
30 | 32 | rows = block.split('\n')
|
31 | 33 | return (len(rows) > 1 and '|' in rows[0] and
|
@@ -88,50 +90,70 @@ def _split_row(self, row, border):
|
88 | 90 | row = row[1:]
|
89 | 91 | if row.endswith('|'):
|
90 | 92 | row = row[:-1]
|
91 |
| - return self._split(row, '|') |
| 93 | + return self._split(row) |
92 | 94 |
|
93 |
| - def _split(self, row, marker): |
| 95 | + def _split(self, row): |
94 | 96 | """ split a row of text with some code into a list of cells. """
|
95 |
| - if self._row_has_unpaired_backticks(row): |
96 |
| - # fallback on old behaviour |
97 |
| - return row.split(marker) |
98 |
| - # modify the backtick pattern to only match at the beginning of the search string |
99 |
| - backtick_pattern = BacktickPattern('^' + BACKTICK_RE) |
100 | 97 | elements = []
|
101 |
| - current = '' |
102 |
| - i = 0 |
103 |
| - while i < len(row): |
104 |
| - letter = row[i] |
105 |
| - if letter == marker: |
106 |
| - if current != '' or len(elements) == 0: |
107 |
| - # Don't append empty string unless it is the first element |
108 |
| - # The border is already removed when we get the row, then the line is strip()'d |
109 |
| - # If the first element is a marker, then we have an empty first cell |
110 |
| - elements.append(current) |
111 |
| - current = '' |
112 |
| - else: |
113 |
| - match = backtick_pattern.getCompiledRegExp().match(row[i:]) |
114 |
| - if not match: |
115 |
| - current += letter |
116 |
| - else: |
117 |
| - groups = match.groups() |
118 |
| - delim = groups[1] # the code block delimeter (ie 1 or more backticks) |
119 |
| - row_contents = groups[2] # the text contained inside the code block |
120 |
| - i += match.start(4) - 1 # jump pointer to the beginning of the rest of the text (group #4) |
121 |
| - element = delim + row_contents + delim # reinstert backticks |
122 |
| - current += element |
123 |
| - i += 1 |
124 |
| - elements.append(current) |
| 98 | + pipes = [] |
| 99 | + tics = [] |
| 100 | + tic_points = [] |
| 101 | + tic_region = [] |
| 102 | + good_pipes = [] |
| 103 | + |
| 104 | + # Parse row |
| 105 | + # Throw out \\, and \| |
| 106 | + for m in self.RE_CODE_PIPES.finditer(row): |
| 107 | + # Store ` data (len, start_pos, end_pos) |
| 108 | + if m.group(2): |
| 109 | + # `+ |
| 110 | + # Store length of each tic group |
| 111 | + tics.append(len(m.group(2))) |
| 112 | + # Store start and end of tic group |
| 113 | + tic_points.append((m.start(2), m.end(2) - 1)) |
| 114 | + # Store pipe location |
| 115 | + elif m.group(4): |
| 116 | + pipes.append(m.start(4)) |
| 117 | + |
| 118 | + # Pair up tics according to size if possible |
| 119 | + # Walk through tic list and see if tic has a close. |
| 120 | + # Store the tic region (start of region, end of region). |
| 121 | + pos = 0 |
| 122 | + tic_len = len(tics) |
| 123 | + while pos < tic_len: |
| 124 | + try: |
| 125 | + index = tics[pos + 1:].index(tics[pos]) + 1 |
| 126 | + tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) |
| 127 | + pos += index + 1 |
| 128 | + except ValueError: |
| 129 | + pos += 1 |
| 130 | + |
| 131 | + # Resolve pipes. Check if they are within a tic pair region. |
| 132 | + # Walk through pipes comparing them to each region. |
| 133 | + # - If pipe position is less that a region, it isn't in a region |
| 134 | + # - If it is within a region, we don't want it, so throw it out |
| 135 | + # - If we didn't throw it out, it must be a table pipe |
| 136 | + for pipe in pipes: |
| 137 | + throw_out = False |
| 138 | + for region in tic_region: |
| 139 | + if pipe < region[0]: |
| 140 | + # Pipe is not in a region |
| 141 | + break |
| 142 | + elif region[0] <= pipe <= region[1]: |
| 143 | + # Pipe is within a code region. Throw it out. |
| 144 | + throw_out = True |
| 145 | + break |
| 146 | + if not throw_out: |
| 147 | + good_pipes.append(pipe) |
| 148 | + |
| 149 | + # Split row according to table delimeters. |
| 150 | + pos = 0 |
| 151 | + for pipe in good_pipes: |
| 152 | + elements.append(row[pos:pipe]) |
| 153 | + pos = pipe + 1 |
| 154 | + elements.append(row[pos:]) |
125 | 155 | return elements
|
126 | 156 |
|
127 |
| - def _row_has_unpaired_backticks(self, row): |
128 |
| - count_total_backtick = row.count('`') |
129 |
| - count_escaped_backtick = row.count('\`') |
130 |
| - count_backtick = count_total_backtick - count_escaped_backtick |
131 |
| - # odd number of backticks, |
132 |
| - # we won't be able to build correct code blocks |
133 |
| - return count_backtick & 1 |
134 |
| - |
135 | 157 |
|
136 | 158 | class TableExtension(Extension):
|
137 | 159 | """ Add tables to Markdown. """
|
|
0 commit comments