Skip to content
1 change: 1 addition & 0 deletions .spell-dict
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ Treeprocessor
Treeprocessors
tuple
tuples
unparsable
unclosed
unescape
unescaping
Expand Down
4 changes: 3 additions & 1 deletion docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Include `scripts/*.py` in the generated source tarballs (#1430).
* Ensure lines after heading in loose list are properly detabbed (#1443).
* Give smarty tree processor higher priority than toc (#1440).
* Permit carrots (`^`) and square brackets (`]`) but explicitly exclude
* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
backslashes (`\`) from abbreviations (#1444).
* In attribute lists (`attr_list`, `fenced_code`), quoted attribute values are
now allowed to contain curly braces (`}`) (#1414).

## [3.5.2] -- 2024-01-10

Expand Down
86 changes: 55 additions & 31 deletions markdown/extensions/attr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,30 @@ def _handle_word(s, t):


_scanner = re.Scanner([
(r'[^ =]+=".*?"', _handle_double_quote),
(r"[^ =]+='.*?'", _handle_single_quote),
(r'[^ =]+=[^ =]+', _handle_key_value),
(r'[^ =]+', _handle_word),
(r'[^ =}]+=".*?"', _handle_double_quote),
(r"[^ =}]+='.*?'", _handle_single_quote),
(r'[^ =}]+=[^ =}]+', _handle_key_value),
(r'[^ =}]+', _handle_word),
(r' ', None)
])


def get_attrs(str: str) -> list[tuple[str, str]]:
""" Parse attribute list and return a list of attribute tuples. """
return _scanner.scan(str)[0]
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Scan an attribute list string into attribute tuples plus leftover text.

    The first item of the result is the list of attribute tuples produced by the scanner.
    The second item is whatever the scanner could not consume, starting at the first
    closing curly brace (`}`) found in it, or an empty string when the unscanned text
    contains no `}`. A non-empty second item normally signals that the input did not
    follow the intended attribute list syntax.
    """
    parsed, unscanned = _scanner.scan(attrs_string)
    # Historic behavior: unparsable text that precedes the first '}' is silently dropped,
    # so only the brace and what follows it are reported back to the caller.
    _dropped, brace, trailing = unscanned.partition('}')
    return parsed, brace + trailing


def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the attribute tuples and discards the remainder text that
    `get_attrs_and_remainder` reports alongside them.
    """
    attrs, _remainder = get_attrs_and_remainder(str)
    return attrs


def isheader(elem: Element) -> bool:
Expand All @@ -76,7 +89,7 @@ def isheader(elem: Element) -> bool:

class AttrListTreeprocessor(Treeprocessor):

BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
Expand Down Expand Up @@ -106,49 +119,58 @@ def run(self, doc: Element) -> None:
# use tail of last child. no `ul` or `ol`.
m = RE.search(elem[-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[-1].tail = elem[-1].tail[:m.start()]
elif pos is not None and pos > 0 and elem[pos-1].tail:
# use tail of last child before `ul` or `ol`
m = RE.search(elem[pos-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
elif elem.text:
# use text. `ul` is first child.
m = RE.search(elem.text)
if m:
self.assign_attrs(elem, m.group(1))
elem.text = elem.text[:m.start()]
if not self.assign_attrs(elem, m.group(1), strict=True):
elem.text = elem.text[:m.start()]
elif len(elem) and elem[-1].tail:
# has children. Get from tail of last child
m = RE.search(elem[-1].tail)
if m:
self.assign_attrs(elem, m.group(1))
elem[-1].tail = elem[-1].tail[:m.start()]
if isheader(elem):
# clean up trailing #s
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
if not self.assign_attrs(elem, m.group(1), strict=True):
elem[-1].tail = elem[-1].tail[:m.start()]
if isheader(elem):
# clean up trailing #s
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
elif elem.text:
# no children. Get from text.
m = RE.search(elem.text)
if m:
self.assign_attrs(elem, m.group(1))
elem.text = elem.text[:m.start()]
if isheader(elem):
# clean up trailing #s
elem.text = elem.text.rstrip('#').rstrip()
if not self.assign_attrs(elem, m.group(1), strict=True):
elem.text = elem.text[:m.start()]
if isheader(elem):
# clean up trailing #s
elem.text = elem.text.rstrip('#').rstrip()
else:
# inline: check for `attrs` at start of tail
if elem.tail:
m = self.INLINE_RE.match(elem.tail)
if m:
self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():]
remainder = self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():] + remainder

def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
""" Assign `attrs` to element.

If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
"""
attrs, remainder = get_attrs_and_remainder(attrs_string)
if strict and remainder:
return remainder

def assign_attrs(self, elem: Element, attrs: str) -> None:
""" Assign `attrs` to element. """
for k, v in get_attrs(attrs):
for k, v in attrs:
if k == '.':
# add to class
cls = elem.get('class')
Expand All @@ -159,11 +181,13 @@ def assign_attrs(self, elem: Element, attrs: str) -> None:
else:
# assign attribute `k` with `v`
elem.set(self.sanitize_name(k), v)
# The text that we initially over-matched will be put back.
return remainder

def sanitize_name(self, name: str) -> str:
"""
Sanitize name as 'an XML Name, minus the ":"'.
See https://www.w3.org/TR/REC-xml-names/#NT-NCName
Sanitize name as 'an XML Name, minus the `:`.'
See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
"""
return self.NAME_RE.sub('_', name)

Expand Down
15 changes: 11 additions & 4 deletions markdown/extensions/fenced_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from . import Extension
from ..preprocessors import Preprocessor
from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
from .attr_list import get_attrs, AttrListExtension
from .attr_list import get_attrs_and_remainder, AttrListExtension
from ..util import parseBoolValue
from ..serializers import _escape_attrib_html
import re
Expand Down Expand Up @@ -56,7 +56,7 @@ class FencedBlockPreprocessor(Preprocessor):
FENCED_BLOCK_RE = re.compile(
dedent(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or
((\{(?P<attrs>[^\n]*)\})| # (optional {attrs} or
(\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
\n # newline (end of opening fence)
Expand Down Expand Up @@ -94,12 +94,17 @@ def run(self, lines: list[str]) -> list[str]:
self.checked_for_deps = True

text = "\n".join(lines)
index = 0
while 1:
m = self.FENCED_BLOCK_RE.search(text)
m = self.FENCED_BLOCK_RE.search(text, index)
if m:
lang, id, classes, config = None, '', [], {}
if m.group('attrs'):
id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
attrs, remainder = get_attrs_and_remainder(m.group('attrs'))
if remainder: # Does not have correctly matching curly braces, so the syntax is invalid.
index = m.end('attrs') # Explicitly skip over this, to prevent an infinite loop.
continue
id, classes, config = self.handle_attrs(attrs)
if len(classes):
lang = classes.pop(0)
else:
Expand Down Expand Up @@ -151,6 +156,8 @@ def run(self, lines: list[str]) -> list[str]:

placeholder = self.md.htmlStash.store(code)
text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}'
# Continue from after the replaced text in the next iteration.
index = m.start() + 1 + len(placeholder)
else:
break
return text.split("\n")
Expand Down
45 changes: 41 additions & 4 deletions tests/test_syntax/extensions/test_attr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,53 @@


class TestAttrList(TestCase):

maxDiff = None
default_kwargs = {'extensions': ['attr_list']}

# TODO: Move the rest of the `attr_list` tests here.

def test_empty_list(self):
def test_empty_attr_list(self):
self.assertMarkdownRenders(
'*foo*{ }',
'<p><em>foo</em>{ }</p>',
extensions=['attr_list']
'<p><em>foo</em>{ }</p>'
)

def test_curly_after_inline(self):
    # An extra `}` after an inline attribute list must survive as literal text
    # while the attribute list itself is still applied to the element.
    source = '*inline*{.a} } *text*{.a }}'
    expected = '<p><em class="a">inline</em> } <em class="a">text</em>}</p>'
    self.assertMarkdownRenders(source, expected)

def test_extra_eq_gets_ignored_inside_curly_inline(self):
    # Undesired behavior but kept for historic compatibility:
    # the unparsable `=a` inside the braces is dropped and the valid attribute is kept.
    source = '*inline*{data-test="x" =a} *text*'
    expected = '<p><em data-test="x">inline</em> <em>text</em></p>'
    self.assertMarkdownRenders(source, expected)

def test_curly_after_block(self):
    # For block-level elements an extra trailing `}` invalidates the attribute list,
    # so the whole text is rendered verbatim and no attribute is assigned.
    source = '# Heading {.a} }'
    expected = '<h1>Heading {.a} }</h1>'
    self.assertMarkdownRenders(source, expected)

def test_curly_in_single_quote(self):
    # Curly braces are allowed inside a single-quoted attribute value.
    source = "# Heading {data-test='{}'}"
    expected = '<h1 data-test="{}">Heading</h1>'
    self.assertMarkdownRenders(source, expected)

def test_curly_in_double_quote(self):
    # Curly braces are allowed inside a double-quoted attribute value.
    source = '# Heading {data-test="{}"}'
    expected = '<h1 data-test="{}">Heading</h1>'
    self.assertMarkdownRenders(source, expected)

def test_unclosed_quote_ignored(self):
    # Undesired behavior but kept for historic compatibility:
    # an unterminated quote is taken as part of the attribute value itself.
    source = '# Heading {foo="bar}'
    expected = '<h1 foo="&quot;bar">Heading</h1>'
    self.assertMarkdownRenders(source, expected)

def test_table_td(self):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_syntax/extensions/test_fenced_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,48 @@ def testFencedCodeEscapedAttrs(self):
extensions=['fenced_code', 'attr_list']
)

def testFencedCodeCurlyInAttrs(self):
    # A quoted attribute value on the opening fence may itself contain curly braces;
    # the expected HTML carries the value through unchanged as `data-test="{}"`.
    self.assertMarkdownRenders(
        self.dedent(
            '''
``` { data-test="{}" }
# Some python code
```
'''
        ),
        self.dedent(
            '''
<pre><code data-test="{}"># Some python code
</code></pre>
'''
        ),
        extensions=['fenced_code', 'attr_list']
    )

def testFencedCodeMismatchedCurlyInAttrs(self):
    # An extra `}` after the attribute list makes the first opening fence invalid.
    # Per the expected HTML below, that line is rendered as a plain paragraph, the
    # would-be code line becomes a heading, the two middle fence lines pair up into an
    # empty code block, and the final fence is left over as paragraph text.
    self.assertMarkdownRenders(
        self.dedent(
            '''
``` { data-test="{}" } }
# Some python code
```
```
test
```
'''
        ),
        self.dedent(
            '''
<p>``` { data-test="{}" } }</p>
<h1>Some python code</h1>
<pre><code></code></pre>
<p>test
```</p>
'''
        ),
        extensions=['fenced_code', 'attr_list']
    )


class TestFencedCodeWithCodehilite(TestCase):

Expand Down