Python-Markdown · waylan · Mar 12, 2024 · Nov 10, 2023 · Mar 8, 2024 · Mar 8, 2024
diff --git a/.spell-dict b/.spell-dict
@@ -146,6 +146,7 @@ Treeprocessor
 Treeprocessors
 tuple
 tuples
+unparsable
 unclosed
 unescape
 unescaping

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -34,8 +34,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Include `scripts/*.py` in the generated source tarballs (#1430).
 * Ensure lines after heading in loose list are properly detabbed (#1443).
 * Give smarty tree processor higher priority than toc (#1440).
-* Permit carrots (`^`) and square brackets (`]`) but explicitly exclude
+* Permit carets (`^`) and square brackets (`]`) but explicitly exclude
  backslashes (`\`) from abbreviations (#1444).
+* In attribute lists (`attr_list`, `fenced_code`), quoted attribute values are
+ now allowed to contain closing curly braces (`}`) (#1414).
 
 ## [3.5.2] -- 2024-01-10
 

diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -57,17 +57,30 @@ def _handle_word(s, t):
 
 
 _scanner = re.Scanner([
- (r'[^ =]+=".*?"', _handle_double_quote),
- (r"[^ =]+='.*?'", _handle_single_quote),
- (r'[^ =]+=[^ =]+', _handle_key_value),
- (r'[^ =]+', _handle_word),
+ (r'[^ =}]+=".*?"', _handle_double_quote),
+ (r"[^ =}]+='.*?'", _handle_single_quote),
+ (r'[^ =}]+=[^ =}]+', _handle_key_value),
+ (r'[^ =}]+', _handle_word),
  (r' ', None)
 ])
 
 
-def get_attrs(str: str) -> list[tuple[str, str]]:
- """ Parse attribute list and return a list of attribute tuples. """
- return _scanner.scan(str)[0]
+def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
+ """ Parse attribute list and return a list of attribute tuples.
+
+ Additionally, return any unparsed text starting at the first remaining closing curly brace (`}`).
+ In typical cases, its presence should mean that the input does not match the intended syntax.
+ """
+ attrs, remainder = _scanner.scan(attrs_string)
+ # To keep historic behavior, discard all unparsable text prior to '}'.
+ index = remainder.find('}')
+ remainder = remainder[index:] if index != -1 else ''
+ return attrs, remainder
+
+
+def get_attrs(str: str) -> list[tuple[str, str]]: # pragma: no cover
+ """ Soft-deprecated. Prefer `get_attrs_and_remainder`. """
+ return get_attrs_and_remainder(str)[0]
 
 
 def isheader(elem: Element) -> bool:
@@ -76,7 +89,7 @@ def isheader(elem: Element) -> bool:
 
 class AttrListTreeprocessor(Treeprocessor):
 
- BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
+ BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
  HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
  BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
  INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
@@ -106,49 +119,58 @@ def run(self, doc: Element) -> None:
  # use tail of last child. no `ul` or `ol`.
  m = RE.search(elem[-1].tail)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem[-1].tail = elem[-1].tail[:m.start()]
+ if not self.assign_attrs(elem, m.group(1), strict=True):
+  elem[-1].tail = elem[-1].tail[:m.start()]
  elif pos is not None and pos > 0 and elem[pos-1].tail:
  # use tail of last child before `ul` or `ol`
  m = RE.search(elem[pos-1].tail)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem[pos-1].tail = elem[pos-1].tail[:m.start()]
+ if not self.assign_attrs(elem, m.group(1), strict=True):
+  elem[pos-1].tail = elem[pos-1].tail[:m.start()]
  elif elem.text:
  # use text. `ul` is first child.
  m = RE.search(elem.text)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem.text = elem.text[:m.start()]
+ if not self.assign_attrs(elem, m.group(1), strict=True):
+  elem.text = elem.text[:m.start()]
  elif len(elem) and elem[-1].tail:
  # has children. Get from tail of last child
  m = RE.search(elem[-1].tail)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem[-1].tail = elem[-1].tail[:m.start()]
- if isheader(elem):
- # clean up trailing #s
- elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
+ if not self.assign_attrs(elem, m.group(1), strict=True):
+  elem[-1].tail = elem[-1].tail[:m.start()]
+  if isheader(elem):
+  # clean up trailing #s
+  elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
  elif elem.text:
  # no children. Get from text.
  m = RE.search(elem.text)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem.text = elem.text[:m.start()]
- if isheader(elem):
- # clean up trailing #s
- elem.text = elem.text.rstrip('#').rstrip()
+ if not self.assign_attrs(elem, m.group(1), strict=True):
+  elem.text = elem.text[:m.start()]
+  if isheader(elem):
+  # clean up trailing #s
+  elem.text = elem.text.rstrip('#').rstrip()
  else:
  # inline: check for `attrs` at start of tail
  if elem.tail:
  m = self.INLINE_RE.match(elem.tail)
  if m:
- self.assign_attrs(elem, m.group(1))
- elem.tail = elem.tail[m.end():]
+ remainder = self.assign_attrs(elem, m.group(1))
+ elem.tail = elem.tail[m.end():] + remainder
+
+ def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
+ """ Assign `attrs` to element.
+
+ If the `attrs_string` has an extra closing curly brace, the remaining text is returned.
+
+ The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
+ """
+ attrs, remainder = get_attrs_and_remainder(attrs_string)
+ if strict and remainder:
+ return remainder
 
- def assign_attrs(self, elem: Element, attrs: str) -> None:
- """ Assign `attrs` to element. """
- for k, v in get_attrs(attrs):
+ for k, v in attrs:
  if k == '.':
  # add to class
  cls = elem.get('class')
@@ -159,11 +181,13 @@ def assign_attrs(self, elem: Element, attrs: str) -> None:
  else:
  # assign attribute `k` with `v`
  elem.set(self.sanitize_name(k), v)
+ # The text that we initially over-matched will be put back.
+ return remainder
 
  def sanitize_name(self, name: str) -> str:
  """
- Sanitize name as 'an XML Name, minus the ":"'.
- See https://www.w3.org/TR/REC-xml-names/#NT-NCName
+ Sanitize name as 'an XML Name, minus the `:`.'
+ See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.
  """
  return self.NAME_RE.sub('_', name)
 

diff --git a/markdown/extensions/fenced_code.py b/markdown/extensions/fenced_code.py
@@ -25,7 +25,7 @@
 from . import Extension
 from ..preprocessors import Preprocessor
 from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
-from .attr_list import get_attrs, AttrListExtension
+from .attr_list import get_attrs_and_remainder, AttrListExtension
 from ..util import parseBoolValue
 from ..serializers import _escape_attrib_html
 import re
@@ -56,7 +56,7 @@ class FencedBlockPreprocessor(Preprocessor):
  FENCED_BLOCK_RE = re.compile(
  dedent(r'''
  (?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
- ((\{(?P<attrs>[^\}\n]*)\})| # (optional {attrs} or
+ ((\{(?P<attrs>[^\n]*)\})|  # (optional {attrs} or
  (\.?(?P<lang>[\w#.+-]*)[ ]*)? # optional (.)lang
  (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?) # optional hl_lines)
  \n # newline (end of opening fence)
@@ -94,12 +94,17 @@ def run(self, lines: list[str]) -> list[str]:
  self.checked_for_deps = True
 
  text = "\n".join(lines)
+ index = 0
  while 1:
- m = self.FENCED_BLOCK_RE.search(text)
+ m = self.FENCED_BLOCK_RE.search(text, index)
  if m:
  lang, id, classes, config = None, '', [], {}
  if m.group('attrs'):
- id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
+ attrs, remainder = get_attrs_and_remainder(m.group('attrs'))
+ if remainder: # Does not have correctly matching curly braces, so the syntax is invalid.
+ index = m.end('attrs') # Explicitly skip over this, to prevent an infinite loop.
+ continue
+ id, classes, config = self.handle_attrs(attrs)
  if len(classes):
  lang = classes.pop(0)
  else:
@@ -151,6 +156,8 @@ def run(self, lines: list[str]) -> list[str]:
 
  placeholder = self.md.htmlStash.store(code)
  text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}'
+ # Continue from after the replaced text in the next iteration.
+ index = m.start() + 1 + len(placeholder)
  else:
  break
  return text.split("\n")

diff --git a/tests/test_syntax/extensions/test_attr_list.py b/tests/test_syntax/extensions/test_attr_list.py
@@ -23,16 +23,53 @@
 
 
 class TestAttrList(TestCase):
-
  maxDiff = None
+ default_kwargs = {'extensions': ['attr_list']}
 
  # TODO: Move the rest of the `attr_list` tests here.
 
- def test_empty_list(self):
+ def test_empty_attr_list(self):
  self.assertMarkdownRenders(
  '*foo*{ }',
- '<p><em>foo</em>{ }</p>',
- extensions=['attr_list']
+ '<p><em>foo</em>{ }</p>'
+ )
+
+ def test_curly_after_inline(self):
+ self.assertMarkdownRenders(
+ '*inline*{.a} } *text*{.a }}',
+ '<p><em class="a">inline</em> } <em class="a">text</em>}</p>'
+ )
+
+ def test_extra_eq_gets_ignored_inside_curly_inline(self):
+ # Undesired behavior but kept for historic compatibility.
+ self.assertMarkdownRenders(
+ '*inline*{data-test="x" =a} *text*',
+ '<p><em data-test="x">inline</em> <em>text</em></p>'
+ )
+
+ def test_curly_after_block(self):
+ self.assertMarkdownRenders(
+ '# Heading {.a} }',
+ '<h1>Heading {.a} }</h1>'
+ )
+
+ def test_curly_in_single_quote(self):
+ self.assertMarkdownRenders(
+ "# Heading {data-test='{}'}",
+ '<h1 data-test="{}">Heading</h1>'
+ )
+
+ def test_curly_in_double_quote(self):
+ self.assertMarkdownRenders(
+ '# Heading {data-test="{}"}',
+ '<h1 data-test="{}">Heading</h1>'
+ )
+
+ def test_unclosed_quote_ignored(self):
+ # Undesired behavior but kept for historic compatibility.
+ self.assertMarkdownRenders(
+ '# Heading {foo="bar}',
+ '<h1 foo="&quot;bar">Heading</h1>'
  )
 
  def test_table_td(self):

diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py
@@ -394,6 +394,48 @@ def testFencedCodeEscapedAttrs(self):
  extensions=['fenced_code', 'attr_list']
  )
 
+ def testFencedCodeCurlyInAttrs(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ '''
+ ``` { data-test="{}" }
+ # Some python code
+ ```
+ '''
+ ),
+ self.dedent(
+ '''
+ <pre><code data-test="{}"># Some python code
+ </code></pre>
+ '''
+ ),
+ extensions=['fenced_code', 'attr_list']
+ )
+
+ def testFencedCodeMismatchedCurlyInAttrs(self):
+ self.assertMarkdownRenders(
+ self.dedent(
+ '''
+ ``` { data-test="{}" } }
+ # Some python code
+ ```
+ ```
+ test
+ ```
+ '''
+ ),
+ self.dedent(
+ '''
+ <p>``` { data-test="{}" } }</p>
+ <h1>Some python code</h1>
+ <pre><code></code></pre>
+ <p>test
+ ```</p>
+ '''
+ ),
+ extensions=['fenced_code', 'attr_list']
+ )
+
 
 class TestFencedCodeWithCodehilite(TestCase):