Python-Markdown
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
Lines changed: 31 additions & 32 deletions
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
Lines changed: 111 additions & 1 deletion
@@ -95,32 +95,32 @@ def build_inlinepatterns(md, **kwargs):
 NOIMG = r'(?<!\!)'
 
 # `e=f()` or ``e=f("`")``
-BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)'
+BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\1(?!`)'
 
 # \<
 ESCAPE_RE = r'\\(.)'
 
 # *emphasis*
-EMPHASIS_RE = r'(\*)([^\*]+)\2'
+EMPHASIS_RE = r'(\*)([^\*]+)\1'
 
 # **strong**
-STRONG_RE = r'(\*{2})(.+?)\2'
+STRONG_RE = r'(\*{2})(.+?)\1'
 
 # __smart__strong__
-SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)'
+SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'
 
 # _smart_emphasis_
-SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'
+SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'
 
 # ***strongem*** or ***em*strong**
-EM_STRONG_RE = r'(\*|_)\2{2}(.+?)\2(.*?)\2{2}'
+EM_STRONG_RE = r'(\*|_)\1{2}(.+?)\1(.*?)\1{2}'
 
 # ***strong**em*
-STRONG_EM_RE = r'(\*|_)\2{2}(.+?)\2{2}(.*?)\2'
+STRONG_EM_RE = r'(\*|_)\1{2}(.+?)\1{2}(.*?)\1'
 
 # [text](url) or [text](<url>) or [text](url "title")
+# The closing quote must backreference the opening-quote group. That group
+# sits directly before the title group, which LinkPattern reads as group 12,
+# so it is group 11 now that Pattern no longer adds a leading wrapper group.
 LINK_RE = NOIMG + BRK + \
- r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+ r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\11\s*)?\)'''
 
 # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
 IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^")]+"[^"]*"|[^\)]*))\)'
@@ -181,8 +181,7 @@ def __init__(self, pattern, md=None):
 
  """
  self.pattern = pattern
- self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern,
- re.DOTALL | re.UNICODE)
+ self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
 
  if md:
  self.md = md
@@ -215,7 +214,7 @@ def unescape(self, text):
  return text
 
  def get_stash(m):
- id = m.group(1)
+ id = m.group(0)
  if id in stash:
  value = stash.get(id)
  if isinstance(value, util.string_type):
@@ -227,16 +226,16 @@ def get_stash(m):
 
 
 class SimpleTextPattern(Pattern):
- """ Return a simple text of group(2) of a Pattern. """
+ """ Return a simple text of group(1) of a Pattern. """
  def handleMatch(self, m):
- return m.group(2)
+ return m.group(1)
 
 
 class EscapePattern(Pattern):
  """ Return an escaped character. """
 
  def handleMatch(self, m):
- char = m.group(2)
+ char = m.group(1)
  if char in self.md.ESCAPED_CHARS:
  return '%s%s%s' % (util.STX, ord(char), util.ETX)
  else:
@@ -245,7 +244,7 @@ def handleMatch(self, m):
 
 class SimpleTagPattern(Pattern):
  """
- Return element of type `tag` with a text attribute of group(3)
+ Return element of type `tag` with a text attribute of group(2)
  of a Pattern.
 
  """
@@ -255,7 +254,7 @@ def __init__(self, pattern, tag):
 
  def handleMatch(self, m):
  el = util.etree.Element(self.tag)
- el.text = m.group(3)
+ el.text = m.group(2)
  return el
 
 
@@ -273,7 +272,7 @@ def __init__(self, pattern):
 
  def handleMatch(self, m):
  el = util.etree.Element(self.tag)
- el.text = util.AtomicString(m.group(3).strip())
+ el.text = util.AtomicString(m.group(2).strip())
  return el
 
 
@@ -287,16 +286,16 @@ def handleMatch(self, m):
  tag1, tag2 = self.tag.split(",")
  el1 = util.etree.Element(tag1)
  el2 = util.etree.SubElement(el1, tag2)
- el2.text = m.group(3)
- if len(m.groups()) == 5:
- el2.tail = m.group(4)
+ el2.text = m.group(2)
+ if len(m.groups()) == 3: # was 5 while Pattern wrapped every regex in two extra groups
+ el2.tail = m.group(3)
  return el1
 
 
 class HtmlPattern(Pattern):
  """ Store raw inline html and return a placeholder. """
  def handleMatch(self, m):
- rawhtml = self.unescape(m.group(2))
+ rawhtml = self.unescape(m.group(1))
  place_holder = self.md.htmlStash.store(rawhtml)
  return place_holder
 
@@ -308,7 +307,7 @@ def unescape(self, text):
  return text
 
  def get_stash(m):
- id = m.group(1)
+ id = m.group(0)
  value = stash.get(id)
  if value is not None:
  try:
@@ -323,9 +322,9 @@ class LinkPattern(Pattern):
  """ Return a link element from the given match. """
  def handleMatch(self, m):
  el = util.etree.Element("a")
- el.text = m.group(2)
- title = m.group(13)
- href = m.group(9)
+ el.text = m.group(1)
+ title = m.group(12)
+ href = m.group(8)
 
  if href:
  if href[0] == "<":
@@ -344,7 +343,7 @@ class ImagePattern(LinkPattern):
  """ Return a img element from the given match. """
  def handleMatch(self, m):
  el = util.etree.Element("img")
- src_parts = m.group(9).split()
+ src_parts = m.group(8).split()
  if src_parts:
  src = src_parts[0]
  if src[0] == "<" and src[-1] == ">":
@@ -365,21 +364,21 @@ class ReferencePattern(LinkPattern):
 
  def handleMatch(self, m):
  try:
- id = m.group(9).lower()
+ id = m.group(8).lower()
  except IndexError:
  id = None
  if not id:
  # if we got something like "[Google][]" or "[Goggle]"
  # we'll use "google" as the id
- id = m.group(2).lower()
+ id = m.group(1).lower()
 
  # Clean up linebreaks in id
  id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
  if id not in self.md.references: # ignore undefined refs
  return None
  href, title = self.md.references[id]
 
- text = m.group(2)
+ text = m.group(1)
  return self.makeTag(href, title, text)
 
  def makeTag(self, href, title, text):
@@ -408,8 +407,8 @@ class AutolinkPattern(Pattern):
  """ Return a link Element given an autolink (`<http://example/com>`). """
  def handleMatch(self, m):
  el = util.etree.Element("a")
- el.set('href', self.unescape(m.group(2)))
- el.text = util.AtomicString(m.group(2))
+ el.set('href', self.unescape(m.group(1)))
+ el.text = util.AtomicString(m.group(1))
  return el
 
 
@@ -419,7 +418,7 @@ class AutomailPattern(Pattern):
  """
  def handleMatch(self, m):
  el = util.etree.Element('a')
- email = self.unescape(m.group(2))
+ email = self.unescape(m.group(1))
  if email.startswith("mailto:"):
  email = email[len("mailto:"):]
 
 
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 from __future__ import absolute_import
+import re
 from . import util
 from . import odict
 
@@ -44,6 +45,115 @@ class InlineProcessor(Treeprocessor):
  """
  A Treeprocessor that traverses a tree, applying inline patterns.
  """
+ 
+    def __init__(self, md):
+        """
+        Initialise the processor and compile the token-scanning regex.
+
+        `TOKEN_RE` is an alternation with one branch per entry of
+        `md.ESCAPED_CHARS`, each entry prefixed with a backslash.
+
+        """
+        super(InlineProcessor, self).__init__(md)
+        alternatives = ['\\{0}'.format(char) for char in md.ESCAPED_CHARS]
+        self.TOKEN_RE = re.compile(r'|'.join(alternatives))
+
+    def apply_patterns(self, text):
+        """
+        Match the inline patterns at the beginning of the given text.
+
+        Patterns are tried in the order `self.md.inlinePatterns.values()`
+        yields them. The first one that matches at the start of `text` and
+        whose handler returns a result wins.
+
+        Returns a `(pre_text, node, remaining)` tuple:
+
+        * pre_text: plain text to emit -- the handler's result when it is a
+          string, the first character of `text` when nothing matched, and
+          `''` otherwise.
+        * node: the non-string object returned by the handler, or `None`.
+        * remaining: the part of `text` that still has to be processed.
+
+        """
+
+        if not text:
+            return '', None, ''
+
+        for pattern in self.md.inlinePatterns.values():
+            match = pattern.getCompiledRegExp().match(text)
+            # A zero-width match consumes nothing; accepting it could keep
+            # the caller from ever making progress.
+            if not match or not match.end():
+                continue
+            node = pattern.handleMatch(match)
+            if node is None:
+                # The handler declined the match (ReferencePattern does so
+                # for undefined references). Keep the text and try the
+                # remaining patterns instead of silently dropping it.
+                continue
+            remaining = text[match.end():]
+            if isString(node):
+                return node, None, remaining
+            return '', node, remaining
+
+        # Nothing matched: step forward one character.
+        return text[0], None, text[1:]
+
+    def handle_inline(self, text):
+        """
+        Apply inline patterns to the given text.
+
+        Returns an `(elem_text, children)` tuple: `elem_text` is the text
+        that precedes the first node produced, and `children` is the list of
+        produced nodes, each carrying the text that follows it in its `tail`.
+
+        """
+
+        leading = []
+        nodes = []
+
+        def emit(chunk):
+            # Text goes to the tail of the most recent node, or to the
+            # leading text while no node has been produced yet.
+            if not nodes:
+                leading.append(chunk)
+                return
+            last = nodes[-1]
+            last.tail = last.tail + chunk if last.tail else chunk
+
+        remaining = text
+        while remaining:
+            token = self.TOKEN_RE.search(remaining)
+            if token is None:
+                # No more matches.
+                emit(remaining)
+                break
+
+            start = token.start()
+            emit(remaining[:start])
+            pre_text, node, remaining = self.apply_patterns(remaining[start:])
+            emit(pre_text)
+            if node is not None:
+                nodes.append(node)
+
+        return ''.join(leading), nodes
+ 
+    def handle_elem(self, elem, parent, pos):
+        """
+        Apply patterns to an element and its children recursively.
+
+        Arguments:
+
+        * elem: the element whose `text` and `tail` are processed.
+        * parent: the element that contains `elem`.
+        * pos: index of `elem` within `parent`.
+
+        Nodes produced from `elem.text` are inserted in front of `elem`'s
+        existing children, and nodes produced from `elem.tail` are inserted
+        into `parent` directly after `elem`, so document order is kept.
+        Text held in an `AtomicString` is left untouched.
+
+        """
+
+        if elem.text and elem.text.strip() and not isinstance(elem.text, util.AtomicString):
+            elem.text, children = self.handle_inline(elem.text)
+            # `elem.text` precedes every existing child, so the new nodes
+            # go first rather than at the end.
+            for index, child in enumerate(children):
+                elem.insert(index, child)
+        if elem.tail and elem.tail.strip() and not isinstance(elem.tail, util.AtomicString):
+            elem.tail, siblings = self.handle_inline(elem.tail)
+            # The tail follows `elem` directly, so the new nodes go right
+            # after it instead of after all of `parent`'s other children.
+            for offset, sibling in enumerate(siblings, 1):
+                parent.insert(pos + offset, sibling)
+
+        # Recursively step through children. Siblings that a child inserts
+        # into `elem` land on the next indices.
+        # NOTE(review): this relies on Element iteration being index-based,
+        # so `cpos` stays accurate -- confirm for the etree in use.
+        for cpos, child in enumerate(elem):
+            self.handle_elem(child, elem, cpos)
+ 
+    def run(self, tree):
+        """
+        Apply inline patterns to a parsed Markdown tree.
+
+        Every child of `tree` is handed to `handle_elem`, which processes
+        it and its descendants. To keep a string from being processed with
+        inline patterns, wrap it in AtomicString instead of using a normal
+        string:
+
+            node.text = markdown.util.AtomicString("This will not be processed.")
+
+        Arguments:
+
+        * tree: ElementTree object, representing Markdown tree.
+
+        Returns: None.
+
+        """
+
+        for index, node in enumerate(tree):
+            self.handle_elem(node, tree, index)
+
+class _InlineProcessor(Treeprocessor):
+ """
+ A Treeprocessor that traverses a tree, applying inline patterns.
+ """
 
  def __init__(self, md):
  self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
@@ -268,7 +378,7 @@ def run(self, tree):
  want to process your data with inline paterns, instead of normal
  string, use subclass AtomicString:
 
- node.text = markdown.AtomicString("This will not be processed.")
+ node.text = markdown.util.AtomicString("This will not be processed.")
 
  Arguments: