executablebooks · chrisjsewell · Aug 17, 2020 · Aug 14, 2020 · Aug 14, 2020 · Aug 15, 2020
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ exclude: >
 
 repos:
 
- - repo: git://github.com/pre-commit/pre-commit-hooks
+ - repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v2.2.3
  hooks:
  - id: check-json

diff --git a/markdown_it/extensions/container/index.py b/markdown_it/extensions/container/index.py
@@ -36,7 +36,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
 
  # Check out the first character quickly,
  # this should filter out most of non-containers
- if marker_char != charCodeAt(state.src, start):
+ if marker_char != state.srcCharCode[start]:
  return False
 
  # Check out the rest of the marker string
@@ -79,7 +79,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
  # test
  break
 
- if marker_char != charCodeAt(state.src, start):
+ if marker_char != state.srcCharCode[start]:
  continue
 
  if state.sCount[nextLine] - state.blkIndent >= 4:

diff --git a/markdown_it/extensions/deflist/index.py b/markdown_it/extensions/deflist/index.py
@@ -1,6 +1,5 @@
 """Process definition lists."""
 from markdown_it import MarkdownIt
-from markdown_it.common.utils import charCodeAt
 from markdown_it.rules_block import StateBlock
 
 
@@ -16,7 +15,7 @@ def skipMarker(state: StateBlock, line: int):
  return -1
 
  # Check bullet
- marker = charCodeAt(state.src, start)
+ marker = state.srcCharCode[start]
  start += 1
  if marker != 0x7E and marker != 0x3A: # ~ :
  return -1
@@ -119,7 +118,7 @@ def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool):
  )
 
  while pos < maximum:
- ch = charCodeAt(state.src, pos)
+ ch = state.srcCharCode[pos]
 
  if isSpace(ch):
  if ch == 0x09:

diff --git a/markdown_it/extensions/footnote/index.py b/markdown_it/extensions/footnote/index.py
@@ -6,7 +6,7 @@
 from markdown_it.rules_inline import StateInline
 from markdown_it.rules_block import StateBlock
 from markdown_it.helpers import parseLinkLabel
-from markdown_it.common.utils import isSpace, charCodeAt
+from markdown_it.common.utils import isSpace
 
 
 def footnote_plugin(md: MarkdownIt):
@@ -43,23 +43,23 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
  if start + 4 > maximum:
  return False
 
- if charCodeAt(state.src, start) != 0x5B: # /* [ */
+ if state.srcCharCode[start] != 0x5B: # /* [ */
  return False
- if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */
+ if state.srcCharCode[start + 1] != 0x5E: # /* ^ */
  return False
 
  pos = start + 2
  while pos < maximum:
- if charCodeAt(state.src, pos) == 0x20:
+ if state.srcCharCode[pos] == 0x20:
  return False
- if charCodeAt(state.src, pos) == 0x5D: # /* ] */
+ if state.srcCharCode[pos] == 0x5D: # /* ] */
  break
  pos += 1
 
  if pos == start + 2: # no empty footnote labels
  return False
  pos += 1
- if pos + 1 >= maximum or charCodeAt(state.src, pos) != 0x3A: # /* : */
+ if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A: # /* : */
  return False
  if silent:
  return True
@@ -87,7 +87,7 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
  )
 
  while pos < maximum:
- ch = charCodeAt(state.src, pos)
+ ch = state.srcCharCode[pos]
 
  if isSpace(ch):
  if ch == 0x09:
@@ -136,9 +136,9 @@ def footnote_inline(state: StateInline, silent: bool):
 
  if start + 2 >= maximum:
  return False
- if charCodeAt(state.src, start) != 0x5E: # /* ^ */
+ if state.srcCharCode[start] != 0x5E: # /* ^ */
  return False
- if charCodeAt(state.src, start + 1) != 0x5B: # /* [ */
+ if state.srcCharCode[start + 1] != 0x5B: # /* [ */
  return False
 
  labelStart = start + 2
@@ -182,18 +182,18 @@ def footnote_ref(state: StateInline, silent: bool):
 
  if "footnotes" not in state.env or "refs" not in state.env["footnotes"]:
  return False
- if charCodeAt(state.src, start) != 0x5B: # /* [ */
+ if state.srcCharCode[start] != 0x5B: # /* [ */
  return False
- if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */
+ if state.srcCharCode[start + 1] != 0x5E: # /* ^ */
  return False
 
  pos = start + 2
  while pos < maximum:
- if charCodeAt(state.src, pos) == 0x20:
+ if state.srcCharCode[pos] == 0x20:
  return False
- if charCodeAt(state.src, pos) == 0x0A:
+ if state.srcCharCode[pos] == 0x0A:
  return False
- if charCodeAt(state.src, pos) == 0x5D: # /* ] */
+ if state.srcCharCode[pos] == 0x5D: # /* ] */
  break
  pos += 1
 

diff --git a/markdown_it/extensions/front_matter/index.py b/markdown_it/extensions/front_matter/index.py
@@ -29,7 +29,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
 
  # Check out the first character of the first line quickly,
  # this should filter out non-front matter
- if startLine != 0 or marker_char != charCodeAt(state.src, 0):
+ if startLine != 0 or marker_char != state.srcCharCode[0]:
  return False
 
  # Check out the rest of the marker string
@@ -73,7 +73,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
  # test
  break
 
- if marker_char != charCodeAt(state.src, start):
+ if marker_char != state.srcCharCode[start]:
  continue
 
  if state.sCount[nextLine] - state.blkIndent >= 4:

diff --git a/markdown_it/extensions/myst_blocks/index.py b/markdown_it/extensions/myst_blocks/index.py
@@ -2,7 +2,7 @@
 
 from markdown_it import MarkdownIt
 from markdown_it.rules_block import StateBlock
-from markdown_it.common.utils import charCodeAt, isSpace, escapeHtml
+from markdown_it.common.utils import isSpace, escapeHtml
 
 
 TARGET_PATTERN = re.compile(r"^\(([a-zA-Z0-9\|\@\<\>\*\.\/\_\-\+\:]{1,100})\)\=\s*$")
@@ -40,7 +40,7 @@ def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool):
  if state.sCount[startLine] - state.blkIndent >= 4:
  return False
 
- marker = charCodeAt(state.src, pos)
+ marker = state.srcCharCode[pos]
  pos += 1
 
  # Check block marker /* % */
@@ -70,7 +70,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
  if state.sCount[startLine] - state.blkIndent >= 4:
  return False
 
- marker = charCodeAt(state.src, pos)
+ marker = state.srcCharCode[pos]
  pos += 1
 
  # Check block marker /* + */
@@ -81,7 +81,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
 
  cnt = 1
  while pos < maximum:
- ch = charCodeAt(state.src, pos)
+ ch = state.srcCharCode[pos]
  if ch != marker and not isSpace(ch):
  break
  if ch == marker:

diff --git a/markdown_it/extensions/myst_role/index.py b/markdown_it/extensions/myst_role/index.py
@@ -2,7 +2,7 @@
 
 from markdown_it import MarkdownIt
 from markdown_it.rules_inline import StateInline
-from markdown_it.common.utils import charCodeAt, escapeHtml
+from markdown_it.common.utils import escapeHtml
 
 
 PATTERN = re.compile(r"^\{([a-zA-Z0-9\_\-\+\:]{1,36})\}(`+)(?!`)(.+?)(?<!`)\2(?!`)")
@@ -15,7 +15,7 @@ def myst_role_plugin(md: MarkdownIt):
 
 def myst_role(state: StateInline, silent: bool):
  try:
- if charCodeAt(state.src, state.pos - 1) == 0x5C: # /* \ */
+ if state.srcCharCode[state.pos - 1] == 0x5C: # /* \ */
  # escaped (this could be improved in the case of edge case '\\{')
  return False
  except IndexError:

diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py
@@ -5,7 +5,6 @@
 returns the end of the label
 
 """
-from ..common.utils import charCodeAt
 
 
 def parseLinkLabel(state, start, disableNested=False):
@@ -18,7 +17,7 @@ def parseLinkLabel(state, start, disableNested=False):
  level = 1
 
  while state.pos < state.posMax:
- marker = charCodeAt(state.src, state.pos)
+ marker = state.srcCharCode[state.pos]
  if marker == 0x5D: # /* ] */)
  level -= 1
  if level == 0:

diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py
@@ -92,10 +92,10 @@ def tokenize(
  line += 1
  state.line = line
 
- def parse(self, src: str, md, env, outTokens: List[Token]):
+ def parse(self, src: str, md, env, outTokens: List[Token], ords: List[int] = None):
  """Process input string and push block tokens into `outTokens`."""
  if not src:
  return
- state = StateBlock(src, md, env, outTokens)
+ state = StateBlock(src, md, env, outTokens, ords)
  self.tokenize(state, state.line, state.lineMax)
  return state.tokens
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
@@ -11,6 +11,10 @@
  this is generally the main difference between the codes,
  because in python you can't do e.g. `for {i=1;i<x;i++} {}`
  - Use python version of `charCodeAt`
+ - |
+ Reduce use of charCodeAt() by storing char codes in a srcCharCode attribute for state
+ objects and sharing those whenever possible.
+ This provides a significant performance boost.
  - |
  Use python's built-in `html.escape` and `urlparse.quote` methods, as a replacement for
  the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl)

diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
@@ -2,7 +2,7 @@
 import logging
 
 from .state_block import StateBlock
-from ..common.utils import isSpace, charCodeAt
+from ..common.utils import isSpace
 
 LOGGER = logging.getLogger(__name__)
 
@@ -22,7 +22,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  return False
 
  # check the block quote marker
- if charCodeAt(state.src, pos) != 0x3E: # /* > */
+ if state.srcCharCode[pos] != 0x3E: # /* > */
  pos += 1
  return False
  pos += 1
@@ -40,15 +40,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  )
 
  # skip one optional space after '>'
- if charCodeAt(state.src, pos) == 0x20: # /* space */
+ if state.srcCharCode[pos] == 0x20: # /* space */
  # ' > test '
  # ^ -- position start of line here:
  pos += 1
  initial += 1
  offset += 1
  adjustTab = False
  spaceAfterMarker = True
- elif charCodeAt(state.src, pos) == 0x09: # /* tab */
+ elif state.srcCharCode[pos] == 0x09: # /* tab */
  spaceAfterMarker = True
 
  if (state.bsCount[startLine] + offset) % 4 == 3:
@@ -71,7 +71,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  state.bMarks[startLine] = pos
 
  while pos < max:
- ch = charCodeAt(state.src, pos)
+ ch = state.srcCharCode[pos]
 
  if isSpace(ch):
  if ch == 0x09: # / tab /
@@ -147,9 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  # Case 1: line is not inside the blockquote, and this line is empty.
  break
 
- evaluatesTrue = (
- charCodeAt(state.src, pos) == 0x3E and not wasOutdented
- ) # /* > */
+ evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented # /* > */
  pos += 1
  if evaluatesTrue:
  # This line is inside the blockquote.
@@ -162,15 +160,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  )
 
  # skip one optional space after '>'
- if charCodeAt(state.src, pos) == 0x20: # /* space */
+ if state.srcCharCode[pos] == 0x20: # /* space */
  # ' > test '
  # ^ -- position start of line here:
  pos += 1
  initial += 1
  offset += 1
  adjustTab = False
  spaceAfterMarker = True
- elif charCodeAt(state.src, pos) == 0x09: # /* tab */
+ elif state.srcCharCode[pos] == 0x09: # /* tab */
  spaceAfterMarker = True
 
  if (state.bsCount[nextLine] + offset) % 4 == 3:
@@ -193,7 +191,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
  state.bMarks[nextLine] = pos
 
  while pos < max:
- ch = charCodeAt(state.src, pos)
+ ch = state.srcCharCode[pos]
 
  if isSpace(ch):
  if ch == 0x09:

diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
@@ -1,7 +1,7 @@
 # fences (``` lang, ~~~ lang)
 import logging
 
-from ..common.utils import charCodeAt, stripEscape
+from ..common.utils import stripEscape
 from .state_block import StateBlock
 
 LOGGER = logging.getLogger(__name__)
@@ -22,7 +22,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
  if pos + 3 > maximum:
  return False
 
- marker = charCodeAt(state.src, pos)
+ marker = state.srcCharCode[pos]
 
  # /* ~ */ /* ` */
  if marker != 0x7E and marker != 0x60:
@@ -68,7 +68,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
  # test
  break
 
- if charCodeAt(state.src, pos) != marker:
+ if state.srcCharCode[pos] != marker:
  continue
 
  if state.sCount[nextLine] - state.blkIndent >= 4: