4 changes: 4 additions & 0 deletions .coveragerc
@@ -2,8 +2,12 @@
branch=True
source=unasync

[paths]
source = src/unasync

[report]
precision = 1
exclude_lines =
    pragma: no cover
    abc.abstractmethod
    \# PY2
13 changes: 13 additions & 0 deletions .coveragerc-py2
@@ -0,0 +1,13 @@
[run]
branch=True
source=unasync
Contributor (review comment): This might be causing a problem for Python 2 coverage, might need a [paths] source = src/unasync entry too.


[paths]
source = src/unasync

[report]
precision = 1
exclude_lines =
    pragma: no cover
    abc.abstractmethod
    \# PY3
12 changes: 11 additions & 1 deletion ci/travis.sh
@@ -51,6 +51,16 @@ if [ "$USE_PYPY_RELEASE_VERSION" != "" ]; then
source testenv/bin/activate
fi

case "${MACPYTHON:-${TRAVIS_PYTHON_VERSION:-}}" in
2*)
COVERAGE_FILE=.coveragerc-py2
;;

*)
COVERAGE_FILE=.coveragerc
;;
esac

pip install -U pip setuptools wheel

if [ "$CHECK_FORMATTING" = "1" ]; then
@@ -91,7 +101,7 @@ else
mkdir empty
cd empty

pytest -ra -v --cov=unasync --cov-config=../.coveragerc --verbose ../tests
pytest -ra -v --cov=unasync --cov-config="../${COVERAGE_FILE}" --verbose ../tests

bash <(curl -s https://codecov.io/bash)
fi
154 changes: 144 additions & 10 deletions src/unasync/__init__.py
@@ -1,9 +1,11 @@
# -*- encoding: utf8 -*-
"""Top-level package for unasync."""

from __future__ import print_function

import collections
import errno
import io
import os
import sys
import tokenize as std_tokenize
@@ -34,13 +36,34 @@
"StopAsyncIteration": "StopIteration",
}

_TYPE_COMMENT_PREFIX = "# type: "


if sys.version_info[0] == 2:  # PY2

    def isidentifier(s):
        return all([c.isalnum() or c == "_" for c in s])

    StringIO = io.BytesIO
else:  # PY3

    def isidentifier(s):
        return s.isidentifier()

    StringIO = io.StringIO

if hasattr(os, "fspath"):  # PY3
    fspath = os.fspath
else:  # PY2
    fspath = str


class Rule:
    """A single set of rules for 'unasync'ing file(s)"""

    def __init__(self, fromdir, todir, additional_replacements=None):
        self.fromdir = fromdir.replace("/", os.sep)
        self.todir = todir.replace("/", os.sep)
        self.fromdir = fspath(fromdir).replace("/", os.sep)
        self.todir = fspath(todir).replace("/", os.sep)

        # Add any additional user-defined token replacements to our list.
        self.token_replacements = _ASYNC_TO_SYNC.copy()
@@ -51,6 +74,8 @@ def _match(self, filepath):
"""Determines if a Rule matches a given filepath and if so
returns a higher comparable value if the match is more specific.
"""
filepath = fspath(filepath)

file_segments = [x for x in filepath.split(os.sep) if x]
from_segments = [x for x in self.fromdir.split(os.sep) if x]
len_from_segments = len(from_segments)
@@ -65,9 +90,10 @@ def _match(self, filepath):
        return False

    def _unasync_file(self, filepath):
        filepath = fspath(filepath)
        with open(filepath, "rb") as f:
            write_kwargs = {}
            if sys.version_info[0] >= 3:
            if sys.version_info[0] >= 3:  # PY3  # pragma: no branch
                encoding, _ = std_tokenize.detect_encoding(f.readline)
                write_kwargs["encoding"] = encoding
                f.seek(0)
@@ -82,7 +108,57 @@ def _unasync_file(self, filepath):
    def _unasync_tokens(self, tokens):
        # TODO __await__, ...?
        used_space = None
        context = None  # Can be `None`, `"func_decl"`, `"func_name"`, `"arg_list"`, `"arg_list_end"`, `"return_type"`
        brace_depth = 0
        typing_ctx = False

        for space, toknum, tokval in tokens:
            # Update context state tracker
            if context is None and toknum == std_tokenize.NAME and tokval == "def":
                context = "func_decl"
            elif context == "func_decl" and toknum == std_tokenize.NAME:
                context = "func_name"
            elif context == "func_name" and toknum == std_tokenize.OP and tokval == "(":
                context = "arg_list"
            elif context == "arg_list":
                if toknum == std_tokenize.OP and tokval in ("(", "["):
                    brace_depth += 1
                elif (
                    toknum == std_tokenize.OP
                    and tokval in (")", "]")
                    and brace_depth >= 1
                ):
                    brace_depth -= 1
                elif toknum == std_tokenize.OP and tokval == ")":
                    context = "arg_list_end"
                elif toknum == std_tokenize.OP and tokval == ":" and brace_depth < 1:
                    typing_ctx = True
                elif toknum == std_tokenize.OP and tokval == "," and brace_depth < 1:
                    typing_ctx = False
            elif (
                context == "arg_list_end"
                and toknum == std_tokenize.OP
                and tokval == "->"
            ):
                context = "return_type"
                typing_ctx = True
            elif context == "return_type":
                if toknum == std_tokenize.OP and tokval in ("(", "["):
                    brace_depth += 1
                elif (
                    toknum == std_tokenize.OP
                    and tokval in (")", "]")
                    and brace_depth >= 1
                ):
                    brace_depth -= 1
                elif toknum == std_tokenize.OP and tokval == ":":
                    context = None
                    typing_ctx = False
            else:  # Something unexpected happened - reset state
                context = None
                brace_depth = 0
                typing_ctx = False

            if tokval in ["async", "await"]:
                # When removing async or await, we want to use the whitespace that
                # was before async/await before the next token so that
@@ -93,8 +169,59 @@ def _unasync_tokens(self, tokens):
            if toknum == std_tokenize.NAME:
                tokval = self._unasync_name(tokval)
            elif toknum == std_tokenize.STRING:
                left_quote, name, right_quote = tokval[0], tokval[1:-1], tokval[-1]
                tokval = left_quote + self._unasync_name(name) + right_quote
                # Strings in typing context are forward-references and should be unasyncified
                quote = ""
                prefix = ""
                while ord(tokval[0]) in range(ord("a"), ord("z") + 1):
                    prefix += tokval[0]
                    tokval = tokval[1:]

                if tokval.startswith('"""') and tokval.endswith('"""'):
                    quote = '"""'  # Broken syntax highlighters workaround: """
                elif tokval.startswith("'''") and tokval.endswith("'''"):
                    quote = "'''"  # Broken syntax highlighters workaround: '''
                elif tokval.startswith('"') and tokval.endswith('"'):
                    quote = '"'
                elif tokval.startswith(  # pragma: no branch
                    "'"
                ) and tokval.endswith("'"):
                    quote = "'"
                assert (
                    len(quote) > 0
                ), "Quoting style of string {0!r} unknown".format(tokval)
                stringval = tokval[len(quote) : -len(quote)]
                if typing_ctx:
                    stringval = _untokenize(
                        self._unasync_tokens(_tokenize(StringIO(stringval)))
                    )
                else:
                    stringval = self._unasync_name(stringval)
                tokval = prefix + quote + stringval + quote
            elif toknum == std_tokenize.COMMENT and tokval.startswith(
                _TYPE_COMMENT_PREFIX
            ):
                type_decl, suffix = tokval[len(_TYPE_COMMENT_PREFIX) :], ""
                if "#" in type_decl:
                    type_decl, suffix = type_decl.split("#", 1)
                    suffix = "#" + suffix
                type_decl_stripped = type_decl.strip()

                # Do not process `type: ignore` or `type: ignore[…]` as these aren't actual identifiers
                is_type_ignore = type_decl_stripped == "ignore"
                is_type_ignore |= type_decl_stripped.startswith(
                    "ignore"
                ) and not isidentifier(type_decl_stripped[0:7])
                if not is_type_ignore:
                    # Preserve trailing whitespace since the tokenizer won't
                    trailing_space_len = len(type_decl) - len(type_decl.rstrip())
                    if trailing_space_len > 0:
                        suffix = type_decl[-trailing_space_len:] + suffix
                        type_decl = type_decl[:-trailing_space_len]
                    type_decl = _untokenize(
                        self._unasync_tokens(_tokenize(StringIO(type_decl)))
                    )

                tokval = _TYPE_COMMENT_PREFIX + type_decl + suffix
            if used_space is None:
                used_space = space
            yield (used_space, tokval)
@@ -128,12 +255,16 @@ def unasync_files(fpath_list, rules):


def _get_tokens(f):
    if sys.version_info[0] == 2:
    if sys.version_info[0] == 2:  # PY2
        for tok in std_tokenize.generate_tokens(f.readline):
            type_, string, start, end, line = tok
            yield Token(type_, string, start, end, line)
    else:
        for tok in std_tokenize.tokenize(f.readline):
    else:  # PY3
        if isinstance(f, io.TextIOBase):
            gen = std_tokenize.generate_tokens(f.readline)
        else:
            gen = std_tokenize.tokenize(f.readline)
        for tok in gen:
            if tok.type == std_tokenize.ENCODING:
                continue
            yield tok
@@ -143,13 +274,16 @@ def _tokenize(f):
    last_end = (1, 0)
    for tok in _get_tokens(f):
        if last_end[0] < tok.start[0]:
            yield ("", std_tokenize.STRING, " \\\n")
            # Somehow Python 3.5 and below produce the ENDMARKER in a way that
            # causes superfluous continuation lines to be generated
            if tok.type != std_tokenize.ENDMARKER:
                yield (" ", std_tokenize.NEWLINE, "\\\n")
            last_end = (tok.start[0], 0)

        space = ""
        if tok.start > last_end:
            assert tok.start[0] == last_end[0]
            space = " " * (tok.start[1] - last_end[1])
            space = tok.line[last_end[1] : tok.start[1]]
        yield (space, tok.type, tok.string)

        last_end = tok.end
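
Taken together, these changes keep the public entry points (Rule and unasync_files) unchanged; they just now rewrite annotations, forward-reference strings, and "# type:" comments as well. A minimal usage sketch (the package layout shown is hypothetical):

import unasync

# Rewrites the async sources into their sync counterparts; with this patch,
# annotations such as typing.AsyncIterable[int], forward-reference strings,
# and "# type:" comments are converted too (except "# type: ignore").
unasync.unasync_files(
    ["src/pkg/_async/impl.py"],
    rules=[unasync.Rule("src/pkg/_async", "src/pkg/_sync")],
)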
3 changes: 2 additions & 1 deletion test-requirements.txt
@@ -1,2 +1,3 @@
pytest>=4.3.0
pytest-cov
pytest-cov
pathlib2 ; python_version < '3.5'
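
The pathlib2 pin backs the new fspath shim: on Python < 3.5 it provides the path objects that Rule can now accept alongside plain strings. A short sketch with hypothetical paths:

import pathlib  # pathlib2 on Python < 3.5

import unasync

# Rule() now coerces both directories through fspath (os.fspath on PY3,
# str on PY2), so path objects work wherever plain strings did.
rule = unasync.Rule(pathlib.Path("src/pkg/_async"), pathlib.Path("src/pkg/_sync"))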
8 changes: 8 additions & 0 deletions tests/data/async/tabs.py
@@ -0,0 +1,8 @@
# fmt: off
async def dummy():
	await dummy2()  # This line is indented with a tab that should be preserved
# fmt: on


async def dummy2():
    await dummy()  # This one uses 4 spaces and these should also be preserved
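
This fixture only round-trips because _tokenize above now slices the physical source line instead of padding with spaces. The idea in isolation (a Python 3, standard-library-only sketch):

import io
import tokenize

src = "x =\t1\n"  # a tab hides between the tokens
last_end = (1, 0)
for tok in tokenize.generate_tokens(io.StringIO(src).readline):
    if tok.start[0] == last_end[0] and tok.start[1] > last_end[1]:
        # tok.line holds the whole physical line, so slicing between the
        # previous token's end and this token's start recovers the exact
        # whitespace, tab included; ' ' * n padding would turn it into spaces.
        print(repr(tok.line[last_end[1] : tok.start[1]]))
    last_end = tok.end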
23 changes: 23 additions & 0 deletions tests/data/async/typing.py
@@ -3,3 +3,26 @@
typing.AsyncIterable[bytes]
typing.AsyncIterator[bytes]
typing.AsyncGenerator[bytes]

# A typed function that takes the first item of an (a)sync iterator and returns it
async def func1(a: typing.AsyncIterable[int]) -> str:
    it: typing.AsyncIterator[int] = a.__aiter__()
    b: int = await it.__anext__()
    return str(b)


# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
async def func2(a):  # type: (typing.AsyncIterable[int]) -> str
    it = a.__aiter__()  # type: typing.AsyncIterator[int]
    b = await it.__anext__()  # type: int
    return str(b)


# And some funky edge cases to at least cover the relevant cases in this test
a: int = 5
b: str = a  # type: ignore  # This is the actual comment and the type declaration silences the warning that would otherwise happen
c: str = a  # type: ignore2  # This is the actual comment and the declaration declares another type, both of which are wrong

# fmt: off
# And some genuine trailing whitespace (uww…)
z = a  # type: int
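
The ignore/ignore2 lines above exercise the new type-comment branch in _unasync_tokens. Its discrimination rule, restated as a tiny standalone sketch (Python 3, not part of the patch):

_TYPE_COMMENT_PREFIX = "# type: "

def _is_type_ignore(comment):
    # "# type: ignore", or "ignore" followed by a non-identifier character
    # (e.g. "ignore[assignment]"), is a suppression and is left untouched;
    # "# type: ignore2" names a (bogus) type and still gets rewritten.
    decl = comment[len(_TYPE_COMMENT_PREFIX) :].split("#", 1)[0].strip()
    return decl == "ignore" or (
        decl.startswith("ignore") and not decl[:7].isidentifier()
    )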
13 changes: 13 additions & 0 deletions tests/data/async/typing_py3.py
@@ -0,0 +1,13 @@
# fmt: off
# A forward-reference typed function that returns an iterator for an (a)sync iterable
async def aiter1(a: "typing.AsyncIterable[int]") -> 'typing.AsyncIterable[int]':
    return a.__aiter__()

# Same as the above but using triple-quoted strings
async def aiter2(a: """typing.AsyncIterable[int]""") -> r'''typing.AsyncIterable[int]''':
    return a.__aiter__()

# Same as the above but without forward-references
async def aiter3(a: typing.AsyncIterable[int]) -> typing.AsyncIterable[int]:
    return a.__aiter__()
# fmt: on
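
These cases drive the string prefix and quote detection added to _unasync_tokens; condensed into a standalone helper for illustration (a sketch, not part of the patch):

def _split_string_token(tokval):
    # Peel lower-case prefixes (r, b, u, ...) off a STRING token, then match
    # the quote style, longest first, mirroring the loop in _unasync_tokens.
    prefix = ""
    while tokval and "a" <= tokval[0] <= "z":
        prefix += tokval[0]
        tokval = tokval[1:]
    for quote in ('"""', "'''", '"', "'"):
        if tokval.startswith(quote) and tokval.endswith(quote):
            return prefix, quote, tokval[len(quote) : -len(quote)]
    raise ValueError("Quoting style of string {0!r} unknown".format(tokval))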
8 changes: 8 additions & 0 deletions tests/data/sync/tabs.py
@@ -0,0 +1,8 @@
# fmt: off
def dummy():
	dummy2()  # This line is indented with a tab that should be preserved
# fmt: on


def dummy2():
    dummy()  # This one uses 4 spaces and these should also be preserved
23 changes: 23 additions & 0 deletions tests/data/sync/typing.py
@@ -3,3 +3,26 @@
typing.Iterable[bytes]
typing.Iterator[bytes]
typing.Generator[bytes]

# A typed function that takes the first item of an (a)sync iterator and returns it
def func1(a: typing.Iterable[int]) -> str:
    it: typing.Iterator[int] = a.__iter__()
    b: int = it.__next__()
    return str(b)


# Same as the above but using old-style typings (mainly for Python 2.7 – 3.5 compatibility)
def func2(a):  # type: (typing.Iterable[int]) -> str
    it = a.__iter__()  # type: typing.Iterator[int]
    b = it.__next__()  # type: int
    return str(b)


# And some funky edge cases to at least cover the relevant cases in this test
a: int = 5
b: str = a  # type: ignore  # This is the actual comment and the type declaration silences the warning that would otherwise happen
c: str = a  # type: ignore2  # This is the actual comment and the declaration declares another type, both of which are wrong

# fmt: off
# And some genuine trailing whitespace (uww…)
z = a  # type: int
13 changes: 13 additions & 0 deletions tests/data/sync/typing_py3.py
@@ -0,0 +1,13 @@
# fmt: off
# A forward-reference typed function that returns an iterator for an (a)sync iterable
def aiter1(a: "typing.Iterable[int]") -> 'typing.Iterable[int]':
    return a.__iter__()

# Same as the above but using triple-quoted strings
def aiter2(a: """typing.Iterable[int]""") -> r'''typing.Iterable[int]''':
    return a.__iter__()

# Same as the above but without forward-references
def aiter3(a: typing.Iterable[int]) -> typing.Iterable[int]:
    return a.__iter__()
# fmt: on