IanMatthewHuff
diff --git a/‎news/2 Fixes/14048.md‎
Lines changed: 1 addition & 0 deletions b/‎news/2 Fixes/14048.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎pythonFiles/normalizeSelection.py‎
Lines changed: 128 additions & 0 deletions b/‎pythonFiles/normalizeSelection.py‎
Lines changed: 128 additions & 0 deletions
diff --git a/‎pythonFiles/tests/test_normalize_selection.py‎
Lines changed: 191 additions & 0 deletions b/‎pythonFiles/tests/test_normalize_selection.py‎
Lines changed: 191 additions & 0 deletions
@@ -0,0 +1 @@
+Update the logic for parsing and sending selected code to the REPL.
@@ -0,0 +1,128 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import ast
+import textwrap
+import re
+import sys
+
+
+def split_lines(source):
+ """
+ Split selection lines in a version-agnostic way.
+
+ Python grammar only treats \r, \n, and \r\n as newlines.
+ But splitlines() in Python 3 has a much larger list: for example, it also includes \v, \f.
+ As such, this function will split lines across all Python versions.
+ """
+ return re.split(r"[\n\r]+", source)
+
+
+def _get_statements(selection):
+ """
+ Process a multiline selection into a list of its top-level statements.
+ This will remove empty newlines around and within the selection, dedent it,
+ and split it using the result of `ast.parse()`.
+ """
+
+ # Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
+ lines = (line for line in split_lines(selection) if line.strip() != "")
+
+ # Dedent the selection and parse it using the ast module.
+ # Note that leading comments in the selection will be discarded during parsing.
+ source = textwrap.dedent("\n".join(lines))
+ tree = ast.parse(source)
+
+ # We'll need the dedented lines to rebuild the selection.
+ lines = split_lines(source)
+
+ # Get the line ranges for top-level blocks returned from parsing the dedented text
+ # and split the selection accordingly.
+ # tree.body is a list of AST objects, which we rely on to extract top-level statements.
+ # If we supported Python 3.8+ only we could use the lineno and end_lineno attributes of each object
+ # to get the boundaries of each block.
+ # However, earlier Python versions only have the lineno attribute, which is the range start position (1-indexed).
+ # Therefore, to retrieve the end line of each block in a version-agnostic way we need to do
+ # `end = next_block.lineno - 1`
+ # for all blocks except the last one, which will will just run until the last line.
+ ends = [node.lineno - 1 for node in tree.body[1:]] + [len(lines)]
+ for node, end in zip(tree.body, ends):
+ # Given this selection:
+ # 1: if (m > 0 and
+ # 2: n < 3):
+ # 3: print('foo')
+ # 4: value = 'bar'
+ #
+ # The first block would have lineno = 1,and the second block lineno = 4
+ start = node.lineno - 1
+ block = "\n".join(lines[start:end])
+
+ # If the block is multiline, add an extra newline character at its end.
+ # This way, when joining blocks back together, there will be a blank line between each multiline statement
+ # and no blank lines between single-line statements, or it would look like this:
+ # >>> x = 22
+ # >>>
+ # >>> total = x + 30
+ # >>>
+ # Note that for the multiline parentheses case this newline is redundant,
+ # since the closing parenthesis terminates the statement already.
+ # This means that for this pattern we'll end up with:
+ # >>> x = [
+ # ... 1
+ # ... ]
+ # >>>
+ # >>> y = [
+ # ... 2
+ # ...]
+ if end - start > 1:
+ block += "\n"
+
+ yield block
+
+
+def normalize_lines(selection):
+ """
+ Normalize the text selection received from the extension and send it to the REPL.
+
+ If it is a single line selection, dedent it, append a newline and send it to the REPL.
+ Otherwise, sanitize the multiline selection before sending it to the REPL:
+ split it in a list of top-level statements
+ and add newlines between each of them to tell the REPL where each block ends.
+ """
+
+ try:
+ # Parse the selection into a list of top-level blocks.
+ # We don't differentiate between single and multiline statements
+ # because it's not a perf bottleneck,
+ # and the overhead from splitting and rejoining strings in the multiline case is one-off.
+ statements = _get_statements(selection)
+
+ # Insert a newline between each top-level statement, and append a newline to the selection.
+ source = "\n".join(statements) + "\n"
+ except:
+ # If there's a problem when parsing statements,
+ # append a blank line to end the block and send it as-is.
+ source = selection + "\n\n"
+
+ # `source` is a unicode instance at this point on Python 2,
+ # so if we used `sys.stdout.write` to send it to the REPL,
+ # Python will implicitly encode it using sys.getdefaultencoding(),
+ # which we don't want.
+ stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
+ stdout.write(source.encode("utf-8"))
+ stdout.flush()
+
+
+if __name__ == "__main__":
+ # This will fail on a large file.
+ # See https://github.com/microsoft/vscode-python/issues/14471
+ contents = sys.argv[1]
+ try:
+ default_encoding = sys.getdefaultencoding()
+ encoded_contents = contents.encode(default_encoding, "surrogateescape")
+ contents = encoded_contents.decode(default_encoding, "replace")
+ except (UnicodeError, LookupError):
+ pass
+ if isinstance(contents, bytes):
+ contents = contents.decode("utf8")
+ normalize_lines(contents)
@@ -0,0 +1,191 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import textwrap
+
+import normalizeSelection
+
+
+class TestNormalizationScript(object):
+ """Unit tests for the normalization script."""
+
+ def test_basicNormalization(self, capsys):
+ src = 'print("this is a test")'
+ expected = src + "\n"
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
+
+ def test_moreThanOneLine(self, capsys):
+ src = textwrap.dedent(
+ """\
+ # Some rando comment
+
+ def show_something():
+ print("Something")
+ """
+ )
+ expected = textwrap.dedent(
+ """\
+ def show_something():
+ print("Something")
+ 
+ """
+ )
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
+
+ def test_withHangingIndent(self, capsys):
+ src = textwrap.dedent(
+ """\
+ x = 22
+ y = 30
+ z = -10
+ result = x + y + z
+
+ if result == 42:
+ print("The answer to life, the universe, and everything")
+ """
+ )
+ expected = textwrap.dedent(
+ """\
+ x = 22
+ y = 30
+ z = -10
+ result = x + y + z
+ if result == 42:
+ print("The answer to life, the universe, and everything")
+ 
+ """
+ )
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
+
+ def test_clearOutExtraneousNewlines(self, capsys):
+ src = textwrap.dedent(
+ """\
+ value_x = 22
+
+ value_y = 30
+
+ value_z = -10
+
+ print(value_x + value_y + value_z)
+
+ """
+ )
+ expectedResult = textwrap.dedent(
+ """\
+ value_x = 22
+ value_y = 30
+ value_z = -10
+ print(value_x + value_y + value_z)
+ """
+ )
+ normalizeSelection.normalize_lines(src)
+ result = capsys.readouterr()
+ assert result.out == expectedResult
+
+ def test_clearOutExtraLinesAndWhitespace(self, capsys):
+ src = textwrap.dedent(
+ """\
+ if True:
+ x = 22
+
+ y = 30
+
+ z = -10
+
+ print(x + y + z)
+
+ """
+ )
+ expectedResult = textwrap.dedent(
+ """\
+ if True:
+ x = 22
+ y = 30
+ z = -10
+
+ print(x + y + z)
+ """
+ )
+ normalizeSelection.normalize_lines(src)
+ result = capsys.readouterr()
+ assert result.out == expectedResult
+
+ def test_partialSingleLine(self, capsys):
+ src = " print('foo')"
+ expected = textwrap.dedent(src) + "\n"
+ normalizeSelection.normalize_lines(src)
+ result = capsys.readouterr()
+ assert result.out == expected
+
+ def test_multiLineWithIndent(self, capsys):
+ src = """\
+ 
+ if (x > 0
+ and condition == True):
+ print('foo')
+ else:
+
+ print('bar')
+ """
+
+ expectedResult = textwrap.dedent(
+ """\
+ if (x > 0
+ and condition == True):
+ print('foo')
+ else:
+ print('bar')
+ 
+ """
+ )
+
+ normalizeSelection.normalize_lines(src)
+ result = capsys.readouterr()
+ assert result.out == expectedResult
+
+ def test_multiLineWithComment(self, capsys):
+ src = textwrap.dedent(
+ """\
+
+ def show_something():
+ # A comment
+ print("Something")
+ """
+ )
+ expected = textwrap.dedent(
+ """\
+ def show_something():
+ # A comment
+ print("Something")
+ 
+ """
+ )
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
+
+ def test_exception(self, capsys):
+ src = " if True:"
+ expected = src + "\n\n"
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
+
+ def test_multilineException(self, capsys):
+ src = textwrap.dedent(
+ """\
+
+ def show_something():
+ if True:
+ """
+ )
+ expected = src + "\n\n"
+ normalizeSelection.normalize_lines(src)
+ captured = capsys.readouterr()
+ assert captured.out == expected
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Update the logic for parsing and sending selected code to the REPL.`