Skip to content

Commit a70c2eb

Browse files
Code parsing for run selection in terminal - Python side (microsoft#14457)
* News file * Make changes * Update tests (no skip, more tests) * Update wording * Format with newer version of black * I guess it was overkill * UGH * Fix single line logic, update python tests * Simplify single-workspace tests + update samples * First round of reviews * Second round of reviews * Amend comment * More comment shuffling * Remove special-casing of single line statements * Update pythonFiles/normalizeForInterpreter.py Co-authored-by: Karthik Nadig <kanadig@microsoft.com> * Undo TS-side changes (temporarily) * move changes to normalizeSelection.py * Use correct function * Send the selection as-is if there's any problem Co-authored-by: Karthik Nadig <kanadig@microsoft.com>
1 parent 97cd456 commit a70c2eb

File tree

3 files changed

+320
-0
lines changed

3 files changed

+320
-0
lines changed

news/2 Fixes/14048.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Update the logic for parsing and sending selected code to the REPL.

pythonFiles/normalizeSelection.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License.
3+
4+
import ast
5+
import textwrap
6+
import re
7+
import sys
8+
9+
10+
def split_lines(source):
    r"""
    Split a selection into lines the same way on every Python version.

    The Python grammar only treats \r, \n, and \r\n as newlines,
    but str.splitlines() in Python 3 breaks on a much larger set of characters
    (for example, it also includes \v and \f).
    Splitting on runs of \r and \n keeps the result consistent with the grammar.

    Consecutive line breaks are treated as a single separator, so blank lines
    inside `source` do not produce empty entries. A leading or trailing line break
    still yields an empty string at that end of the returned list.

    :param source: the text to split.
    :return: the list of lines in `source`, without their line terminators.
    """
    return re.split(r"[\n\r]+", source)
19+
20+
21+
def _block_start(node):
    """Return the 0-indexed line on which a top-level AST node starts, decorators included."""
    lineno = node.lineno
    # In Python 3.8 and higher, the lineno of a decorated function or class points at the
    # `def`/`class` line rather than at its first decorator, so the decorator lines have to be
    # looked up explicitly. On earlier versions both values are the same line.
    # Not all AST objects can have decorators, hence the getattr.
    decorators = getattr(node, "decorator_list", None)
    if decorators:
        lineno = min(lineno, decorators[0].lineno)
    return lineno - 1


def _get_statements(selection):
    """
    Process a multiline selection into a list of its top-level statements.

    This will remove empty newlines around and within the selection, dedent it,
    and split it using the result of `ast.parse()`.

    :param selection: the raw text selected in the editor.
    :return: a generator yielding one string per top-level statement;
        multiline statements end with an extra newline character.
    :raises SyntaxError: (from `ast.parse()`) if the dedented selection is not valid Python.
        Since this is a generator, the error is raised when iteration starts.
    """

    # Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
    lines = (line for line in split_lines(selection) if line.strip() != "")

    # Dedent the selection and parse it using the ast module.
    # Note that leading comments in the selection will be discarded during parsing.
    source = textwrap.dedent("\n".join(lines))
    tree = ast.parse(source)

    # We'll need the dedented lines to rebuild the selection.
    lines = split_lines(source)

    # Get the line ranges for top-level blocks returned from parsing the dedented text
    # and split the selection accordingly.
    # tree.body is a list of AST objects, which we rely on to extract top-level statements.
    # If we supported Python 3.8+ only we could use the lineno and end_lineno attributes of each object
    # to get the boundaries of each block.
    # However, earlier Python versions only have the lineno attribute, which is the range start position (1-indexed).
    # Therefore, to retrieve the end line of each block in a version-agnostic way we need to do
    # `end = next_block_start`
    # for all blocks except the last one, which will just run until the last line.
    starts = [_block_start(node) for node in tree.body]
    ends = starts[1:] + [len(lines)]
    for start, end in zip(starts, ends):
        # Given this selection:
        # 1: if (m > 0 and
        # 2:        n < 3):
        # 3:     print('foo')
        # 4: value = 'bar'
        #
        # The first block would have lineno = 1, and the second block lineno = 4
        block = "\n".join(lines[start:end])

        # If the block is multiline, add an extra newline character at its end.
        # This way, when joining blocks back together, there will be a blank line between each multiline statement
        # and no blank lines between single-line statements, or it would look like this:
        # >>> x = 22
        # >>>
        # >>> total = x + 30
        # >>>
        # Note that for the multiline parentheses case this newline is redundant,
        # since the closing parenthesis terminates the statement already.
        # This means that for this pattern we'll end up with:
        # >>> x = [
        # ...   1
        # ... ]
        # >>>
        # >>> y = [
        # ...   2
        # ... ]
        if end - start > 1:
            block += "\n"

        yield block
81+
82+
83+
def normalize_lines(selection):
    """
    Normalize the text selection received from the extension and write it to stdout.

    The selection is split into a list of its top-level statements, which are then
    joined back together with newlines to tell the REPL where each block ends.
    Single-line and multiline selections go through the same code path.

    If anything goes wrong while processing the selection (for example, it is not
    valid Python on its own), the selection is written as-is, followed by a blank line.

    :param selection: the text selected in the editor.
    :return: None. The result is written to standard output as UTF-8 encoded bytes.
    """

    try:
        # Parse the selection into a list of top-level blocks.
        # We don't differentiate between single and multiline statements
        # because it's not a perf bottleneck,
        # and the overhead from splitting and rejoining strings in the multiline case is one-off.
        statements = _get_statements(selection)

        # Insert a newline between each top-level statement, and append a newline to the selection.
        # _get_statements is lazy, so parsing errors surface here, inside the try block.
        source = "\n".join(statements) + "\n"
    except Exception:
        # If there's a problem when parsing statements,
        # append a blank line to end the block and send it as-is.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit still propagate.
        source = selection + "\n\n"

    # `source` is a unicode instance at this point on Python 2,
    # so if we used `sys.stdout.write` to send it to the REPL,
    # Python will implicitly encode it using sys.getdefaultencoding(),
    # which we don't want.
    stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
    stdout.write(source.encode("utf-8"))
    stdout.flush()
114+
115+
116+
if __name__ == "__main__":
    # The selection is received as the first command-line argument.
    # This will fail on a large file.
    # See https://github.com/microsoft/vscode-python/issues/14471
    contents = sys.argv[1]
    try:
        # Round-trip the argument through the default encoding:
        # "surrogateescape" turns escaped surrogates back into bytes when encoding,
        # and "replace" substitutes anything that cannot be decoded afterwards.
        default_encoding = sys.getdefaultencoding()
        encoded_contents = contents.encode(default_encoding, "surrogateescape")
        contents = encoded_contents.decode(default_encoding, "replace")
    except (UnicodeError, LookupError):
        # Best effort only: if the round trip is not possible, keep the argument as received.
        pass
    # normalize_lines() expects text, so decode the argument if it is still a byte string.
    if isinstance(contents, bytes):
        contents = contents.decode("utf8")
    normalize_lines(contents)
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License.
3+
4+
import textwrap
5+
6+
import normalizeSelection
7+
8+
9+
class TestNormalizationScript(object):
    """Unit tests for the normalization script."""

    def test_basicNormalization(self, capsys):
        """A single-line statement is written back with one trailing newline."""
        src = 'print("this is a test")'
        expected = src + "\n"
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

    def test_moreThanOneLine(self, capsys):
        """A leading comment is dropped and the multiline block ends with a blank line."""
        src = textwrap.dedent(
            """\
            # Some rando comment

            def show_something():
                print("Something")
            """
        )
        expected = textwrap.dedent(
            """\
            def show_something():
                print("Something")

            """
        )
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

    def test_withHangingIndent(self, capsys):
        """Blank lines between statements are removed; the final multiline block ends with one."""
        src = textwrap.dedent(
            """\
            x = 22
            y = 30
            z = -10
            result = x + y + z

            if result == 42:
                print("The answer to life, the universe, and everything")
            """
        )
        expected = textwrap.dedent(
            """\
            x = 22
            y = 30
            z = -10
            result = x + y + z
            if result == 42:
                print("The answer to life, the universe, and everything")

            """
        )
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

    def test_clearOutExtraneousNewlines(self, capsys):
        """Blank lines between single-line statements are removed."""
        src = textwrap.dedent(
            """\
            value_x = 22

            value_y = 30

            value_z = -10

            print(value_x + value_y + value_z)

            """
        )
        expectedResult = textwrap.dedent(
            """\
            value_x = 22
            value_y = 30
            value_z = -10
            print(value_x + value_y + value_z)
            """
        )
        normalizeSelection.normalize_lines(src)
        result = capsys.readouterr()
        assert result.out == expectedResult

    def test_clearOutExtraLinesAndWhitespace(self, capsys):
        """Blank lines inside a block are removed and one is kept after the block."""
        src = textwrap.dedent(
            """\
            if True:
                x = 22

                y = 30

                z = -10

            print(x + y + z)

            """
        )
        expectedResult = textwrap.dedent(
            """\
            if True:
                x = 22
                y = 30
                z = -10

            print(x + y + z)
            """
        )
        normalizeSelection.normalize_lines(src)
        result = capsys.readouterr()
        assert result.out == expectedResult

    def test_partialSingleLine(self, capsys):
        """An indented single-line selection is dedented."""
        src = " print('foo')"
        expected = textwrap.dedent(src) + "\n"
        normalizeSelection.normalize_lines(src)
        result = capsys.readouterr()
        assert result.out == expected

    def test_multiLineWithIndent(self, capsys):
        """An indented multiline selection is dedented and its inner blank lines are removed."""
        src = """\

        if (x > 0
            and condition == True):
            print('foo')
        else:

            print('bar')
        """

        expectedResult = textwrap.dedent(
            """\
            if (x > 0
                and condition == True):
                print('foo')
            else:
                print('bar')

            """
        )

        normalizeSelection.normalize_lines(src)
        result = capsys.readouterr()
        assert result.out == expectedResult

    def test_multiLineWithComment(self, capsys):
        """A comment inside a block is kept."""
        src = textwrap.dedent(
            """\

            def show_something():
                # A comment
                print("Something")
            """
        )
        expected = textwrap.dedent(
            """\
            def show_something():
                # A comment
                print("Something")

            """
        )
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

    def test_exception(self, capsys):
        """A selection that does not parse is written as-is, followed by a blank line."""
        src = " if True:"
        expected = src + "\n\n"
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

    def test_multilineException(self, capsys):
        """A multiline selection that does not parse is written as-is, followed by a blank line."""
        src = textwrap.dedent(
            """\

            def show_something():
                if True:
            """
        )
        expected = src + "\n\n"
        normalizeSelection.normalize_lines(src)
        captured = capsys.readouterr()
        assert captured.out == expected

0 commit comments

Comments
 (0)