Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Lib/test/test_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,25 @@ def test_get_docstring_none(self):
node = ast.parse('async def foo():\n x = "not docstring"')
self.assertIsNone(ast.get_docstring(node.body[0]))

def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
node = ast.parse(
'"""line one\nline two"""\n\n'
'def foo():\n """line one\n line two"""\n\n'
' def bar():\n """line one\n line two"""\n'
' """line one\n line two"""\n'
'"""line one\nline two"""\n\n'
)
self.assertEqual(node.body[0].col_offset, 0)
self.assertEqual(node.body[0].lineno, 1)
self.assertEqual(node.body[1].body[0].col_offset, 2)
self.assertEqual(node.body[1].body[0].lineno, 5)
self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)
self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
self.assertEqual(node.body[1].body[2].col_offset, 2)
self.assertEqual(node.body[1].body[2].lineno, 11)
self.assertEqual(node.body[2].col_offset, 0)
self.assertEqual(node.body[2].lineno, 13)

def test_literal_eval(self):
self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3])
self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42})
Expand Down
32 changes: 13 additions & 19 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,7 @@ def test_ast_line_numbers_duplicate_expression(self):
self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong

def test_ast_line_numbers_multiline_fstring(self):
# FIXME: This test demonstrates invalid behavior due to JoinedStr's
# immediate child nodes containing the wrong lineno. The enclosed
# expressions have valid line information and column offsets.
# See bpo-16806 and bpo-30465 for details.
# See bpo-30465 for details.
expr = """
a = 10
f'''
Expand All @@ -298,19 +295,16 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
self.assertEqual(type(t.body[1].value.values[2]), ast.Constant)
self.assertEqual(type(t.body[1].value.values[2].value), str)
# NOTE: the following invalid behavior is described in bpo-16806.
# - line number should be the *first* line (3), not the *last* (8)
# - column offset should not be -1
self.assertEqual(t.body[1].lineno, 8)
self.assertEqual(t.body[1].value.lineno, 8)
self.assertEqual(t.body[1].value.values[0].lineno, 8)
self.assertEqual(t.body[1].value.values[1].lineno, 8)
self.assertEqual(t.body[1].value.values[2].lineno, 8)
self.assertEqual(t.body[1].col_offset, -1)
self.assertEqual(t.body[1].value.col_offset, -1)
self.assertEqual(t.body[1].value.values[0].col_offset, -1)
self.assertEqual(t.body[1].value.values[1].col_offset, -1)
self.assertEqual(t.body[1].value.values[2].col_offset, -1)
self.assertEqual(t.body[1].lineno, 3)
self.assertEqual(t.body[1].value.lineno, 3)
self.assertEqual(t.body[1].value.values[0].lineno, 3)
self.assertEqual(t.body[1].value.values[1].lineno, 3)
self.assertEqual(t.body[1].value.values[2].lineno, 3)
self.assertEqual(t.body[1].col_offset, 0)
self.assertEqual(t.body[1].value.col_offset, 0)
self.assertEqual(t.body[1].value.values[0].col_offset, 0)
self.assertEqual(t.body[1].value.values[1].col_offset, 0)
self.assertEqual(t.body[1].value.values[2].col_offset, 0)
# NOTE: the following lineno information and col_offset is correct for
# expressions within FormattedValues.
binop = t.body[1].value.values[1].value
Expand All @@ -321,8 +315,8 @@ def test_ast_line_numbers_multiline_fstring(self):
self.assertEqual(binop.lineno, 4)
self.assertEqual(binop.left.lineno, 4)
self.assertEqual(binop.right.lineno, 6)
self.assertEqual(binop.col_offset, 3)
self.assertEqual(binop.left.col_offset, 3)
self.assertEqual(binop.col_offset, 4)
self.assertEqual(binop.left.col_offset, 4)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this was incorrect before (being adjusted by -1 due to the multiline string).

For instance, here's the ast dump of (using astpretty)

x = 1 + 1
$ python ~/workspace/astpretty/astpretty.py t.py Module( body=[ Assign( lineno=1, col_offset=0, targets=[Name(lineno=1, col_offset=0, id='x', ctx=Store())], value=BinOp( lineno=1, col_offset=4, left=Num(lineno=1, col_offset=4, n=1), op=Add(), right=Num(lineno=1, col_offset=8, n=1), ), ), ], ) 

You'll notice that the BinOp and its left (Num) have the same col_offset

self.assertEqual(binop.right.col_offset, 7)

def test_docstring(self):
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_setup_annotations_line(self):
with open(ann_module.__file__) as f:
txt = f.read()
co = compile(txt, ann_module.__file__, 'exec')
self.assertEqual(co.co_firstlineno, 6)
self.assertEqual(co.co_firstlineno, 3)
except OSError:
pass

Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_string_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_eval_str_invalid_escape(self):
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
self.assertEqual(w[0].lineno, 1)

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
Expand All @@ -126,7 +126,7 @@ def test_eval_str_invalid_escape(self):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.lineno, 1)

def test_eval_str_raw(self):
self.assertEqual(eval(""" r'x' """), 'x')
Expand Down Expand Up @@ -166,7 +166,7 @@ def test_eval_bytes_invalid_escape(self):
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)
self.assertEqual(w[0].lineno, 1)

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=SyntaxWarning)
Expand All @@ -175,7 +175,7 @@ def test_eval_bytes_invalid_escape(self):
exc = cm.exception
self.assertEqual(w, [])
self.assertEqual(exc.filename, '<string>')
self.assertEqual(exc.lineno, 2)
self.assertEqual(exc.lineno, 1)

def test_eval_bytes_raw(self):
self.assertEqual(eval(""" br'x' """), b'x')
Expand Down
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1844,3 +1844,4 @@ Gennadiy Zlobin
Doug Zongker
Peter Åstrand
Zheao Li
Carsten Klein
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix ``lineno`` and ``col_offset`` for multi-line string tokens.
15 changes: 12 additions & 3 deletions Parser/parsetok.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
size_t len;
char *str;
col_offset = -1;
int lineno;
const char *line_start;

type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
Expand Down Expand Up @@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
}
}
#endif
if (a != NULL && a >= tok->line_start) {
col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,

/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
the starting line number and the column offset right.
(cf. issue 16806) */
lineno = type == STRING ? tok->first_lineno : tok->lineno;
line_start = type == STRING ? tok->multi_line_start : tok->line_start;
if (a != NULL && a >= line_start) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question: after fixing this, when a < line_start possible?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think it's not possible now, though I don't know of all the cases for every other token. I think tqs are the only multiline token (since iirc escaped newlines are not a token)

col_offset = Py_SAFE_DOWNCAST(a - line_start,
intptr_t, int);
}
else {
Expand All @@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,

if ((err_ret->error =
PyParser_AddToken(ps, (int)type, str,
tok->lineno, col_offset,
lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
Expand Down
7 changes: 7 additions & 0 deletions Parser/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;

/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
the starting line number and the column offset right.
(cf. issue 16806) */
tok->first_lineno = tok->lineno;
tok->multi_line_start = tok->line_start;

/* Find the quote size and start of string */
c = tok_nextc(tok);
if (c == quote) {
Expand Down
5 changes: 5 additions & 0 deletions Parser/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ struct tok_state {
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
const char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
int first_lineno; /* First line of a single line or multi line string
expression (cf. issue 16806) */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
#ifndef PGEN
Expand All @@ -58,6 +60,9 @@ struct tok_state {
char *encoding; /* Source encoding. */
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
const char* multi_line_start; /* pointer to start of first line of
a single line or multi line string
expression (cf. issue 16806) */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
Expand Down
10 changes: 7 additions & 3 deletions Python/ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -4282,9 +4282,13 @@ fstring_fix_node_location(const node *parent, node *n, char *expr_str)
start--;
}
cols += (int)(substr - start);
/* Fix lineno in mulitline strings. */
while ((substr = strchr(substr + 1, '\n')))
lines--;
/* adjust the start based on the number of newlines encountered
before the f-string expression */
for (char* p = parent->n_str; p < substr; p++) {
if (*p == '\n') {
lines++;
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand this function well.

 int lines = LINENO(parent) - 1; int cols = parent->n_col_offset; 

These value are get from parent, before this loop:

 /* Find the full fstring to fix location information in `n`. */ while (parent && parent->n_type != STRING) parent = parent->n_child; 

Is this OK?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I think so, what's happening here (and the variable names aren't super great):

  1. find the original ast offset of the parent node
  2. find the position in the string that the fstring expression appears at
  3. adjust the line/col based on that

From reading #1800 (where this function was introduced) it was discussed briefly but I didn't pick up the rationale for that loop there. When I removed / adjusted it during testing it broke all of the f-string column / offset tests so I think it's necessary.

}
}
fstring_shift_node_locations(n, lines, cols);
Expand Down
16 changes: 8 additions & 8 deletions Python/importlib.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 9 additions & 9 deletions Python/importlib_external.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Python/importlib_zipimport.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,7 @@ const unsigned char _Py_M__zipimport[] = {
64,0,0,0,114,65,0,0,0,114,78,0,0,0,114,82,
0,0,0,114,83,0,0,0,114,9,0,0,0,114,9,0,
0,0,114,9,0,0,0,114,10,0,0,0,114,4,0,0,
0,45,0,0,0,115,24,0,0,0,8,13,4,5,8,46,
0,45,0,0,0,115,24,0,0,0,8,1,4,17,8,46,
10,32,10,12,8,10,8,21,8,11,8,26,8,13,8,38,
8,18,122,12,95,95,105,110,105,116,95,95,46,112,121,99,
84,114,60,0,0,0,70,41,3,122,4,46,112,121,99,84,
Expand Down Expand Up @@ -1044,7 +1044,7 @@ const unsigned char _Py_M__zipimport[] = {
34,0,0,0,114,182,0,0,0,114,183,0,0,0,114,184,
0,0,0,114,189,0,0,0,114,9,0,0,0,114,9,0,
0,0,114,9,0,0,0,114,10,0,0,0,114,80,0,0,
0,212,2,0,0,115,14,0,0,0,8,5,4,1,4,2,
0,212,2,0,0,115,14,0,0,0,8,1,4,5,4,2,
8,4,8,9,8,6,8,11,114,80,0,0,0,41,45,114,
84,0,0,0,90,26,95,102,114,111,122,101,110,95,105,109,
112,111,114,116,108,105,98,95,101,120,116,101,114,110,97,108,
Expand All @@ -1065,8 +1065,8 @@ const unsigned char _Py_M__zipimport[] = {
0,0,114,170,0,0,0,114,152,0,0,0,114,150,0,0,
0,114,44,0,0,0,114,80,0,0,0,114,9,0,0,0,
114,9,0,0,0,114,9,0,0,0,114,10,0,0,0,218,
8,60,109,111,100,117,108,101,62,13,0,0,0,115,88,0,
0,0,4,4,8,1,16,1,8,1,8,1,8,1,8,1,
8,60,109,111,100,117,108,101,62,1,0,0,0,115,88,0,
0,0,4,16,8,1,16,1,8,1,8,1,8,1,8,1,
8,1,8,2,8,3,6,1,14,3,16,4,4,2,8,2,
4,1,4,1,4,2,14,127,0,127,0,1,12,1,12,1,
2,1,2,252,4,9,8,4,8,9,8,31,8,126,2,254,
Expand Down