python · methane · Jan 13, 2019 · Oct 21, 2018 · Oct 25, 2018 · Oct 26, 2018
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
@@ -683,6 +683,25 @@ def test_get_docstring_none(self):
  node = ast.parse('async def foo():\n x = "not docstring"')
  self.assertIsNone(ast.get_docstring(node.body[0]))
 
+ def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):  # multi-line strings must report where they start
+ node = ast.parse(
+ '"""line one\nline two"""\n\n'  # lines 1-2: first module-level string; line 3 blank
+ 'def foo():\n """line one\n line two"""\n\n'  # line 4: def foo; lines 5-6: its first string; line 7 blank
+ ' def bar():\n """line one\n line two"""\n'  # line 8: def bar; lines 9-10: its first string
+ ' """line one\n line two"""\n'  # lines 11-12: string checked below as foo's third statement
+ '"""line one\nline two"""\n\n'  # lines 13-14: string checked below as the module's third statement
+ )
+ self.assertEqual(node.body[0].col_offset, 0)
+ self.assertEqual(node.body[0].lineno, 1)  # first line of the string, not its last (2)
+ self.assertEqual(node.body[1].body[0].col_offset, 2)  # NOTE(review): presumes a 2-space indent inside the literal - confirm
+ self.assertEqual(node.body[1].body[0].lineno, 5)  # first line of the string, not its last (6)
+ self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)  # NOTE(review): presumes a 4-space indent inside the literal - confirm
+ self.assertEqual(node.body[1].body[1].body[0].lineno, 9)  # first line of the string, not its last (10)
+ self.assertEqual(node.body[1].body[2].col_offset, 2)
+ self.assertEqual(node.body[1].body[2].lineno, 11)  # first line of the string, not its last (12)
+ self.assertEqual(node.body[2].col_offset, 0)
+ self.assertEqual(node.body[2].lineno, 13)  # first line of the string, not its last (14)
+
  def test_literal_eval(self):
  self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3])
  self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42})

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
@@ -270,10 +270,7 @@ def test_ast_line_numbers_duplicate_expression(self):
  self.assertEqual(binop.right.col_offset, 7) # FIXME: this is wrong
 
  def test_ast_line_numbers_multiline_fstring(self):
- # FIXME: This test demonstrates invalid behavior due to JoinedStr's
- # immediate child nodes containing the wrong lineno. The enclosed
- # expressions have valid line information and column offsets.
- # See bpo-16806 and bpo-30465 for details.
+ # See bpo-30465 for details.
  expr = """
 a = 10
 f'''
@@ -298,19 +295,16 @@ def test_ast_line_numbers_multiline_fstring(self):
  self.assertEqual(type(t.body[1].value.values[1]), ast.FormattedValue)
  self.assertEqual(type(t.body[1].value.values[2]), ast.Constant)
  self.assertEqual(type(t.body[1].value.values[2].value), str)
- # NOTE: the following invalid behavior is described in bpo-16806.
- # - line number should be the *first* line (3), not the *last* (8)
- # - column offset should not be -1
- self.assertEqual(t.body[1].lineno, 8)
- self.assertEqual(t.body[1].value.lineno, 8)
- self.assertEqual(t.body[1].value.values[0].lineno, 8)
- self.assertEqual(t.body[1].value.values[1].lineno, 8)
- self.assertEqual(t.body[1].value.values[2].lineno, 8)
- self.assertEqual(t.body[1].col_offset, -1)
- self.assertEqual(t.body[1].value.col_offset, -1)
- self.assertEqual(t.body[1].value.values[0].col_offset, -1)
- self.assertEqual(t.body[1].value.values[1].col_offset, -1)
- self.assertEqual(t.body[1].value.values[2].col_offset, -1)
+ self.assertEqual(t.body[1].lineno, 3)
+ self.assertEqual(t.body[1].value.lineno, 3)
+ self.assertEqual(t.body[1].value.values[0].lineno, 3)
+ self.assertEqual(t.body[1].value.values[1].lineno, 3)
+ self.assertEqual(t.body[1].value.values[2].lineno, 3)
+ self.assertEqual(t.body[1].col_offset, 0)
+ self.assertEqual(t.body[1].value.col_offset, 0)
+ self.assertEqual(t.body[1].value.values[0].col_offset, 0)
+ self.assertEqual(t.body[1].value.values[1].col_offset, 0)
+ self.assertEqual(t.body[1].value.values[2].col_offset, 0)
  # NOTE: the following lineno information and col_offset is correct for
  # expressions within FormattedValues.
  binop = t.body[1].value.values[1].value
@@ -321,8 +315,8 @@ def test_ast_line_numbers_multiline_fstring(self):
  self.assertEqual(binop.lineno, 4)
  self.assertEqual(binop.left.lineno, 4)
  self.assertEqual(binop.right.lineno, 6)
- self.assertEqual(binop.col_offset, 3)
- self.assertEqual(binop.left.col_offset, 3)
+ self.assertEqual(binop.col_offset, 4)
+ self.assertEqual(binop.left.col_offset, 4)
  self.assertEqual(binop.right.col_offset, 7)
 
  def test_docstring(self):

diff --git a/Lib/test/test_opcodes.py b/Lib/test/test_opcodes.py
@@ -27,7 +27,7 @@ def test_setup_annotations_line(self):
  with open(ann_module.__file__) as f:
  txt = f.read()
  co = compile(txt, ann_module.__file__, 'exec')
- self.assertEqual(co.co_firstlineno, 6)
+ self.assertEqual(co.co_firstlineno, 3)
  except OSError:
  pass
 

diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py
@@ -117,7 +117,7 @@ def test_eval_str_invalid_escape(self):
  eval("'''\n\\z'''")
  self.assertEqual(len(w), 1)
  self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 2)
+ self.assertEqual(w[0].lineno, 1)
 
  with warnings.catch_warnings(record=True) as w:
  warnings.simplefilter('error', category=SyntaxWarning)
@@ -126,7 +126,7 @@ def test_eval_str_invalid_escape(self):
  exc = cm.exception
  self.assertEqual(w, [])
  self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 2)
+ self.assertEqual(exc.lineno, 1)
 
  def test_eval_str_raw(self):
  self.assertEqual(eval(""" r'x' """), 'x')
@@ -166,7 +166,7 @@ def test_eval_bytes_invalid_escape(self):
  eval("b'''\n\\z'''")
  self.assertEqual(len(w), 1)
  self.assertEqual(w[0].filename, '<string>')
- self.assertEqual(w[0].lineno, 2)
+ self.assertEqual(w[0].lineno, 1)
 
  with warnings.catch_warnings(record=True) as w:
  warnings.simplefilter('error', category=SyntaxWarning)
@@ -175,7 +175,7 @@ def test_eval_bytes_invalid_escape(self):
  exc = cm.exception
  self.assertEqual(w, [])
  self.assertEqual(exc.filename, '<string>')
- self.assertEqual(exc.lineno, 2)
+ self.assertEqual(exc.lineno, 1)
 
  def test_eval_bytes_raw(self):
  self.assertEqual(eval(""" br'x' """), b'x')

diff --git a/Misc/ACKS b/Misc/ACKS
@@ -1844,3 +1844,4 @@ Gennadiy Zlobin
 Doug Zongker
 Peter Åstrand
 Zheao Li
+Carsten Klein
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-10-20-18-05-58.bpo-16806.zr3A9N.rst b/Misc/NEWS.d/next/Core and Builtins/2018-10-20-18-05-58.bpo-16806.zr3A9N.rst
@@ -0,0 +1 @@
+Fix ``lineno`` and ``col_offset`` for multi-line string tokens.
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
  size_t len;
  char *str;
  col_offset = -1;
+ int lineno;
+ const char *line_start;
 
  type = PyTokenizer_Get(tok, &a, &b);
  if (type == ERRORTOKEN) {
@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
  }
  }
 #endif
- if (a != NULL && a >= tok->line_start) {
- col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
+
+ /* A STRING token may span several lines, so report the line
+ number and line start that were recorded when the token
+ began instead of the tokenizer's current line.
+ (cf. issue 16806) */
+ lineno = type == STRING ? tok->first_lineno : tok->lineno;
+ line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+ if (a != NULL && a >= line_start) {
+ col_offset = Py_SAFE_DOWNCAST(a - line_start,
  intptr_t, int);
  }
  else {
@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 
  if ((err_ret->error =
  PyParser_AddToken(ps, (int)type, str,
- tok->lineno, col_offset,
+ lineno, col_offset,
  &(err_ret->expected))) != E_OK) {
  if (err_ret->error != E_DONE) {
  PyObject_FREE(str);

diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
  int quote_size = 1; /* 1 or 3 */
  int end_quote_size = 0;
 
+ /* Remember the line number and the start of the line on
+ which this string token begins, so that its starting
+ position is still known if the string spans several lines.
+ (cf. issue 16806) */
+ tok->first_lineno = tok->lineno;
+ tok->multi_line_start = tok->line_start;
+
  /* Find the quote size and start of string */
  c = tok_nextc(tok);
  if (c == quote) {

diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
@@ -38,6 +38,8 @@ struct tok_state {
  int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
  const char *prompt, *nextprompt; /* For interactive prompting */
  int lineno; /* Current line number */
+ int first_lineno; /* First line of a single line or multi line string
+ expression (cf. issue 16806) */
  int level; /* () [] {} Parentheses nesting level */
  /* Used to allow free continuations inside them */
 #ifndef PGEN
@@ -58,6 +60,9 @@ struct tok_state {
  char *encoding; /* Source encoding. */
  int cont_line; /* whether we are in a continuation line. */
  const char* line_start; /* pointer to start of current line */
+ const char* multi_line_start; /* pointer to start of first line of
+ a single line or multi line string
+ expression (cf. issue 16806) */
 #ifndef PGEN
  PyObject *decoding_readline; /* open(...).readline */
  PyObject *decoding_buffer;

diff --git a/Python/ast.c b/Python/ast.c
@@ -4282,9 +4282,13 @@ fstring_fix_node_location(const node *parent, node *n, char *expr_str)
  start--;
  }
  cols += (int)(substr - start);
- /* Fix lineno in mulitline strings. */
- while ((substr = strchr(substr + 1, '\n')))
- lines--;
+ /* Advance the line offset by the number of newlines that occur
+ in the enclosing string before the f-string expression. */
+ for (char* p = parent->n_str; p < substr; p++) {
+ if (*p == '\n') {
+ lines++;
+ }
+ }
  }
  }
  fstring_shift_node_locations(n, lines, cols);

diff --git a/Python/importlib.h b/Python/importlib.h
diff --git a/Python/importlib_external.h b/Python/importlib_external.h
diff --git a/Python/importlib_zipimport.h b/Python/importlib_zipimport.h
@@ -484,7 +484,7 @@ const unsigned char _Py_M__zipimport[] = {
  64,0,0,0,114,65,0,0,0,114,78,0,0,0,114,82,
  0,0,0,114,83,0,0,0,114,9,0,0,0,114,9,0,
  0,0,114,9,0,0,0,114,10,0,0,0,114,4,0,0,
- 0,45,0,0,0,115,24,0,0,0,8,13,4,5,8,46,
+ 0,45,0,0,0,115,24,0,0,0,8,1,4,17,8,46,
  10,32,10,12,8,10,8,21,8,11,8,26,8,13,8,38,
  8,18,122,12,95,95,105,110,105,116,95,95,46,112,121,99,
  84,114,60,0,0,0,70,41,3,122,4,46,112,121,99,84,
@@ -1044,7 +1044,7 @@ const unsigned char _Py_M__zipimport[] = {
  34,0,0,0,114,182,0,0,0,114,183,0,0,0,114,184,
  0,0,0,114,189,0,0,0,114,9,0,0,0,114,9,0,
  0,0,114,9,0,0,0,114,10,0,0,0,114,80,0,0,
- 0,212,2,0,0,115,14,0,0,0,8,5,4,1,4,2,
+ 0,212,2,0,0,115,14,0,0,0,8,1,4,5,4,2,
  8,4,8,9,8,6,8,11,114,80,0,0,0,41,45,114,
  84,0,0,0,90,26,95,102,114,111,122,101,110,95,105,109,
  112,111,114,116,108,105,98,95,101,120,116,101,114,110,97,108,
@@ -1065,8 +1065,8 @@ const unsigned char _Py_M__zipimport[] = {
  0,0,114,170,0,0,0,114,152,0,0,0,114,150,0,0,
  0,114,44,0,0,0,114,80,0,0,0,114,9,0,0,0,
  114,9,0,0,0,114,9,0,0,0,114,10,0,0,0,218,
- 8,60,109,111,100,117,108,101,62,13,0,0,0,115,88,0,
- 0,0,4,4,8,1,16,1,8,1,8,1,8,1,8,1,
+ 8,60,109,111,100,117,108,101,62,1,0,0,0,115,88,0,
+ 0,0,4,16,8,1,16,1,8,1,8,1,8,1,8,1,
  8,1,8,2,8,3,6,1,14,3,16,4,4,2,8,2,
  4,1,4,1,4,2,14,127,0,127,0,1,12,1,12,1,
  2,1,2,252,4,9,8,4,8,9,8,31,8,126,2,254,
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Fix ``lineno`` and ``col_offset`` for multi-line string tokens.