Skip to content

Commit 1f0f4ab

Browse files
authored
bpo-41076: Pre-feed the parser with the f-string expression location (pythonGH-21054)
This commit changes the parsing of f-string expressions with the new parser. The parser gets pre-fed with the location of the expression itself (not the f-string, which was what we were doing before). This allows us to completely skip the shifting of the AST nodes after the parsing is completed.
1 parent 89e82c4 commit 1f0f4ab

File tree

5 files changed

+2426
-2642
lines changed

5 files changed

+2426
-2642
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Pre-feed the parser with the location of the f-string expression, not the f-string itself, which allows us to skip the shifting of the AST node locations after the parsing is completed.

Parser/pegen.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
423423
}
424424
}
425425

426+
if (p->start_rule == Py_fstring_input) {
427+
col_offset -= p->starting_col_offset;
428+
}
426429
Py_ssize_t col_number = col_offset;
427430

428431
if (p->tok->encoding != NULL) {

Parser/string_parser.c

Lines changed: 22 additions & 242 deletions
Original file line numberDiff line numberDiff line change
@@ -271,243 +271,14 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
271271

272272
// FSTRING STUFF
273273

274-
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
275-
static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
276-
277-
278-
static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
279-
if (n == NULL) {
280-
return;
281-
}
282-
if (parent->lineno < n->lineno) {
283-
col = 0;
284-
}
285-
fstring_shift_expr_locations(n, line, col);
286-
}
287-
288-
static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
289-
if (parent->lineno < n->lineno) {
290-
col = 0;
291-
}
292-
fstring_shift_argument(parent, n, line, col);
293-
}
294-
295-
static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
296-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
297-
expr_ty expr = asdl_seq_GET(seq, i);
298-
if (expr == NULL){
299-
continue;
300-
}
301-
shift_expr(parent, expr, lineno, col_offset);
302-
}
303-
}
304-
305-
static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
306-
switch (slice->kind) {
307-
case Slice_kind:
308-
if (slice->v.Slice.lower) {
309-
shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
310-
}
311-
if (slice->v.Slice.upper) {
312-
shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
313-
}
314-
if (slice->v.Slice.step) {
315-
shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
316-
}
317-
break;
318-
case Tuple_kind:
319-
fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
320-
break;
321-
default:
322-
break;
323-
}
324-
}
325-
326-
static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
327-
shift_expr(parent, comp->target, lineno, col_offset);
328-
shift_expr(parent, comp->iter, lineno, col_offset);
329-
fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
330-
}
331-
332-
static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
333-
if (arg->annotation != NULL){
334-
shift_expr(parent, arg->annotation, lineno, col_offset);
335-
}
336-
arg->col_offset = arg->col_offset + col_offset;
337-
arg->end_col_offset = arg->end_col_offset + col_offset;
338-
arg->lineno = arg->lineno + lineno;
339-
arg->end_lineno = arg->end_lineno + lineno;
340-
}
341-
342-
static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
343-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
344-
arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
345-
shift_arg(parent, arg, lineno, col_offset);
346-
}
347-
348-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
349-
arg_ty arg = asdl_seq_GET(args->args, i);
350-
shift_arg(parent, arg, lineno, col_offset);
351-
}
352-
353-
if (args->vararg != NULL) {
354-
shift_arg(parent, args->vararg, lineno, col_offset);
355-
}
356-
357-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
358-
arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
359-
shift_arg(parent, arg, lineno, col_offset);
360-
}
361-
362-
fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
363-
364-
if (args->kwarg != NULL) {
365-
shift_arg(parent, args->kwarg, lineno, col_offset);
366-
}
367-
368-
fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
369-
}
370-
371-
static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
372-
switch (node->kind) {
373-
case BoolOp_kind:
374-
fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
375-
break;
376-
case NamedExpr_kind:
377-
shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
378-
shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
379-
break;
380-
case BinOp_kind:
381-
shift_expr(node, node->v.BinOp.left, lineno, col_offset);
382-
shift_expr(node, node->v.BinOp.right, lineno, col_offset);
383-
break;
384-
case UnaryOp_kind:
385-
shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
386-
break;
387-
case Lambda_kind:
388-
fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
389-
shift_expr(node, node->v.Lambda.body, lineno, col_offset);
390-
break;
391-
case IfExp_kind:
392-
shift_expr(node, node->v.IfExp.test, lineno, col_offset);
393-
shift_expr(node, node->v.IfExp.body, lineno, col_offset);
394-
shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
395-
break;
396-
case Dict_kind:
397-
fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
398-
fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
399-
break;
400-
case Set_kind:
401-
fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
402-
break;
403-
case ListComp_kind:
404-
shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
405-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
406-
comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
407-
fstring_shift_comprehension(node, comp, lineno, col_offset);
408-
}
409-
break;
410-
case SetComp_kind:
411-
shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
412-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
413-
comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
414-
fstring_shift_comprehension(node, comp, lineno, col_offset);
415-
}
416-
break;
417-
case DictComp_kind:
418-
shift_expr(node, node->v.DictComp.key, lineno, col_offset);
419-
shift_expr(node, node->v.DictComp.value, lineno, col_offset);
420-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
421-
comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
422-
fstring_shift_comprehension(node, comp, lineno, col_offset);
423-
}
424-
break;
425-
case GeneratorExp_kind:
426-
shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
427-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
428-
comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
429-
fstring_shift_comprehension(node, comp, lineno, col_offset);
430-
}
431-
break;
432-
case Await_kind:
433-
shift_expr(node, node->v.Await.value, lineno, col_offset);
434-
break;
435-
case Yield_kind:
436-
shift_expr(node, node->v.Yield.value, lineno, col_offset);
437-
break;
438-
case YieldFrom_kind:
439-
shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
440-
break;
441-
case Compare_kind:
442-
shift_expr(node, node->v.Compare.left, lineno, col_offset);
443-
fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
444-
break;
445-
case Call_kind:
446-
shift_expr(node, node->v.Call.func, lineno, col_offset);
447-
fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
448-
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
449-
keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
450-
shift_expr(node, keyword->value, lineno, col_offset);
451-
}
452-
break;
453-
case Attribute_kind:
454-
shift_expr(node, node->v.Attribute.value, lineno, col_offset);
455-
break;
456-
case Subscript_kind:
457-
shift_expr(node, node->v.Subscript.value, lineno, col_offset);
458-
fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
459-
shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
460-
break;
461-
case Starred_kind:
462-
shift_expr(node, node->v.Starred.value, lineno, col_offset);
463-
break;
464-
case List_kind:
465-
fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
466-
break;
467-
case Tuple_kind:
468-
fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
469-
break;
470-
case JoinedStr_kind:
471-
fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
472-
break;
473-
case FormattedValue_kind:
474-
shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
475-
if (node->v.FormattedValue.format_spec) {
476-
shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
477-
}
478-
break;
479-
default:
480-
return;
481-
}
482-
}
483-
484-
/* Shift locations for the given node and all its children by adding `lineno`
485-
and `col_offset` to existing locations. Note that n is the already parsed
486-
expression. */
487-
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
488-
{
489-
n->col_offset = n->col_offset + col_offset;
490-
491-
// The following is needed, in order for nodes spanning across multiple lines
492-
// to be shifted correctly. An example of such a node is a Call node, the closing
493-
// parenthesis of which is not on the same line as its name.
494-
if (n->lineno == n->end_lineno) {
495-
n->end_col_offset = n->end_col_offset + col_offset;
496-
}
497-
498-
fstring_shift_children_locations(n, lineno, col_offset);
499-
n->lineno = n->lineno + lineno;
500-
n->end_lineno = n->end_lineno + lineno;
501-
}
502-
503274
/* Fix locations for the given node and its children.
504275
505276
`parent` is the enclosing node.
506277
`n` is the node whose locations are going to be fixed relative to parent.
507278
`expr_str` is the child node's string representation, including braces.
508279
*/
509280
static void
510-
fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
281+
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
511282
{
512283
char *substr = NULL;
513284
char *start;
@@ -552,7 +323,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
552323
}
553324
}
554325
}
555-
fstring_shift_expr_locations(n, lines, cols);
326+
*p_lines = lines;
327+
*p_cols = cols;
556328
}
557329

558330

@@ -598,11 +370,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
598370
return NULL;
599371
}
600372

601-
str[0] = '(';
373+
// The call to fstring_find_expr_location is responsible for finding the column offset
374+
// by which the generated AST nodes need to be shifted to the right, which is equal to the number
375+
// of the f-string characters before the expression starts. In order to correctly compute
376+
// this offset, strstr gets called in fstring_find_expr_location which only succeeds
377+
// if curly braces appear before and after the f-string expression (exactly like they do
378+
// in the f-string itself), hence the following lines.
379+
str[0] = '{';
602380
memcpy(str+1, expr_start, len);
603-
str[len+1] = ')';
381+
str[len+1] = '}';
604382
str[len+2] = 0;
605383

384+
int lines, cols;
385+
fstring_find_expr_location(t, str, &lines, &cols);
386+
387+
// The parentheses are needed in order to allow for leading whitespace within
388+
// the f-string expression. This consequently gets parsed as a group (see the
389+
// group rule in python.gram).
390+
str[0] = '(';
391+
str[len+1] = ')';
392+
606393
struct tok_state* tok = PyTokenizer_FromString(str, 1);
607394
if (tok == NULL) {
608395
PyMem_Free(str);
@@ -613,21 +400,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
613400

614401
Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
615402
NULL, p->arena);
616-
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
617-
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
618-
? p->starting_col_offset + t->col_offset : 0;
403+
p2->starting_lineno = t->lineno + lines - 1;
404+
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;
619405

620406
expr = _PyPegen_run_parser(p2);
621407

622408
if (expr == NULL) {
623409
goto exit;
624410
}
625-
626-
/* Reuse str to find the correct column offset. */
627-
str[0] = '{';
628-
str[len+1] = '}';
629-
fstring_fix_expr_location(t, expr, str);
630-
631411
result = expr;
632412

633413
exit:

0 commit comments

Comments
 (0)