Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Pre-feed the parser with the location of the f-string expression, not the f-string itself, which allows us to skip the shifting of the AST node locations after the parsing is completed.
264 changes: 22 additions & 242 deletions Parser/pegen/parse_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,243 +276,14 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,

// FSTRING STUFF

static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);


static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
if (n == NULL) {
return;
}
if (parent->lineno < n->lineno) {
col = 0;
}
fstring_shift_expr_locations(n, line, col);
}

static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
if (parent->lineno < n->lineno) {
col = 0;
}
fstring_shift_argument(parent, n, line, col);
}

static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
expr_ty expr = asdl_seq_GET(seq, i);
if (expr == NULL){
continue;
}
shift_expr(parent, expr, lineno, col_offset);
}
}

static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
switch (slice->kind) {
case Slice_kind:
if (slice->v.Slice.lower) {
shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
}
if (slice->v.Slice.upper) {
shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
}
if (slice->v.Slice.step) {
shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
}
break;
case Tuple_kind:
fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
break;
default:
break;
}
}

static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
shift_expr(parent, comp->target, lineno, col_offset);
shift_expr(parent, comp->iter, lineno, col_offset);
fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
}

static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
if (arg->annotation != NULL){
shift_expr(parent, arg->annotation, lineno, col_offset);
}
arg->col_offset = arg->col_offset + col_offset;
arg->end_col_offset = arg->end_col_offset + col_offset;
arg->lineno = arg->lineno + lineno;
arg->end_lineno = arg->end_lineno + lineno;
}

static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
shift_arg(parent, arg, lineno, col_offset);
}

for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
arg_ty arg = asdl_seq_GET(args->args, i);
shift_arg(parent, arg, lineno, col_offset);
}

if (args->vararg != NULL) {
shift_arg(parent, args->vararg, lineno, col_offset);
}

for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
shift_arg(parent, arg, lineno, col_offset);
}

fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);

if (args->kwarg != NULL) {
shift_arg(parent, args->kwarg, lineno, col_offset);
}

fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
}

static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
switch (node->kind) {
case BoolOp_kind:
fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
break;
case NamedExpr_kind:
shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
break;
case BinOp_kind:
shift_expr(node, node->v.BinOp.left, lineno, col_offset);
shift_expr(node, node->v.BinOp.right, lineno, col_offset);
break;
case UnaryOp_kind:
shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
break;
case Lambda_kind:
fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
shift_expr(node, node->v.Lambda.body, lineno, col_offset);
break;
case IfExp_kind:
shift_expr(node, node->v.IfExp.test, lineno, col_offset);
shift_expr(node, node->v.IfExp.body, lineno, col_offset);
shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
break;
case Dict_kind:
fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
break;
case Set_kind:
fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
break;
case ListComp_kind:
shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
fstring_shift_comprehension(node, comp, lineno, col_offset);
}
break;
case SetComp_kind:
shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
fstring_shift_comprehension(node, comp, lineno, col_offset);
}
break;
case DictComp_kind:
shift_expr(node, node->v.DictComp.key, lineno, col_offset);
shift_expr(node, node->v.DictComp.value, lineno, col_offset);
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
fstring_shift_comprehension(node, comp, lineno, col_offset);
}
break;
case GeneratorExp_kind:
shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
fstring_shift_comprehension(node, comp, lineno, col_offset);
}
break;
case Await_kind:
shift_expr(node, node->v.Await.value, lineno, col_offset);
break;
case Yield_kind:
shift_expr(node, node->v.Yield.value, lineno, col_offset);
break;
case YieldFrom_kind:
shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
break;
case Compare_kind:
shift_expr(node, node->v.Compare.left, lineno, col_offset);
fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
break;
case Call_kind:
shift_expr(node, node->v.Call.func, lineno, col_offset);
fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
shift_expr(node, keyword->value, lineno, col_offset);
}
break;
case Attribute_kind:
shift_expr(node, node->v.Attribute.value, lineno, col_offset);
break;
case Subscript_kind:
shift_expr(node, node->v.Subscript.value, lineno, col_offset);
fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
break;
case Starred_kind:
shift_expr(node, node->v.Starred.value, lineno, col_offset);
break;
case List_kind:
fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
break;
case Tuple_kind:
fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
break;
case JoinedStr_kind:
fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
break;
case FormattedValue_kind:
shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
if (node->v.FormattedValue.format_spec) {
shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
}
break;
default:
return;
}
}

/* Shift locations for the given node and all its children by adding `lineno`
and `col_offset` to existing locations. Note that n is the already parsed
expression. */
static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
{
n->col_offset = n->col_offset + col_offset;

// The following is needed, in order for nodes spanning across multiple lines
// to be shifted correctly. An example of such a node is a Call node, the closing
// parenthesis of which is not on the same line as its name.
if (n->lineno == n->end_lineno) {
n->end_col_offset = n->end_col_offset + col_offset;
}

fstring_shift_children_locations(n, lineno, col_offset);
n->lineno = n->lineno + lineno;
n->end_lineno = n->end_lineno + lineno;
}

/* Fix locations for the given node and its children.

`parent` is the enclosing node.
`n` is the node which locations are going to be fixed relative to parent.
`expr_str` is the child node's string representation, including braces.
*/
static void
fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
fstring_find_expr_location(Token *parent, char *expr_str, int *p_lines, int *p_cols)
{
char *substr = NULL;
char *start;
Expand Down Expand Up @@ -557,7 +328,8 @@ fstring_fix_expr_location(Token *parent, expr_ty n, char *expr_str)
}
}
}
fstring_shift_expr_locations(n, lines, cols);
*p_lines = lines;
*p_cols = cols;
}


Expand Down Expand Up @@ -603,11 +375,26 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,
return NULL;
}

str[0] = '(';
// The call to fstring_find_expr_location is responsible for finding the column offset
// the generated AST nodes need to be shifted to the right, which is equal to the number
// of the f-string characters before the expression starts. In order to correctly compute
// this offset, strstr gets called in fstring_find_expr_location which only succeeds
// if curly braces appear before and after the f-string expression (exactly like they do
// in the f-string itself), hence the following lines.
str[0] = '{';
memcpy(str+1, expr_start, len);
str[len+1] = ')';
str[len+1] = '}';
str[len+2] = 0;

int lines, cols;
fstring_find_expr_location(t, str, &lines, &cols);

// The parentheses are needed in order to allow for leading whitespace withing
// the f-string expression. This consequently gets parsed as a group (see the
// group rule in python.gram).
str[0] = '(';
str[len+1] = ')';

struct tok_state* tok = PyTokenizer_FromString(str, 1);
if (tok == NULL) {
PyMem_Free(str);
Expand All @@ -618,21 +405,14 @@ fstring_compile_expr(Parser *p, const char *expr_start, const char *expr_end,

Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
NULL, p->arena);
p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
? p->starting_col_offset + t->col_offset : 0;
p2->starting_lineno = t->lineno + lines - 1;
p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno ? t->col_offset + cols : cols;

expr = _PyPegen_run_parser(p2);

if (expr == NULL) {
goto exit;
}

/* Reuse str to find the correct column offset. */
str[0] = '{';
str[len+1] = '}';
fstring_fix_expr_location(t, expr, str);

result = expr;

exit:
Expand Down
3 changes: 3 additions & 0 deletions Parser/pegen/pegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,9 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
}
}

if (p->start_rule == Py_fstring_input) {
col_offset -= p->starting_col_offset;
}
Py_ssize_t col_number = col_offset;

if (p->tok->encoding != NULL) {
Expand Down
Loading