@@ -175,10 +175,20 @@ def transform(node: Node) -> str:
175175
176176 # `Repeat`.
177177 elif isinstance (node , Repeat ):
178- return "(?:%s){%i,%s}%s" % (
178+ if node .max_repeat is None :
179+ if node .min_repeat == 0 :
180+ repeat_sign = "*"
181+ elif node .min_repeat == 1 :
182+ repeat_sign = "+"
183+ else :
184+ repeat_sign = "{%i,%s}" % (
185+ node .min_repeat ,
186+ ("" if node .max_repeat is None else str (node .max_repeat )),
187+ )
188+
189+ return "(?:%s)%s%s" % (
179190 transform (node .childnode ),
180- node .min_repeat ,
181- ("" if node .max_repeat is None else str (node .max_repeat )),
191+ repeat_sign ,
182192 ("" if node .greedy else "?" ),
183193 )
184194 else :
@@ -194,39 +204,113 @@ def _transform_prefix(
194204 Yield all the regular expressions matching a prefix of the grammar
195205 defined by the `Node` instance.
196206
197- This can yield multiple expressions, because in the case of on OR
198- operation in the grammar, we can have another outcome depending on
199- which clause would appear first. E.g. "(A|B)C" is not the same as
200- "(B|A)C" because the regex engine is lazy and takes the first match.
201- However, because we the current input is actually a prefix of the
202- grammar which might not yet contain the data for "C", we need to know
203- both intermediate states, in order to call the appropriate
204- autocompletion for both cases.
207+ For each `Variable`, one regex pattern will be generated, with this
208+ named group at the end. This is required because a regex engine will
209+ terminate once a match is found. For autocompletion however, we need
210+ the matches for all possible paths, so that we can provide completions
211+ for each `Variable`.
212+
213+ - So, in the case of an `Any` (`A|B|C`), we generate a pattern for each
214+ clause. This is one for `A`, one for `B` and one for `C`. Clauses that
215+ don't contain a `Variable` can be merged together into one pattern.
216+ - In the case of a `NodeSequence` (`ABC`), we generate a pattern for
217+ each prefix that ends with a variable, and one pattern for the whole
218+ sequence. So, that's one for `A`, one for `AB` and one for `ABC`.
205219
206220 :param root_node: The :class:`Node` instance for which we generate the grammar.
207221 :param create_group_func: A callable which takes a `Node` and returns the next
208222 free name for this node.
209223 """
210224
225+ def contains_variable (node : Node ) -> bool :
226+ if isinstance (node , Regex ):
227+ return False
228+ elif isinstance (node , Variable ):
229+ return True
230+ elif isinstance (node , (Lookahead , Repeat )):
231+ return contains_variable (node .childnode )
232+ elif isinstance (node , (NodeSequence , AnyNode )):
233+ return any (contains_variable (child ) for child in node .children )
234+
235+ return False
236+
211237 def transform (node : Node ) -> Iterable [str ]:
212- # Generate regexes for all permutations of this OR. Each node
213- # should be in front once.
238+ # Generate a separate pattern for each term that contains a variable
239+ # within this OR. Terms that don't contain a variable can be merged
240+ # together in one pattern.
214241 if isinstance (node , AnyNode ):
242+ # If we have a definition like:
243+ # (?P<name> .*) | (?P<city> .*)
244+ # Then we want to be able to generate completions for both the
245+ # name as well as the city. We do this by yielding two
246+ # different regular expressions, because the engine won't
247+ # follow multiple paths, if multiple are possible.
248+ children_with_variable = []
249+ children_without_variable = []
215250 for c in node .children :
216- for r in transform (c ):
217- yield "(?:%s)?" % r
251+ if contains_variable (c ):
252+ children_with_variable .append (c )
253+ else :
254+ children_without_variable .append (c )
255+
256+ for c in children_with_variable :
257+ yield from transform (c )
218258
219- # For a sequence. We can either have a match for the sequence
220- # of all the children, or for an exact match of the first X
221- # children, followed by a partial match of the next children.
259+ # Merge options without variable together.
260+ if children_without_variable :
261+ yield "|" .join (
262+ r for c in children_without_variable for r in transform (c )
263+ )
264+
265+ # For a sequence, generate a pattern for each prefix that ends with
266+ # a variable + one pattern of the complete sequence.
267+ # (This is because, for autocompletion, we match the text before
268+ # the cursor, and completions are given for the variable that we
269+ # match right before the cursor.)
222270 elif isinstance (node , NodeSequence ):
271+ # For all components in the sequence, compute prefix patterns,
272+ # as well as full patterns.
273+ complete = [cls ._transform (c , create_group_func ) for c in node .children ]
274+ prefixes = [list (transform (c )) for c in node .children ]
275+ variable_nodes = [contains_variable (c ) for c in node .children ]
276+
277+ # If any child contains a variable, we should yield a
278+ # pattern up to that point, so that we are sure this will be
279+ # matched.
223280 for i in range (len (node .children )):
224- a = [
225- cls ._transform (c , create_group_func ) for c in node .children [:i ]
226- ]
227-
228- for c_str in transform (node .children [i ]):
229- yield "(?:%s)" % ("" .join (a ) + c_str )
281+ if variable_nodes [i ]:
282+ for c_str in prefixes [i ]:
283+ yield "" .join (complete [:i ]) + c_str
284+
285+ # If there are non-variable nodes, merge all the prefixes into
286+ # one pattern. If the input is: "[part1] [part2] [part3]", then
287+ # this gets compiled into:
288+ # (complete1 + (complete2 + (complete3 | partial3) | partial2) | partial1 )
289+ # For nodes that contain a variable, we skip the "|partial"
290+ # part here, because these are matched with the previous
291+ # patterns.
292+ if not all (variable_nodes ):
293+ result = []
294+
295+ # Start with complete patterns.
296+ for i in range (len (node .children )):
297+ result .append ("(?:" )
298+ result .append (complete [i ])
299+
300+ # Add prefix patterns.
301+ for i in range (len (node .children ) - 1 , - 1 , - 1 ):
302+ if variable_nodes [i ]:
303+ # No need to yield a prefix for this one, we did
304+ # the variable prefixes earlier.
305+ result .append (")" )
306+ else :
307+ result .append ("|(?:" )
308+ # If this yields multiple, we should yield all combinations.
309+ assert len (prefixes [i ]) == 1
310+ result .append (prefixes [i ][0 ])
311+ result .append ("))" )
312+
313+ yield "" .join (result )
230314
231315 elif isinstance (node , Regex ):
232316 yield "(?:%s)?" % node .regex
@@ -251,23 +335,26 @@ def transform(node: Node) -> Iterable[str]:
251335 # match, followed by a partial match.
252336 prefix = cls ._transform (node .childnode , create_group_func )
253337
254- for c_str in transform (node .childnode ):
255- if node .max_repeat :
256- repeat_sign = "{,%i}" % (node .max_repeat - 1 )
257- else :
258- repeat_sign = "*"
259- yield "(?:%s)%s%s(?:%s)?" % (
260- prefix ,
261- repeat_sign ,
262- ("" if node .greedy else "?" ),
263- c_str ,
264- )
338+ if node .max_repeat == 1 :
339+ yield from transform (node .childnode )
340+ else :
341+ for c_str in transform (node .childnode ):
342+ if node .max_repeat :
343+ repeat_sign = "{,%i}" % (node .max_repeat - 1 )
344+ else :
345+ repeat_sign = "*"
346+ yield "(?:%s)%s%s%s" % (
347+ prefix ,
348+ repeat_sign ,
349+ ("" if node .greedy else "?" ),
350+ c_str ,
351+ )
265352
266353 else :
267354 raise TypeError ("Got %r" % node )
268355
269356 for r in transform (root_node ):
270- yield "^%s $" % r
357+ yield "^(?:%s) $" % r
271358
272359 def match (self , string : str ) -> Optional ["Match" ]:
273360 """
@@ -303,6 +390,7 @@ def match_prefix(self, string: str) -> Optional["Match"]:
303390 return Match (
304391 string , matches2 , self ._group_names_to_nodes , self .unescape_funcs
305392 )
393+
306394 return None
307395
308396
@@ -343,7 +431,7 @@ def get_tuples() -> Iterable[Tuple[str, Tuple[int, int]]]:
343431
344432 def _nodes_to_values (self ) -> List [Tuple [str , str , Tuple [int , int ]]]:
345433 """
346- Returns list of list of (Node, string_value) tuples.
434+ Returns list of (Node, string_value) tuples.
347435 """
348436
349437 def is_none (sl : Tuple [int , int ]) -> bool :
0 commit comments