1- use std:: { ops:: Range , println } ;
1+ use std:: ops:: Range ;
22
33use crate :: {
44 codegen:: { get_nodes, Node , SyntaxKind } ,
55 lexer:: TokenType ,
66} ;
7+ use log:: debug;
78use petgraph:: {
89 stable_graph:: { DefaultIx , NodeIndex , StableGraph } ,
910 visit:: Bfs ,
@@ -18,6 +19,12 @@ pub fn libpg_query_node(parser: &mut Parser, node: NodeEnum, token_range: &Range
1819}
1920
2021pub static SKIPPABLE_TOKENS : & [ SyntaxKind ] = & [
22+ // "("
23+ SyntaxKind :: Ascii40 ,
24+ // ")"
25+ SyntaxKind :: Ascii41 ,
26+ // ","
27+ SyntaxKind :: Ascii44 ,
2128 // "."
2229 SyntaxKind :: Ascii46 ,
2330 // ";"
@@ -38,8 +45,8 @@ impl<'p> LibpgQueryNodeParser<'p> {
3845 node : NodeEnum ,
3946 token_range : & ' p Range < usize > ,
4047 ) -> LibpgQueryNodeParser < ' p > {
41- println ! ( "creating libpg_query_node_parser for node {:#?}" , node) ;
4248 let current_depth = parser. depth . clone ( ) ;
49+ debug ! ( "Parsing node {:#?}" , node) ;
4350 Self {
4451 parser,
4552 token_range,
@@ -52,17 +59,12 @@ impl<'p> LibpgQueryNodeParser<'p> {
5259 pub fn parse ( & mut self ) {
5360 while self . parser . pos < self . token_range . end {
5461 dbg ! ( & self . node_graph) ;
55- println ! ( "current node: {:?}" , self . current_node) ;
56- println ! ( "current token: {:?}" , self . current_token( ) ) ;
62+ debug ! ( "current node: {:# ?}" , self . current_node) ;
63+ debug ! ( "current token: {:# ?}" , self . current_token( ) ) ;
5764 if self . at_whitespace ( ) || self . at_skippable ( ) {
58- println ! (
59- "skipping token because whitespace {:?} or skippable {:?}" ,
60- self . at_whitespace( ) ,
61- self . at_skippable( )
62- ) ;
6365 self . parser . advance ( ) ;
6466 } else if let Some ( idx) = self . node_properties_position ( self . current_node ) {
65- println ! ( "found in current node {:?}" , self . current_node) ;
67+ println ! ( "found property at current node {:?}" , self . current_node) ;
6668 // token is in current node. remove and advance.
6769 // open if not opened yet.
6870 if !self . node_is_open ( & self . current_node ) {
@@ -71,7 +73,7 @@ impl<'p> LibpgQueryNodeParser<'p> {
7173 self . remove_property ( self . current_node , idx) ;
7274 self . parser . advance ( ) ;
7375 } else if let Some ( ( node_idx, prop_idx) ) = self . search_children_properties ( ) {
74- println ! ( "found in properties of {:?}" , node_idx) ;
76+ println ! ( "found property within children node {:?}" , node_idx) ;
7577 self . remove_property ( node_idx, prop_idx) ;
7678
7779 // close all nodes until the target depth is reached
@@ -97,11 +99,10 @@ impl<'p> LibpgQueryNodeParser<'p> {
9799 self . parser . advance ( ) ;
98100
99101 self . current_node = node_idx;
100- println ! ( "setting current node to: {:?}" , node_idx) ;
101102
102103 self . finish_open_leaf_nodes ( ) ;
103104 } else if let Some ( ( node_idx, prop_idx) ) = self . search_parent_properties ( ) {
104- println ! ( "found in properties of parent {:?}" , node_idx) ;
105+ println ! ( "found property within parent node {:?}" , node_idx) ;
105106 self . remove_property ( node_idx, prop_idx) ;
106107
107108 self . finish_nodes_until_depth ( self . node_graph [ node_idx] . depth + 1 ) ;
@@ -112,18 +113,11 @@ impl<'p> LibpgQueryNodeParser<'p> {
112113
113114 // set the current node to the deepest node (looking up from the current node) that has at least one children
114115 // has_children is true if there are outgoing neighbors
115- println ! ( "setting current node deepest node with at least one children starting from: {:?}" , node_idx) ;
116116 if self . has_children ( & node_idx) {
117- println ! (
118- "node {:?} has children, setting it as current node" ,
119- node_idx
120- ) ;
121117 self . current_node = node_idx;
122118 } else {
123119 for a in self . ancestors ( Some ( node_idx) ) {
124- println ! ( "checking node {:?}" , a) ;
125120 if self . has_children ( & a) {
126- println ! ( "node {:?} has children, breaking" , a) ;
127121 self . current_node = a;
128122 break ;
129123 }
@@ -133,14 +127,14 @@ impl<'p> LibpgQueryNodeParser<'p> {
133127 self . parser . advance ( ) ;
134128 } else {
135129 panic ! (
136- "could not find node for token {:?} at depth {}" ,
130+ "could not find node for token {:?} at depth {} in {:#?} " ,
137131 self . current_token( ) ,
138- self . parser. depth
132+ self . parser. depth,
133+ self . node_graph
139134 ) ;
140135 }
141136 }
142137 // close all remaining nodes
143- println ! ( "closing remaining nodes" ) ;
144138 for _ in 0 ..self . open_nodes . len ( ) {
145139 self . finish_node ( ) ;
146140 }
@@ -209,22 +203,34 @@ impl<'p> LibpgQueryNodeParser<'p> {
209203 None
210204 }
211205
206+ /// check if the current node has children that have properties that are in the part of the token stream that is not yet consumed
207+ fn has_children_with_relevant_properties ( & self ) -> bool {
208+ let tokens = & self . parser . tokens [ self . parser . pos ..self . token_range . end ] ;
209+ let mut b = Bfs :: new ( & self . node_graph , self . current_node ) ;
210+ while let Some ( nx) = b. next ( & self . node_graph ) {
211+ if self . node_graph [ nx]
212+ . properties
213+ . iter ( )
214+ . any ( |p| tokens. iter ( ) . any ( |t| cmp_tokens ( p, t) ) )
215+ {
216+ return true ;
217+ }
218+ }
219+ false
220+ }
221+
212222 /// finish current node while it is an open leaf node with no properties
213223 fn finish_open_leaf_nodes ( & mut self ) {
214- let tokens = self
215- . parser
216- . tokens
217- . get ( self . token_range . clone ( ) )
218- . unwrap ( )
219- . to_vec ( ) ;
220224 while self
221225 . node_graph
222226 . neighbors_directed ( self . current_node , Direction :: Outgoing )
223227 . count ( )
224228 == 0
229+ || !self . has_children_with_relevant_properties ( )
225230 {
226- // check if the node contains properties that are not at all in the token stream and remove them
231+ // check if the node contains properties that are not at all in the part of the token stream that is not yet consumed and remove them
227232 if self . node_graph [ self . current_node ] . properties . len ( ) > 0 {
233+ let tokens = & self . parser . tokens [ self . parser . pos ..self . token_range . end ] ;
228234 self . node_graph [ self . current_node ]
229235 . properties
230236 . retain ( |p| tokens. iter ( ) . any ( |t| cmp_tokens ( p, t) ) ) ;
@@ -239,10 +245,6 @@ impl<'p> LibpgQueryNodeParser<'p> {
239245 break ;
240246 }
241247 self . current_node = self . open_nodes . last ( ) . unwrap ( ) . clone ( ) ;
242- println ! (
243- "finish open leafes: set current node to: {:?}" ,
244- self . current_node
245- ) ;
246248 }
247249 }
248250
@@ -271,7 +273,6 @@ impl<'p> LibpgQueryNodeParser<'p> {
271273 }
272274
273275 fn finish_node ( & mut self ) {
274- println ! ( "finishing node {:?}" , self . open_nodes. last( ) ) ;
275276 self . node_graph . remove_node ( self . open_nodes . pop ( ) . unwrap ( ) ) ;
276277 self . parser . finish_node ( ) ;
277278 }
@@ -306,9 +307,36 @@ impl<'p> LibpgQueryNodeParser<'p> {
306307 }
307308}
308309
/// Groups of interchangeable PostgreSQL data type names.
///
/// All names within one group denote the same type. The list is taken from
/// https://www.postgresql.org/docs/current/datatype.html
const ALIASES: [&[&str]; 2] = [
    // four-byte integer
    &["integer", "int", "int4"],
    // single-precision floating point
    &["real", "float4"],
];
312+
309313fn cmp_tokens ( p : & crate :: codegen:: TokenProperty , token : & crate :: lexer:: Token ) -> bool {
310- ( !p. value . is_some ( ) || p. value . as_ref ( ) . unwrap ( ) == & token. text )
311- && ( !p. kind . is_some ( ) || p. kind . unwrap ( ) == token. kind )
314+ // TokenProperty has always either value or kind set
315+ assert ! ( p. value. is_some( ) || p. kind. is_some( ) ) ;
316+
317+ // TODO: move this to lexer
318+
319+ // remove enclosing ' quotes from token text
320+ let string_delimiter: & [ char ; 2 ] = & [ '\'' , '$' ] ;
321+ let token_text = token
322+ . text
323+ . trim_start_matches ( string_delimiter)
324+ . trim_end_matches ( string_delimiter)
325+ . to_string ( ) ;
326+ let token_text_values = aliases ( & token_text) ;
327+
328+ ( p. value . is_none ( ) || token_text_values. contains ( & p. value . as_ref ( ) . unwrap ( ) . as_str ( ) ) )
329+ && ( p. kind . is_none ( ) || p. kind . unwrap ( ) == token. kind )
330+ }
331+
332+ /// returns a list of aliases for a string. primarily used for data types.
333+ fn aliases ( text : & str ) -> Vec < & str > {
334+ for alias in ALIASES {
335+ if alias. contains ( & text) {
336+ return alias. to_vec ( ) ;
337+ }
338+ }
339+ return vec ! [ text] ;
312340}
313341
314342/// Custom iterator for walking ancestors of a node until the root of the tree is reached
0 commit comments