Skip to content

Commit 724c1ca

Browse files
committed
minor fixes
1 parent 44c416f commit 724c1ca

File tree

6 files changed: +162 additions, −67 deletions

crates/codegen/src/get_node_properties.rs

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ fn custom_handlers(node: &Node) -> TokenStream {
156156
if n.where_clause.is_some() {
157157
tokens.push(TokenProperty::from(Token::Where));
158158
}
159+
if n.group_clause.len() > 0 {
160+
tokens.push(TokenProperty::from(Token::GroupP));
161+
tokens.push(TokenProperty::from(Token::By));
162+
}
159163
},
160164
"Integer" => quote! {
161165
tokens.push(TokenProperty::from(n));
@@ -174,8 +178,19 @@ fn custom_handlers(node: &Node) -> TokenStream {
174178
tokens.push(TokenProperty::from(Token::Ascii42));
175179
},
176180
"FuncCall" => quote! {
181+
if n.funcname.len() == 1 && n.args.len() == 0 {
182+
// check if count(*)
183+
if let Some(node) = &n.funcname[0].node {
184+
if let NodeEnum::String(n) = node {
185+
if n.sval == "count" {
186+
tokens.push(TokenProperty::from(Token::Ascii42));
187+
}
188+
}
189+
}
190+
}
177191
if n.agg_filter.is_some() {
178192
tokens.push(TokenProperty::from(Token::Filter));
193+
tokens.push(TokenProperty::from(Token::Where));
179194
}
180195
},
181196
"SqlvalueFunction" => quote! {
@@ -218,7 +233,6 @@ fn custom_handlers(node: &Node) -> TokenStream {
218233
tokens.push(TokenProperty::from(Token::Table));
219234
},
220235
"AlterTableCmd" => quote! {
221-
println!("AlterTableCmd {:#?}", n);
222236
tokens.push(TokenProperty::from(Token::Alter));
223237
match n.subtype {
224238
4 => {
@@ -229,12 +243,34 @@ fn custom_handlers(node: &Node) -> TokenStream {
229243
_ => panic!("Unknown AlterTableCmd {:#?}", n.subtype),
230244
}
231245
},
246+
"CopyStmt" => quote! {
247+
tokens.push(TokenProperty::from(Token::Copy));
248+
tokens.push(TokenProperty::from(Token::From));
249+
},
232250
"RenameStmt" => quote! {
233251
tokens.push(TokenProperty::from(Token::Alter));
234252
tokens.push(TokenProperty::from(Token::Table));
235253
tokens.push(TokenProperty::from(Token::Rename));
236254
tokens.push(TokenProperty::from(Token::To));
237255
},
256+
"Constraint" => quote! {
257+
match n.contype {
258+
10 => {
259+
// ConstrForeign
260+
tokens.push(TokenProperty::from(Token::References));
261+
},
262+
_ => panic!("Unknown Constraint {:#?}", n.contype),
263+
}
264+
},
265+
"CreateStmt" => quote! {
266+
tokens.push(TokenProperty::from(Token::Create));
267+
tokens.push(TokenProperty::from(Token::Table));
268+
if n.if_not_exists {
269+
tokens.push(TokenProperty::from(Token::IfP));
270+
tokens.push(TokenProperty::from(Token::Not));
271+
tokens.push(TokenProperty::from(Token::Exists));
272+
}
273+
},
238274
_ => quote! {},
239275
}
240276
}

crates/parser/src/lexer.rs

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::{collections::VecDeque, sync::LazyLock};
22

3-
use pg_query::protobuf::KeywordKind;
3+
use pg_query::protobuf::{KeywordKind, ScanToken};
44
use regex::Regex;
55

66
use cstree::text::{TextRange, TextSize};
@@ -17,14 +17,18 @@ pub enum TokenType {
1717
ReservedKeyword,
1818
}
1919

20-
impl From<KeywordKind> for TokenType {
21-
fn from(kind: KeywordKind) -> TokenType {
22-
match kind {
23-
KeywordKind::NoKeyword => TokenType::NoKeyword,
24-
KeywordKind::UnreservedKeyword => TokenType::UnreservedKeyword,
25-
KeywordKind::ColNameKeyword => TokenType::ColNameKeyword,
26-
KeywordKind::TypeFuncNameKeyword => TokenType::TypeFuncNameKeyword,
27-
KeywordKind::ReservedKeyword => TokenType::ReservedKeyword,
20+
impl From<&ScanToken> for TokenType {
21+
fn from(token: &ScanToken) -> TokenType {
22+
match token.token {
23+
// SqlComment
24+
275 => TokenType::Whitespace,
25+
_ => match token.keyword_kind() {
26+
KeywordKind::NoKeyword => TokenType::NoKeyword,
27+
KeywordKind::UnreservedKeyword => TokenType::UnreservedKeyword,
28+
KeywordKind::ColNameKeyword => TokenType::ColNameKeyword,
29+
KeywordKind::TypeFuncNameKeyword => TokenType::TypeFuncNameKeyword,
30+
KeywordKind::ReservedKeyword => TokenType::ReservedKeyword,
31+
},
2832
}
2933
}
3034
}
@@ -38,7 +42,7 @@ pub struct Token {
3842
}
3943

4044
static PATTERN_LEXER: LazyLock<Regex> =
41-
LazyLock::new(|| Regex::new(r"(?P<whitespace> )|(?P<newline>\n)").unwrap());
45+
LazyLock::new(|| Regex::new(r"(?P<whitespace> )|(?P<newline>\n)|(?P<tab>\t)").unwrap());
4246

4347
fn whitespace_tokens(input: &str) -> VecDeque<Token> {
4448
let mut tokens = VecDeque::new();
@@ -64,6 +68,16 @@ fn whitespace_tokens(input: &str) -> VecDeque<Token> {
6468
TextSize::from(u32::try_from(newline.end()).unwrap()),
6569
),
6670
});
71+
} else if let Some(tab) = cap.name("tab") {
72+
tokens.push_back(Token {
73+
token_type: TokenType::Whitespace,
74+
kind: SyntaxKind::Newline,
75+
text: tab.as_str().to_string(),
76+
span: TextRange::new(
77+
TextSize::from(u32::try_from(tab.start()).unwrap()),
78+
TextSize::from(u32::try_from(tab.end()).unwrap()),
79+
),
80+
});
6781
} else {
6882
panic!("No match");
6983
};
@@ -103,9 +117,7 @@ pub fn lex(text: &str) -> Vec<Token> {
103117
let len = token_text.len();
104118
let has_whitespace = token_text.contains(" ") || token_text.contains("\n");
105119
tokens.push(Token {
106-
token_type: TokenType::from(
107-
KeywordKind::from_i32(pg_query_token.keyword_kind).unwrap(),
108-
),
120+
token_type: TokenType::from(&pg_query_token),
109121
kind: SyntaxKind::from(&pg_query_token),
110122
text: token_text,
111123
span: TextRange::new(

crates/parser/src/parse/libpg_query_node.rs

Lines changed: 65 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
use std::{ops::Range, println};
1+
use std::ops::Range;
22

33
use crate::{
44
codegen::{get_nodes, Node, SyntaxKind},
55
lexer::TokenType,
66
};
7+
use log::debug;
78
use petgraph::{
89
stable_graph::{DefaultIx, NodeIndex, StableGraph},
910
visit::Bfs,
@@ -18,6 +19,12 @@ pub fn libpg_query_node(parser: &mut Parser, node: NodeEnum, token_range: &Range
1819
}
1920

2021
pub static SKIPPABLE_TOKENS: &[SyntaxKind] = &[
22+
// "("
23+
SyntaxKind::Ascii40,
24+
// ")"
25+
SyntaxKind::Ascii41,
26+
// ","
27+
SyntaxKind::Ascii44,
2128
// "."
2229
SyntaxKind::Ascii46,
2330
// ";"
@@ -38,8 +45,8 @@ impl<'p> LibpgQueryNodeParser<'p> {
3845
node: NodeEnum,
3946
token_range: &'p Range<usize>,
4047
) -> LibpgQueryNodeParser<'p> {
41-
println!("creating libpg_query_node_parser for node {:#?}", node);
4248
let current_depth = parser.depth.clone();
49+
debug!("Parsing node {:#?}", node);
4350
Self {
4451
parser,
4552
token_range,
@@ -52,17 +59,12 @@ impl<'p> LibpgQueryNodeParser<'p> {
5259
pub fn parse(&mut self) {
5360
while self.parser.pos < self.token_range.end {
5461
dbg!(&self.node_graph);
55-
println!("current node: {:?}", self.current_node);
56-
println!("current token: {:?}", self.current_token());
62+
debug!("current node: {:#?}", self.current_node);
63+
debug!("current token: {:#?}", self.current_token());
5764
if self.at_whitespace() || self.at_skippable() {
58-
println!(
59-
"skipping token because whitespace {:?} or skippable {:?}",
60-
self.at_whitespace(),
61-
self.at_skippable()
62-
);
6365
self.parser.advance();
6466
} else if let Some(idx) = self.node_properties_position(self.current_node) {
65-
println!("found in current node {:?}", self.current_node);
67+
println!("found property at current node {:?}", self.current_node);
6668
// token is in current node. remove and advance.
6769
// open if not opened yet.
6870
if !self.node_is_open(&self.current_node) {
@@ -71,7 +73,7 @@ impl<'p> LibpgQueryNodeParser<'p> {
7173
self.remove_property(self.current_node, idx);
7274
self.parser.advance();
7375
} else if let Some((node_idx, prop_idx)) = self.search_children_properties() {
74-
println!("found in properties of {:?}", node_idx);
76+
println!("found property within children node {:?}", node_idx);
7577
self.remove_property(node_idx, prop_idx);
7678

7779
// close all nodes until the target depth is reached
@@ -97,11 +99,10 @@ impl<'p> LibpgQueryNodeParser<'p> {
9799
self.parser.advance();
98100

99101
self.current_node = node_idx;
100-
println!("setting current node to: {:?}", node_idx);
101102

102103
self.finish_open_leaf_nodes();
103104
} else if let Some((node_idx, prop_idx)) = self.search_parent_properties() {
104-
println!("found in properties of parent {:?}", node_idx);
105+
println!("found property within parent node {:?}", node_idx);
105106
self.remove_property(node_idx, prop_idx);
106107

107108
self.finish_nodes_until_depth(self.node_graph[node_idx].depth + 1);
@@ -112,18 +113,11 @@ impl<'p> LibpgQueryNodeParser<'p> {
112113

113114
// set the current node to the deepest node (looking up from the current node) that has at least one children
114115
// has_children is true if there are outgoing neighbors
115-
println!("setting current node deepest node with at least one children starting from: {:?}", node_idx);
116116
if self.has_children(&node_idx) {
117-
println!(
118-
"node {:?} has children, setting it as current node",
119-
node_idx
120-
);
121117
self.current_node = node_idx;
122118
} else {
123119
for a in self.ancestors(Some(node_idx)) {
124-
println!("checking node {:?}", a);
125120
if self.has_children(&a) {
126-
println!("node {:?} has children, breaking", a);
127121
self.current_node = a;
128122
break;
129123
}
@@ -133,14 +127,14 @@ impl<'p> LibpgQueryNodeParser<'p> {
133127
self.parser.advance();
134128
} else {
135129
panic!(
136-
"could not find node for token {:?} at depth {}",
130+
"could not find node for token {:?} at depth {} in {:#?}",
137131
self.current_token(),
138-
self.parser.depth
132+
self.parser.depth,
133+
self.node_graph
139134
);
140135
}
141136
}
142137
// close all remaining nodes
143-
println!("closing remaining nodes");
144138
for _ in 0..self.open_nodes.len() {
145139
self.finish_node();
146140
}
@@ -209,22 +203,34 @@ impl<'p> LibpgQueryNodeParser<'p> {
209203
None
210204
}
211205

206+
/// check if the current node has children that have properties that are in the part of the token stream that is not yet consumed
207+
fn has_children_with_relevant_properties(&self) -> bool {
208+
let tokens = &self.parser.tokens[self.parser.pos..self.token_range.end];
209+
let mut b = Bfs::new(&self.node_graph, self.current_node);
210+
while let Some(nx) = b.next(&self.node_graph) {
211+
if self.node_graph[nx]
212+
.properties
213+
.iter()
214+
.any(|p| tokens.iter().any(|t| cmp_tokens(p, t)))
215+
{
216+
return true;
217+
}
218+
}
219+
false
220+
}
221+
212222
/// finish current node while it is an open leaf node with no properties
213223
fn finish_open_leaf_nodes(&mut self) {
214-
let tokens = self
215-
.parser
216-
.tokens
217-
.get(self.token_range.clone())
218-
.unwrap()
219-
.to_vec();
220224
while self
221225
.node_graph
222226
.neighbors_directed(self.current_node, Direction::Outgoing)
223227
.count()
224228
== 0
229+
|| !self.has_children_with_relevant_properties()
225230
{
226-
// check if the node contains properties that are not at all in the token stream and remove them
231+
// check if the node contains properties that are not at all in the part of the token stream that is not yet consumed and remove them
227232
if self.node_graph[self.current_node].properties.len() > 0 {
233+
let tokens = &self.parser.tokens[self.parser.pos..self.token_range.end];
228234
self.node_graph[self.current_node]
229235
.properties
230236
.retain(|p| tokens.iter().any(|t| cmp_tokens(p, t)));
@@ -239,10 +245,6 @@ impl<'p> LibpgQueryNodeParser<'p> {
239245
break;
240246
}
241247
self.current_node = self.open_nodes.last().unwrap().clone();
242-
println!(
243-
"finish open leafes: set current node to: {:?}",
244-
self.current_node
245-
);
246248
}
247249
}
248250

@@ -271,7 +273,6 @@ impl<'p> LibpgQueryNodeParser<'p> {
271273
}
272274

273275
fn finish_node(&mut self) {
274-
println!("finishing node {:?}", self.open_nodes.last());
275276
self.node_graph.remove_node(self.open_nodes.pop().unwrap());
276277
self.parser.finish_node();
277278
}
@@ -306,9 +307,36 @@ impl<'p> LibpgQueryNodeParser<'p> {
306307
}
307308
}
308309

310+
/// list of aliases from https://www.postgresql.org/docs/current/datatype.html
311+
const ALIASES: [&[&str]; 2] = [&["integer", "int", "int4"], &["real", "float4"]];
312+
309313
fn cmp_tokens(p: &crate::codegen::TokenProperty, token: &crate::lexer::Token) -> bool {
310-
(!p.value.is_some() || p.value.as_ref().unwrap() == &token.text)
311-
&& (!p.kind.is_some() || p.kind.unwrap() == token.kind)
314+
// TokenProperty has always either value or kind set
315+
assert!(p.value.is_some() || p.kind.is_some());
316+
317+
// TODO: move this to lexer
318+
319+
// remove enclosing ' quotes from token text
320+
let string_delimiter: &[char; 2] = &['\'', '$'];
321+
let token_text = token
322+
.text
323+
.trim_start_matches(string_delimiter)
324+
.trim_end_matches(string_delimiter)
325+
.to_string();
326+
let token_text_values = aliases(&token_text);
327+
328+
(p.value.is_none() || token_text_values.contains(&p.value.as_ref().unwrap().as_str()))
329+
&& (p.kind.is_none() || p.kind.unwrap() == token.kind)
330+
}
331+
332+
/// returns a list of aliases for a string. primarily used for data types.
333+
fn aliases(text: &str) -> Vec<&str> {
334+
for alias in ALIASES {
335+
if alias.contains(&text) {
336+
return alias.to_vec();
337+
}
338+
}
339+
return vec![text];
312340
}
313341

314342
/// Custom iterator for walking ancestors of a node until the root of the tree is reached

crates/parser/src/parse/statement_start.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,11 @@ pub static STATEMENT_START_TOKEN_MAPS: LazyLock<Vec<HashMap<SyntaxKind, Vec<Toke
172172
SyntaxToken::Optional(SyntaxKind::Temporary),
173173
SyntaxToken::Optional(SyntaxKind::Temp),
174174
SyntaxToken::Optional(SyntaxKind::Unlogged),
175+
SyntaxToken::Optional(SyntaxKind::IfP),
176+
SyntaxToken::Optional(SyntaxKind::Not),
177+
SyntaxToken::Optional(SyntaxKind::Exists),
175178
SyntaxToken::Required(SyntaxKind::Table),
179+
SyntaxToken::Required(SyntaxKind::Ident),
176180
],
177181
));
178182

0 commit comments

Comments (0)