Skip to content

Commit 0438601

Browse files
committed
fix: improve nested parsing and cleanup
1 parent c0d4900 commit 0438601

File tree

4 files changed

+51
-22
lines changed

4 files changed

+51
-22
lines changed

crates/parser/src/parser.rs

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ use crate::syntax_error::SyntaxError;
77
use crate::syntax_kind::{SyntaxKind, SyntaxKindType};
88
use crate::syntax_node::SyntaxNode;
99

10-
#[derive(Default, Debug)]
10+
#[derive(Debug)]
1111
pub struct Parser {
1212
inner: GreenNodeBuilder<'static, 'static, SyntaxKind>,
1313
token_buffer: Vec<(SyntaxKind, String)>,
1414
curr_depth: i32,
1515
errors: Vec<SyntaxError>,
1616
stmts: Vec<RawStmt>,
17+
checkpoint: Option<i32>,
1718
is_parsing_erronous_node: bool,
1819
}
1920

@@ -26,34 +27,64 @@ pub struct Parse {
2627

2728
/// Main parser that controls the CST building process and collects errors and statements
2829
impl Parser {
30+
pub fn new() -> Self {
31+
Self {
32+
curr_depth: -1,
33+
inner: GreenNodeBuilder::new(),
34+
token_buffer: Vec::new(),
35+
errors: Vec::new(),
36+
stmts: Vec::new(),
37+
checkpoint: None,
38+
is_parsing_erronous_node: false,
39+
}
40+
}
41+
2942
pub fn close_until_depth(&mut self, depth: i32) {
30-
while self.curr_depth >= depth && depth > 0 {
43+
while self.curr_depth >= depth {
3144
self.finish_node();
3245
self.curr_depth -= 1;
3346
}
3447
}
3548

49+
pub fn set_checkpoint(&mut self, is_parsing_erronous_node: bool) {
50+
assert!(self.checkpoint.is_none());
51+
assert!(self.token_buffer.is_empty());
52+
println!("set_checkpoint at {}", self.curr_depth);
53+
self.checkpoint = Some(self.curr_depth);
54+
self.is_parsing_erronous_node = is_parsing_erronous_node;
55+
}
56+
57+
pub fn close_checkpoint(&mut self) {
58+
self.consume_token_buffer();
59+
if self.checkpoint.is_some() {
60+
self.close_until_depth(self.checkpoint.unwrap());
61+
}
62+
self.checkpoint = None;
63+
self.is_parsing_erronous_node = false;
64+
}
65+
66+
pub fn start_node(&mut self, kind: SyntaxKind) {
67+
self.inner.start_node(kind);
68+
}
69+
3670
/// start a new node of `SyntaxKind` at `depth`
3771
/// handles closing previous nodes if necessary
3872
/// and consumes token buffer before starting new node
3973
///
4074
/// if `depth` is `None`, the node is started at `curr_depth + 1`; `is_parsing_erronous_node` is no longer set here but via `set_checkpoint`
41-
pub fn start_node(&mut self, kind: SyntaxKind, depth: i32) {
75+
pub fn start_node_at(&mut self, kind: SyntaxKind, depth: Option<i32>) {
76+
let depth = depth.unwrap_or(self.curr_depth + 1);
4277
// close until target depth
4378
self.close_until_depth(depth);
4479

4580
self.consume_token_buffer();
4681

4782
self.curr_depth = depth;
48-
self.inner.start_node(kind);
49-
if kind == SyntaxKind::AnyStatement {
50-
self.is_parsing_erronous_node = true;
51-
}
83+
self.start_node(kind);
5284
}
5385

5486
pub fn finish_node(&mut self) {
5587
self.inner.finish_node();
56-
self.is_parsing_erronous_node = false;
5788
}
5889

5990
/// Drains the token buffer and applies all tokens

crates/parser/src/source_file.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ impl Parser {
2626
pub fn parse_source_file(&mut self, text: &str) {
2727
let mut lexer = SourceFileToken::lexer(text);
2828

29-
self.start_node(SyntaxKind::SourceFile, 0);
29+
self.start_node_at(SyntaxKind::SourceFile, Some(0));
3030
while let Some(token) = lexer.next() {
3131
match token {
3232
Ok(token) => {
@@ -92,7 +92,7 @@ select 1;
9292
9393
";
9494

95-
let mut parser = Parser::default();
95+
let mut parser = Parser::new();
9696
println!("input {:?}", input);
9797
parser.parse_source_file(input);
9898
let parsed = parser.finish();

crates/parser/src/statement.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ impl StatementToken {
103103

104104
impl Parser {
105105
pub fn parse_statement(&mut self, text: &str, at_offset: Option<u32>) {
106-
println!("#### parse_statement: {}", text);
107106
let offset = at_offset.unwrap_or(0);
108107
let range = TextRange::new(
109108
TextSize::from(offset),
@@ -117,7 +116,6 @@ impl Parser {
117116
Vec::new().into_iter().peekable()
118117
}
119118
};
120-
println!("pg_query_tokens: {:?}", pg_query_tokens);
121119

122120
let parsed = pg_query::parse(text);
123121
let proto;
@@ -139,18 +137,19 @@ impl Parser {
139137
Vec::new().into_iter().peekable()
140138
}
141139
};
142-
println!("pg_query_nodes: {:?}", pg_query_nodes);
143140

144141
let mut lexer = StatementToken::lexer(&text);
145142

146143
// parse root node if no syntax errors
147144
if pg_query_nodes.peek().is_some() {
148145
let (node, depth, _) = pg_query_nodes.next().unwrap();
149146
self.stmt(node.to_enum(), range);
150-
self.start_node(SyntaxKind::from_pg_query_node(&node), depth);
147+
self.start_node_at(SyntaxKind::from_pg_query_node(&node), Some(depth));
148+
self.set_checkpoint(false);
151149
} else {
152150
// fallback to generic node as root
153-
self.start_node(SyntaxKind::AnyStatement, 1);
151+
self.start_node_at(SyntaxKind::Stmt, None);
152+
self.set_checkpoint(true);
154153
}
155154

156155
while let Some(token) = lexer.next() {
@@ -166,7 +165,7 @@ impl Parser {
166165
} else {
167166
// node is within span
168167
let (node, depth, _) = pg_query_nodes.next().unwrap();
169-
self.start_node(SyntaxKind::from_pg_query_node(&node), depth);
168+
self.start_node_at(SyntaxKind::from_pg_query_node(&node), Some(depth));
170169
}
171170
}
172171

@@ -192,8 +191,7 @@ impl Parser {
192191
}
193192

194193
// close up nodes
195-
self.consume_token_buffer();
196-
self.close_until_depth(1);
194+
self.close_checkpoint();
197195
}
198196
}
199197

@@ -251,7 +249,7 @@ mod tests {
251249
fn test_statement_parser() {
252250
let input = "select *,some_col from contact where id = '123 4 5';";
253251

254-
let mut parser = Parser::default();
252+
let mut parser = Parser::new();
255253
parser.parse_statement(input, None);
256254
let parsed = parser.finish();
257255

@@ -264,7 +262,7 @@ mod tests {
264262
fn test_invalid_statement() {
265263
let input = "select select;";
266264

267-
let mut parser = Parser::default();
265+
let mut parser = Parser::new();
268266
parser.parse_statement(input, None);
269267
let parsed = parser.finish();
270268

crates/parser/src/syntax_kind.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ pub enum SyntaxKind {
1212
Whitespace,
1313
Newline,
1414
Tab,
15-
Word, // common value for all keywords (select, from, ...)
16-
AnyStatement, // node for unknown statements (e.g. when parsing fails)
15+
Word, // common value for all keywords (select, from, ...)
16+
Stmt, // node for unknown statements (e.g. when parsing fails)
1717
// from here on, copied from NodeEnum
1818
Alias,
1919
RangeVar,

0 commit comments

Comments
 (0)