Commit c081d3c

initial commit

1 parent f0bd45f · commit c081d3c

File tree

4 files changed: +122 -0 lines changed


.gitignore

Lines changed: 5 additions & 0 deletions
@@ -12,3 +12,8 @@ Cargo.lock
 
 # MSVC Windows builds of rustc generate these, which store debugging information
 *.pdb
+
+
+# Added by cargo
+
+/target

Cargo.toml

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+[package]
+name = "postgres_lsp"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+pg_query = "0.7"
+logos = "0.13.0"

src/example.sql

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+select * from contact where id = '123';
+
+-- test comment
+
+select wrong statement;
+
+select id,username from contact
+
+select id,name
+from contact -- test inline comment
+where id = '123';
+
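
Note that the third select deliberately has no trailing semicolon: the Expr pattern in src/main.rs only terminates on ';', so everything from that statement up to the next semicolon is lexed as a single Expr, which the last assertion in test_lexer documents. A minimal sketch for dumping the raw token stream over this file (the Token enum is copied from src/main.rs below):

    use logos::Logos;

    #[derive(Logos, Debug, PartialEq)]
    #[logos(skip r"[ \t\f]+")]
    enum Token {
        #[regex("[a-zA-Z0-9_]+[^;]*;")]
        Expr,
        #[regex("\n+")]
        Newline,
        #[regex("/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*/|--[^\n]*")]
        Comment,
    }

    fn main() {
        let source = std::fs::read_to_string("./src/example.sql").unwrap();
        let mut lex = Token::lexer(&source);
        // Walk the whole file; the statement missing its ';' shows up merged
        // into the following Expr slice.
        while let Some(token) = lex.next() {
            println!("{:?}: {:?}", token, lex.slice());
        }
    }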

src/main.rs

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+use logos::Logos;
+use std::fs;
+
+#[derive(Logos, Debug, PartialEq)]
+#[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
+enum Token {
+    #[regex("[a-zA-Z0-9_]+[^;]*;")]
+    Expr,
+    #[regex("\n+")]
+    Newline,
+    #[regex("/\\*[^*]*\\*+(?:[^/*][^*]*\\*+)*/|--[^\n]*")]
+    Comment,
+}
+
+fn main() {
+    let source = fs::read_to_string("./src/example.sql").unwrap();
+    let mut lex = Token::lexer(&source);
+
+    println!("{:?}", source);
+
+    // https://github.com/domenicquirl/cstree
+    // https://ericlippert.com/2012/06/08/red-green-trees/
+    //
+    // So, for example, to parse a struct definition the parser first "enters" the struct definition node, then parses the struct keyword and type name, then parses each field, and finally "finishes" parsing the struct node.
+    //
+    // 1. lexer: parse string into tokens. cstree will allow us to just move forward until the next
+    //    statement. also, for comments, we should be able to store them separately since we are
+    //    just walking over the source code. tokens should be expr, whitespace, newlines, comments
+    //    and eof. does not work because the lexer is "dumb". Token != SyntaxKind, so maybe we do
+    //    not need a real lexer.
+    // 2. parser: parse tokens into a cst with cstree. nodes are not typed, and we should be able
+    //    to use pg_query to parse the string, and turn that into SyntaxKind tokens.
+    //
+    //
+    // Notes:
+    // - maybe we do not need a real lexer to parse into statements. we can just use simple string
+    //   operations? or maybe a lexer but with metadata on tokens, because normally a token
+    //   translates into a constant, which is not what we want. instead, we want a token Expr to
+    //   hold the expression string.
+
+    // problem: comments
+    // general problem: declarative parsing by token will, based on initial research, not work well
+    // because we have tokens within tokens (a comment can be within a sql query)
+    // let parser = any::<_, extra::Err<Simple<char>>>()
+    //     .and_is(just(';').not())
+    //     .repeated()
+    //     .collect::<String>()
+    //     .padded()
+    //     .separated_by(just(';'))
+    //     .collect::<Vec<String>>();
+    //
+    // let comment = just("--")
+    //     .then(
+    //         any::<_, extra::Err<Simple<char>>>()
+    //             .and_is(just('\n').not())
+    //             .repeated(),
+    //     )
+    //     .padded();
+    //
+    // let comments = comment.parse(source.as_str());
+    // let result = parser.parse(source.as_str());
+    //
+    // println!("{:?}", source);
+    // println!("{:?}", result);
+    // println!("{:?}", comments);
+    //
+    // let pg_query_result = pg_query::parse("SELECT * FROM contacts").unwrap();
+    //
+    // println!("{:?}", pg_query_result.protobuf.nodes());
+}
+
+#[test]
+fn test_lexer() {
+    let input = "select * from contact where id = '123';\n\n-- test comment\n\nselect wrong statement;\n\nselect id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';\n\n";
+
+    let mut lex = Token::lexer(&input);
+
+    assert_eq!(lex.next(), Some(Ok(Token::Expr)));
+    assert_eq!(lex.slice(), "select * from contact where id = '123';");
+
+    assert_eq!(lex.next(), Some(Ok(Token::Newline)));
+
+    assert_eq!(lex.next(), Some(Ok(Token::Comment)));
+    assert_eq!(lex.slice(), "-- test comment");
+
+    assert_eq!(lex.next(), Some(Ok(Token::Newline)));
+
+    assert_eq!(lex.next(), Some(Ok(Token::Expr)));
+    assert_eq!(lex.slice(), "select wrong statement;");
+
+    assert_eq!(lex.next(), Some(Ok(Token::Newline)));
+
+    assert_eq!(lex.next(), Some(Ok(Token::Expr)));
+    assert_eq!(lex.slice(), "select id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';");
+}
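
The cstree and red-green-tree links in the comments describe the parsing model this experiment aims for: the parser "enters" a node, emits the tokens inside it, and "finishes" the node, producing an untyped green tree that typed "red" nodes are layered over on traversal. A minimal sketch of that enter/token/finish pattern, written here against rowan (the crate cstree was forked from, with a near-identical builder API); the SyntaxKind values are made up for illustration:

    use rowan::{GreenNodeBuilder, SyntaxKind};

    // Hypothetical kinds for this sketch; in the plan above they would be
    // derived from pg_query's output rather than hard-coded.
    const ROOT: SyntaxKind = SyntaxKind(0);
    const STMT: SyntaxKind = SyntaxKind(1);
    const EXPR: SyntaxKind = SyntaxKind(2);
    const COMMENT: SyntaxKind = SyntaxKind(3);

    fn main() {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(ROOT); // "enter" the root node
        builder.token(COMMENT, "-- test comment\n");
        builder.start_node(STMT); // "enter" a statement node
        builder.token(EXPR, "select * from contact where id = '123';");
        builder.finish_node(); // "finish" the statement
        builder.finish_node(); // "finish" the root
        let green = builder.finish(); // immutable green tree
        println!("{:?}", green);
    }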
