From 5d8aa86abd718a5f5fc0d2660d114a305d06944b Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Wed, 9 Jul 2025 15:20:59 -0400 Subject: [PATCH 01/11] feature: 'NOT' operator added --- parser/src/parser/ast.rs | 3 +++ parser/src/parser/lexer.rs | 4 ++++ parser/src/parser/sql/group_by.rs | 4 +--- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/parser/src/parser/ast.rs b/parser/src/parser/ast.rs index 424d95c..5c21141 100644 --- a/parser/src/parser/ast.rs +++ b/parser/src/parser/ast.rs @@ -75,8 +75,11 @@ pub enum SyntaxKind { EQUAL, GT, LT, + AND, OR, + NOT, + GROUP, ORDER, BY, diff --git a/parser/src/parser/lexer.rs b/parser/src/parser/lexer.rs index 06a4751..0d4ca13 100644 --- a/parser/src/parser/lexer.rs +++ b/parser/src/parser/lexer.rs @@ -27,6 +27,9 @@ pub enum Token { #[regex("(?i)OR")] Or, + #[regex("(?i)NOT")] + Not, + #[regex("(?i)GROUP")] Group, @@ -106,6 +109,7 @@ impl Token { Token::GT => (SyntaxKind::GT, String::from(">")), Token::And => (SyntaxKind::AND, String::from("AND")), Token::Or => (SyntaxKind::OR, String::from("OR")), + Token::Not => (SyntaxKind::NOT, String::from("NOT")), Token::Group => (SyntaxKind::GROUP, String::from("GROUP")), Token::By => (SyntaxKind::BY, String::from("BY")), Token::Desc => (SyntaxKind::DESC, String::from("DESC")), diff --git a/parser/src/parser/sql/group_by.rs b/parser/src/parser/sql/group_by.rs index 4feab9e..9c78e05 100644 --- a/parser/src/parser/sql/group_by.rs +++ b/parser/src/parser/sql/group_by.rs @@ -1,6 +1,4 @@ use crate::parser::ast::SyntaxKind::*; use crate::parser::grammar::Grammar::{self, *}; -use crate::parser::grammar::GrammarType::*; -pub const GROUP_BY_GRAMMAR: Grammar = - Children(&[GROUP, BY], GROUP_BY, &[List(&[IDENTIFIER])]); +pub const GROUP_BY_GRAMMAR: Grammar = Children(&[GROUP, BY], GROUP_BY, &[List(&[IDENTIFIER])]); From 05432494e7242e77bc7395b3966bd37871c1203d Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Fri, 18 Jul 2025 04:47:54 -0400 Subject: [PATCH 02/11] small changes in 
is_ddl and is_dql + some keywords added --- parser/src/parser/ast.rs | 23 ++++++++++++++++++----- parser/src/parser/lexer.rs | 8 ++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/parser/src/parser/ast.rs b/parser/src/parser/ast.rs index 5c21141..b3a3170 100644 --- a/parser/src/parser/ast.rs +++ b/parser/src/parser/ast.rs @@ -59,6 +59,9 @@ pub enum SyntaxKind { GROUP_BY, CREATE, + ALTER, + DROP, + TRUNCATE, TABLE, @@ -72,14 +75,21 @@ pub enum SyntaxKind { PARENTHESES_END, VALUES, DEFINITION, + EQUAL, GT, LT, + LEQT, + GEQT, AND, OR, NOT, + TRUE, + FALSE, + BOOLEAN_OP, + GROUP, ORDER, BY, @@ -87,19 +97,22 @@ pub enum SyntaxKind { ASC, DISTINCT, - COMPARE, - GREATER, - LESS, + COMPARISON, EMPTY, ROOT, } impl SyntaxKind { pub fn is_dql(&self) -> bool { - (2..=3).contains(&(*self as u16)) + //(2..=3).contains(&(*self as u16)) + *self == SELECT } pub fn is_ddl(&self) -> bool { - (4..=4).contains(&(*self as u16)) + //(4..=4).contains(&(*self as u16)) + match *self { + CREATE | ALTER | DROP | TRUNCATE => true, + _ => false, + } } } diff --git a/parser/src/parser/lexer.rs b/parser/src/parser/lexer.rs index 0d4ca13..944308d 100644 --- a/parser/src/parser/lexer.rs +++ b/parser/src/parser/lexer.rs @@ -69,6 +69,12 @@ pub enum Token { #[token("=")] Equal, + #[token(">=")] + GEQT, + + #[token("<=")] + LEQT, + #[token(">")] GT, @@ -107,6 +113,8 @@ impl Token { Token::Where => (SyntaxKind::WHERE, String::from("WHERE")), Token::LT => (SyntaxKind::LT, String::from("<")), Token::GT => (SyntaxKind::GT, String::from(">")), + Token::LEQT => (SyntaxKind::LEQT, String::from("<=")), + Token::GEQT => (SyntaxKind::GEQT, String::from(">=")), Token::And => (SyntaxKind::AND, String::from("AND")), Token::Or => (SyntaxKind::OR, String::from("OR")), Token::Not => (SyntaxKind::NOT, String::from("NOT")), From 46934a00e1c98693f79764acad89f095d20b1eac Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Wed, 30 Jul 2025 20:17:11 -0400 Subject: [PATCH 03/11] feat: chumsky and thiserror 
added as dependencies --- parser/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 110049f..5475a2c 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +chumsky = "0.10.1" logos = "0.15.0" rowan = "0.16.1" thiserror = "2.0.12" From 6613d25adcb08ea3d83c8d36278847dc8dba4486 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Wed, 30 Jul 2025 20:41:20 -0400 Subject: [PATCH 04/11] chore!: rowan removed from the project. Parser implementation needed. --- Cargo.lock | 244 +++++++++++++++++--- cli/Cargo.toml | 1 - cli/src/main.rs | 49 ++-- parser/Cargo.toml | 1 - parser/src/builder.rs | 23 -- parser/src/lib.rs | 3 - parser/src/parser/ast.rs | 225 ------------------ parser/src/parser/ast/ast_builder.rs | 17 ++ parser/src/parser/ast/ast_trait.rs | 14 ++ parser/src/parser/ast/errors.rs | 7 + parser/src/parser/ast/mod.rs | 5 + parser/src/parser/ast/nodes.rs | 80 +++++++ parser/src/parser/ast/syntax_kind.rs | 78 +++++++ parser/src/parser/grammar/dialect.rs | 8 + parser/src/parser/grammar/mod.rs | 60 +---- parser/src/parser/grammar/process.rs | 66 ------ parser/src/parser/grammar/utils/children.rs | 30 --- parser/src/parser/grammar/utils/combo.rs | 37 --- parser/src/parser/grammar/utils/expect.rs | 20 -- parser/src/parser/grammar/utils/gtype.rs | 16 -- parser/src/parser/grammar/utils/list.rs | 28 --- parser/src/parser/grammar/utils/loop.rs | 16 -- parser/src/parser/grammar/utils/mod.rs | 8 - parser/src/parser/grammar/utils/optional.rs | 11 - parser/src/parser/grammar/utils/template.rs | 51 ---- parser/src/parser/lexer.rs | 74 +++--- parser/src/parser/mod.rs | 2 +- parser/src/parser/parser/errors.rs | 35 +++ parser/src/parser/parser/mod.rs | 1 + parser/src/parser/sql/boolean.rs | 22 -- parser/src/parser/sql/from.rs | 4 - parser/src/parser/sql/group_by.rs | 4 - parser/src/parser/sql/mod.rs | 8 - parser/src/parser/sql/order_by.rs | 6 - 
parser/src/parser/sql/select.rs | 12 - parser/src/parser/sql/swhere.rs | 5 - 36 files changed, 525 insertions(+), 746 deletions(-) delete mode 100644 parser/src/builder.rs delete mode 100644 parser/src/parser/ast.rs create mode 100644 parser/src/parser/ast/ast_builder.rs create mode 100644 parser/src/parser/ast/ast_trait.rs create mode 100644 parser/src/parser/ast/errors.rs create mode 100644 parser/src/parser/ast/mod.rs create mode 100644 parser/src/parser/ast/nodes.rs create mode 100644 parser/src/parser/ast/syntax_kind.rs create mode 100644 parser/src/parser/grammar/dialect.rs delete mode 100644 parser/src/parser/grammar/process.rs delete mode 100644 parser/src/parser/grammar/utils/children.rs delete mode 100644 parser/src/parser/grammar/utils/combo.rs delete mode 100644 parser/src/parser/grammar/utils/expect.rs delete mode 100644 parser/src/parser/grammar/utils/gtype.rs delete mode 100644 parser/src/parser/grammar/utils/list.rs delete mode 100644 parser/src/parser/grammar/utils/loop.rs delete mode 100644 parser/src/parser/grammar/utils/mod.rs delete mode 100644 parser/src/parser/grammar/utils/optional.rs delete mode 100644 parser/src/parser/grammar/utils/template.rs create mode 100644 parser/src/parser/parser/errors.rs create mode 100644 parser/src/parser/parser/mod.rs delete mode 100644 parser/src/parser/sql/boolean.rs delete mode 100644 parser/src/parser/sql/from.rs delete mode 100644 parser/src/parser/sql/group_by.rs delete mode 100644 parser/src/parser/sql/mod.rs delete mode 100644 parser/src/parser/sql/order_by.rs delete mode 100644 parser/src/parser/sql/select.rs delete mode 100644 parser/src/parser/sql/swhere.rs diff --git a/Cargo.lock b/Cargo.lock index 1581196..47ee920 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,11 +2,25 @@ # It is not intended for manual editing. 
version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "atern" version = "0.1.0" dependencies = [ - "rowan", "sql_parser", ] @@ -17,10 +31,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" [[package]] -name = "countme" -version = "3.0.1" +name = "cc" +version = "1.2.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chumsky" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14377e276b2c8300513dff55ba4cc4142b44e5d6de6d00eb5b2307d650bb4ec1" +dependencies = [ + "hashbrown", + "regex-automata", + "serde", + "stacker", + "unicode-ident", + "unicode-segmentation", +] + +[[package]] +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "fnv" @@ -28,11 +71,22 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" 
+version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "lazy_static" @@ -40,6 +94,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + [[package]] name = "logos" version = "0.15.0" @@ -60,7 +120,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "regex-syntax", + "regex-syntax 0.8.5", "rustc_version", "syn", ] @@ -74,6 +134,12 @@ dependencies = [ "logos-codegen", ] +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + [[package]] name = "proc-macro2" version = "1.0.94" @@ -83,6 +149,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.40" @@ -93,28 +168,27 @@ dependencies = [ ] [[package]] -name = "regex-syntax" -version = "0.8.5" +name = "regex-automata" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] [[package]] -name = "rowan" -version = "0.16.1" +name = "regex-syntax" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "417a3a9f582e349834051b8a10c8d71ca88da4211e4093528e36b9845f6b5f21" -dependencies = [ - "countme", - "hashbrown", - "rustc-hash", - "text-size", -] +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] -name = "rustc-hash" -version = "1.1.0" +name = "regex-syntax" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustc_version" @@ -131,15 +205,54 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "sql_parser" version = "0.1.0" dependencies = [ + "chumsky", "logos", - "rowan", "thiserror", ] +[[package]] +name = "stacker" +version = 
"0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + [[package]] name = "syn" version = "2.0.100" @@ -151,12 +264,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "text-size" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" - [[package]] name = "thiserror" version = "2.0.12" @@ -182,3 +289,82 @@ name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c4a93b8..e6ae975 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -4,5 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -rowan = "0.16.1" sql_parser = { path = "../parser" } diff --git a/cli/src/main.rs b/cli/src/main.rs index b1f2366..4f7a68e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,26 +1,27 @@ -use rowan::NodeOrToken; -use sql_parser::parser::ast::{ParserError, SyntaxElement, SyntaxKind}; -use sql_parser::SqlBuilder; +// fn print(indent: usize, element: SyntaxElement) { +// let kind: SyntaxKind = element.kind(); +// 
print!("{:indent$}", "", indent = indent); +// match element { +// NodeOrToken::Node(node) => { +// println!("- {:?}", kind); +// for child in node.children_with_tokens() { +// print(indent + 2, child); +// } +// }, +// +// NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(), kind), +// } +// } +// +// fn main() -> Result<(), ParserError> { +// let ast = +// SqlBuilder::from("SELECT DISTINCT FROM Users WHERE Country='USA' ORDER BY age DESC".to_string()).build()?; +// print(0, ast.into()); +// +// Ok(()) +// } +// -fn print(indent: usize, element: SyntaxElement) { - let kind: SyntaxKind = element.kind(); - print!("{:indent$}", "", indent = indent); - match element { - NodeOrToken::Node(node) => { - println!("- {:?}", kind); - for child in node.children_with_tokens() { - print(indent + 2, child); - } - }, - - NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(), kind), - } -} - -fn main() -> Result<(), ParserError> { - let ast = - SqlBuilder::from("SELECT DISTINCT FROM Users WHERE Country='USA' ORDER BY age DESC".to_string()).build()?; - print(0, ast.into()); - - Ok(()) +fn main() { + println!("Parser has to be implemented!") } diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 5475a2c..e674e57 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -6,5 +6,4 @@ edition = "2021" [dependencies] chumsky = "0.10.1" logos = "0.15.0" -rowan = "0.16.1" thiserror = "2.0.12" diff --git a/parser/src/builder.rs b/parser/src/builder.rs deleted file mode 100644 index a106f6d..0000000 --- a/parser/src/builder.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// ! Shortcut to parse sql code -use logos::Logos; - -use super::parser::ast::{Parser, ParserError, SyntaxNode}; -use super::parser::lexer::Token; - -/// This structure is an abstraction of using the lexer and passing that value -/// to the parser. 
Receives a String and returns a node of the ast -pub struct SqlBuilder { - code: String, -} - -impl SqlBuilder { - pub fn build(self) -> Result { - Ok(Parser::from_tokens(&mut Token::lexer(&self.code))?.parse()?) - } -} - -impl From for SqlBuilder { - fn from(code: String) -> Self { - Self { code } - } -} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index cd295a9..67c567f 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,4 +1 @@ -mod builder; pub mod parser; - -pub use builder::SqlBuilder; diff --git a/parser/src/parser/ast.rs b/parser/src/parser/ast.rs deleted file mode 100644 index b3a3170..0000000 --- a/parser/src/parser/ast.rs +++ /dev/null @@ -1,225 +0,0 @@ -// Select -// All - * -// From -// Identifier - Table -use std::iter::Peekable; - -use logos::Lexer; -use rowan::{GreenNodeBuilder, NodeOrToken}; -use thiserror::Error; - -use super::grammar::process_grammar; -use super::lexer::Token; -use super::sql::select::SELECT_GRAMMAR; - -/// Possible errors at the time of generating the ast -#[derive(Error, Debug)] -pub enum ParserError { - // TODO: this error should be in LexerError and not in ParserError - #[error("Invalid Token {0}")] - InvalidToken(String), - - /// This error is triggered when a trailing comma is left at the time of - /// enumeration, e.g. 1, 2, - #[error("Trailing Comma is not allowed")] - TrailingComma, - - /// This error is triggered when it expects a specific node and receives a - /// node of another type. 
- #[error("Expected {0} found {1}")] - ExpectedType(SyntaxKind, SyntaxKind), - - /// This error is triggered when the node does not match with the expected - /// by the context - #[error("Unexpected Node {0}")] - UnexpectedNode(SyntaxKind), - - /// This error is triggered when the definition of the context is incomplete - /// e.g.: SELECT; (without passing any body) - #[error("Expected Body for {0}")] - ExpectedBodyFor(SyntaxKind), - - /// this error appears when the content is finished but the current grammar - /// rule needs more content to complete. - #[error("Unexpected EOF")] - UnexpectedEof, -} - -/// Nodes that the ast can have. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[allow(non_camel_case_types)] -#[repr(u16)] -pub enum SyntaxKind { - WHITESPACE = 0, - - SELECT, - FROM, - WHERE, - ORDER_BY, - GROUP_BY, - - CREATE, - ALTER, - DROP, - TRUNCATE, - - TABLE, - - IDENTIFIER, - TEXT, - ALL, - COMMA, - NUMBER, - SEMICOLON, - PARENTHESES_START, - PARENTHESES_END, - VALUES, - DEFINITION, - - EQUAL, - GT, - LT, - LEQT, - GEQT, - - AND, - OR, - NOT, - - TRUE, - FALSE, - BOOLEAN_OP, - - GROUP, - ORDER, - BY, - DESC, - ASC, - DISTINCT, - - COMPARISON, - EMPTY, - ROOT, -} - -impl SyntaxKind { - pub fn is_dql(&self) -> bool { - //(2..=3).contains(&(*self as u16)) - *self == SELECT - } - pub fn is_ddl(&self) -> bool { - //(4..=4).contains(&(*self as u16)) - match *self { - CREATE | ALTER | DROP | TRUNCATE => true, - _ => false, - } - } -} - -impl std::fmt::Display for SyntaxKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -use SyntaxKind::*; - -impl From for rowan::SyntaxKind { - fn from(kind: SyntaxKind) -> Self { - Self(kind as u16) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Lang {} -impl rowan::Language for Lang { - type Kind = SyntaxKind; - fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { - assert!(raw.0 <= ROOT as u16); - 
unsafe { std::mem::transmute::(raw.0) } - } - fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { - kind.into() - } -} - -pub type SyntaxNode = rowan::SyntaxNode; -pub type SyntaxToken = rowan::SyntaxToken; -pub type SyntaxElement = NodeOrToken; - -/// This struct symbolizes the parser, receives a Peekable that is used to -/// iterate over the nodes and has a GreenNodeBuilder that symbolizes the ast -pub struct Parser { - pub builder: GreenNodeBuilder<'static>, - pub iter: Peekable>, -} - -impl Parser { - pub fn peek(&mut self) -> Option { - while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) { - self.bump(); - } - self.iter.peek().map(|&(t, _)| t) - } - pub fn peek_with_content(&mut self) -> Option<(SyntaxKind, &String)> { - while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) { - self.bump(); - } - self.iter.peek().map(|&(t, ref c)| (t, c)) - } - pub fn next(&mut self) { - self.iter.next(); - } - pub fn bump(&mut self) { - if let Some((token, string)) = self.iter.next() { - self.builder.token(token.into(), string.as_str()); - } - } - fn handle_val(&mut self) -> Result<(), ParserError> { - match self.peek().unwrap() { - SELECT => { - process_grammar(self, SELECT, SELECT_GRAMMAR)?; - }, - CREATE => { - process_grammar(self, CREATE, &[])?; - }, - SEMICOLON => { - self.next(); - }, - n => return Err(ParserError::UnexpectedNode(n)), - } - - Ok(()) - } - /// Parses the entire contents of the iter and returns an ast - pub fn parse(mut self) -> Result { - self.builder.start_node(ROOT.into()); - - while let Some(_) = self.peek() { - self.handle_val()?; - } - - self.builder.finish_node(); - - Ok(SyntaxNode::new_root(self.builder.finish())) - } - /// Receives a lexer and iterates its tokens (making sure they are not an - /// Err) and returns a Parser. 
- pub fn from_tokens(lex: &mut Lexer<'_, Token>) -> Result { - let mut nodes = Vec::new(); - - while let Some(token) = lex.next() { - match token { - Ok(t) => { - nodes.push(t.to_syntax()); - }, - Err(_) => return Err(ParserError::InvalidToken(lex.slice().to_string())), - } - } - - Ok(Parser { - builder: GreenNodeBuilder::new(), - iter: nodes.into_iter().peekable(), - }) - } -} diff --git a/parser/src/parser/ast/ast_builder.rs b/parser/src/parser/ast/ast_builder.rs new file mode 100644 index 0000000..bc3f961 --- /dev/null +++ b/parser/src/parser/ast/ast_builder.rs @@ -0,0 +1,17 @@ +use super::ast_trait::Ast; +use super::errors::AstErr; +use super::nodes::TreeNode; +use crate::parser::grammar::dialect::Dialect; + +trait AstBuilder +where + D: Dialect, + T: Ast, +{ + fn add(&self, node: TreeNode) -> Result<(), AstErr>; + fn tree(&self) -> &T; + fn build(&self) -> T; + fn checkpoint(&self) -> Option<&TreeNode>; + fn new() -> Self; + fn back(&self) -> Option<&TreeNode>; +} diff --git a/parser/src/parser/ast/ast_trait.rs b/parser/src/parser/ast/ast_trait.rs new file mode 100644 index 0000000..1bc6d18 --- /dev/null +++ b/parser/src/parser/ast/ast_trait.rs @@ -0,0 +1,14 @@ +use super::errors::AstErr; +use super::nodes::TreeNode; +use crate::parser::grammar::dialect::Dialect; + +pub trait Ast +where + D: Dialect, +{ + fn root(&self) -> Option>>; + fn take_root(&mut self, node: TreeNode) -> Result<(), AstErr>; + fn diff(&self, other: &Self) -> Vec>; + fn from_node(seed: TreeNode) -> Self; + fn new() -> Self; +} diff --git a/parser/src/parser/ast/errors.rs b/parser/src/parser/ast/errors.rs new file mode 100644 index 0000000..2ceaad4 --- /dev/null +++ b/parser/src/parser/ast/errors.rs @@ -0,0 +1,7 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum AstErr { + #[error("Terminal node: {0} cannot have children")] + TerminalNodeChildAdition(String), +} diff --git a/parser/src/parser/ast/mod.rs b/parser/src/parser/ast/mod.rs new file mode 100644 index 0000000..6a7e139 
--- /dev/null +++ b/parser/src/parser/ast/mod.rs @@ -0,0 +1,5 @@ +pub mod ast_builder; +pub mod ast_trait; +pub mod errors; +pub mod nodes; +pub mod syntax_kind; diff --git a/parser/src/parser/ast/nodes.rs b/parser/src/parser/ast/nodes.rs new file mode 100644 index 0000000..118c1bb --- /dev/null +++ b/parser/src/parser/ast/nodes.rs @@ -0,0 +1,80 @@ +use std::cell::RefCell; +use std::fmt::Display; +use std::marker::PhantomData; +use std::rc::Rc; +use std::rc::Weak; + +use super::ast_trait::Ast; +use super::errors::AstErr; +use super::syntax_kind::SyntaxKind; +use crate::parser::grammar::dialect::Dialect; + +///Represents the nodes of our AST +///in the simplest and generic way. +///The Dialect trait will deal with the grammar +///and compatibility between Nodes + +#[derive(Debug)] +pub enum TreeNode { + Terminal { + //😭 + parent: RefCell>>>, + kind: SyntaxKind, + text: String, + _pd: PhantomData, + }, + NonTerminal { + //😭 + parent: RefCell>>>, + kind: SyntaxKind, + //😭 + children: Vec>>>, + _pd: PhantomData, + }, +} + +impl TreeNode { + pub fn get_kind(&self) -> SyntaxKind { + match self { + TreeNode::Terminal { ref kind, .. } => kind.clone(), + TreeNode::NonTerminal { ref kind, .. } => kind.clone(), + } + } + + pub fn to_tree(self) -> A + where + A: Ast, + { + A::from_node(self) + } + + pub(super) fn add(&mut self, child: Self) -> Result<(), AstErr> { + D::are_compatible(&child, self)?; + match self { + TreeNode::Terminal { .. } => Err(AstErr::TerminalNodeChildAdition(self.to_string())), + TreeNode::NonTerminal { ref mut children, .. } => { + children.push(RefCell::new(Rc::new(child))); + Ok(()) + }, + } + } +} + +impl Display for TreeNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Terminal { ref kind, ref text, .. } => write!(f, "{:?} - {}", kind, text), + Self::NonTerminal { kind, children, .. 
} => { + write!( + f, + "{:?} - {:?} ", + kind, + children + .iter() + .map(|child| (**child.borrow()).get_kind()) + .collect::>() + ) + }, + } + } +} diff --git a/parser/src/parser/ast/syntax_kind.rs b/parser/src/parser/ast/syntax_kind.rs new file mode 100644 index 0000000..2f9e775 --- /dev/null +++ b/parser/src/parser/ast/syntax_kind.rs @@ -0,0 +1,78 @@ +/// Nodes that the ast can have. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(non_camel_case_types)] +#[repr(u16)] +pub enum SyntaxKind { + WHITESPACE = 0, + + SELECT, + FROM, + WHERE, + ORDER_BY, + GROUP_BY, + + CREATE, + ALTER, + DROP, + TRUNCATE, + + TABLE, + + IDENTIFIER, + TEXT, + ALL, + COMMA, + NUMBER, + SEMICOLON, + PARENTHESES_START, + PARENTHESES_END, + VALUES, + DEFINITION, + + EQUAL, + GT, + LT, + LEQT, + GEQT, + + AND, + OR, + NOT, + + TRUE, + FALSE, + BOOLEAN_OP, + + GROUP, + ORDER, + BY, + DESC, + ASC, + DISTINCT, + + COMPARISON, + EMPTY, + ROOT, +} + +use SyntaxKind::*; + +impl SyntaxKind { + pub fn is_dql(&self) -> bool { + //(2..=3).contains(&(*self as u16)) + *self == SyntaxKind::SELECT + } + pub fn is_ddl(&self) -> bool { + //(4..=4).contains(&(*self as u16)) + match *self { + CREATE | ALTER | DROP | TRUNCATE => true, + _ => false, + } + } +} + +impl std::fmt::Display for SyntaxKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} diff --git a/parser/src/parser/grammar/dialect.rs b/parser/src/parser/grammar/dialect.rs new file mode 100644 index 0000000..86e7c0a --- /dev/null +++ b/parser/src/parser/grammar/dialect.rs @@ -0,0 +1,8 @@ +use crate::parser::ast::errors::AstErr; +use crate::parser::ast::nodes::TreeNode; + +///This trait deals with the grammar of the SQL dialect we want to parse. +///We probably need to add more methods in the future. 
+pub trait Dialect: Sized { + fn are_compatible(child: &TreeNode, parent: &TreeNode) -> Result<(), AstErr>; +} diff --git a/parser/src/parser/grammar/mod.rs b/parser/src/parser/grammar/mod.rs index 5d546c1..4dfc869 100644 --- a/parser/src/parser/grammar/mod.rs +++ b/parser/src/parser/grammar/mod.rs @@ -1,59 +1 @@ -mod process; -mod utils; - -pub use process::process_grammar; -pub use utils::template::TemplateConfig; - -use super::ast::SyntaxKind; - -/// Possible grammar rules to be parsed -pub enum Grammar { - /// Takes a list of nodes that can be in a list, does not support trailing - /// commas, e.g. List(&[IDENTIFIER, TEXT]), symbolizes a list that can - /// contain identifiers or text - List(&'static [SyntaxKind]), // separated by comma - /// It expects a GrammarType and if it is satisfied it wraps everything in - /// that type, besides executing more grammar rules. - Children(&'static [SyntaxKind], SyntaxKind, &'static [Grammar]), - /// It takes several rules, if a rule gives error it passes with the next - /// one, if it does not give error it applies it and finishes its execution - /// if the first parameter is true at least one rule has to be fulfilled, if - /// it is false all of them can fail. - Combo(bool, &'static [Grammar]), - /// It takes a node and verifies that the current node of the parser matches - /// the node passed in the rule definition, the next parameter is a boolean - /// that indicates if you want to consume that parameter (add it to the ast) - /// or not. - Expect(SyntaxKind, bool), - /// It takes a template and will compare that template with the current - /// definition. - Template(&'static [GrammarType], TemplateConfig), - /// Execute a rule several times until a specific node at which it will stop - /// executing that rule (e.g. a semicolon). - Loop(&'static Grammar, SyntaxKind), - - Optional(&'static Grammar), - - GType(GrammarType), -} - -#[derive(Debug, Clone)] -/// It represents a more flexible way of classifying nodes. 
-pub enum GrammarType { - /// checks if the node belongs to the Data Query Language group - Dql, - /// Checks if a node is equal to the node that was passed in the definition. - Type(SyntaxKind), - /// Checks if the node is in the defined node list. - Multi(&'static [SyntaxKind]), -} - -impl PartialEq for GrammarType { - fn eq(&self, other: &SyntaxKind) -> bool { - match self { - Self::Dql => other.is_ddl(), - Self::Type(t) => t == other, - Self::Multi(l) => l.contains(other), - } - } -} +pub mod dialect; diff --git a/parser/src/parser/grammar/process.rs b/parser/src/parser/grammar/process.rs deleted file mode 100644 index 364ca85..0000000 --- a/parser/src/parser/grammar/process.rs +++ /dev/null @@ -1,66 +0,0 @@ -use super::utils::children::process_children; -use super::utils::combo::process_combo; -use super::utils::expect::process_expect; -use super::utils::gtype::process_gtype; -use super::utils::list::process_list; -use super::utils::optional::process_optional; -use super::utils::r#loop::process_loop; -use super::utils::template::process_template; -use super::Grammar; -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; - -/// Takes an instance of the Parser, a parent node, and a list with definitions -/// of the grammar to parse, returns nothing but modifies the parser. -pub fn process_grammar( - parser: &mut Parser, - father: SyntaxKind, - grammar: &[Grammar], -) -> Result<(), ParserError> { - parser.builder.start_node_at(parser.builder.checkpoint(), father.into()); - parser.next(); - - for rule in grammar { - process_rule(&rule, father, parser)?; - } - - parser.builder.finish_node(); - - Ok(()) -} - -/// Takes a rule to be processed and executes the function that does the -/// corresponding function depending on the definition of that rule. 
-pub fn process_rule( - rule: &Grammar, - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - match rule { - Grammar::List(t) => { - process_list(t, parser)?; - }, - Grammar::Loop(child, stop) => { - process_loop(child, *stop, father, parser)?; - }, - Grammar::Combo(optional, children) => { - process_combo(*optional, children, father, parser)?; - }, - Grammar::Children(start, node_father, body) => { - process_children(start, *node_father, body, father, parser)?; - }, - Grammar::Expect(token, consume) => { - process_expect(*token, *consume, parser)?; - }, - Grammar::Template(template, config) => { - process_template(template, config, parser)?; - }, - Grammar::Optional(rule) => { - process_optional(rule, father, parser); - }, - Grammar::GType(t) => { - process_gtype(&t, parser)?; - }, - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/children.rs b/parser/src/parser/grammar/utils/children.rs deleted file mode 100644 index e4fdeed..0000000 --- a/parser/src/parser/grammar/utils/children.rs +++ /dev/null @@ -1,30 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_children( - start: &[SyntaxKind], - node_father: SyntaxKind, - body: &[Grammar], - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - for rule in start { - if parser.peek() != Some(*rule) { - return Err(ParserError::UnexpectedNode(parser.peek().unwrap_or(EMPTY))); - } - - parser.next(); - } - - parser.builder.start_node_at(parser.builder.checkpoint(), node_father.into()); - - for rule in body { - process_rule(&rule, father, parser)?; - } - - parser.builder.finish_node(); - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/combo.rs b/parser/src/parser/grammar/utils/combo.rs deleted file mode 100644 index de054da..0000000 --- a/parser/src/parser/grammar/utils/combo.rs +++ /dev/null @@ 
-1,37 +0,0 @@ -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_combo( - optional: bool, - children: &[Grammar], - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - let mut good = true; - let iter = parser.iter.clone(); - - for child in children { - match process_rule(&child, father, parser) { - Ok(_) => { - good = true; - break; - }, - Err(_) => { - good = false; - // restore iter - parser.iter = iter.clone(); - }, - }; - } - - if !good && !optional { - return Err(ParserError::ExpectedBodyFor(father)); - } - - if !good { - parser.next(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/expect.rs b/parser/src/parser/grammar/utils/expect.rs deleted file mode 100644 index 0d5f905..0000000 --- a/parser/src/parser/grammar/utils/expect.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; - -pub fn process_expect( - token: SyntaxKind, - consume: bool, - parser: &mut Parser, -) -> Result<(), ParserError> { - if parser.peek() != Some(token) { - return Err(ParserError::ExpectedType(token, parser.peek().unwrap_or(EMPTY))); - } - - if consume { - parser.bump(); - } else { - parser.next(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/gtype.rs b/parser/src/parser/grammar/utils/gtype.rs deleted file mode 100644 index 5c30d7b..0000000 --- a/parser/src/parser/grammar/utils/gtype.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::GrammarType; - -pub fn process_gtype(gtype: &GrammarType, parser: &mut Parser) -> Result<(), ParserError> { - if parser.peek().is_none() { - return Err(ParserError::UnexpectedEof); - } - - if gtype != &parser.peek().unwrap() { - return Err(ParserError::UnexpectedNode(parser.peek().unwrap())); - } - - parser.bump(); - - Ok(()) -} diff --git 
a/parser/src/parser/grammar/utils/list.rs b/parser/src/parser/grammar/utils/list.rs deleted file mode 100644 index cf4c8b5..0000000 --- a/parser/src/parser/grammar/utils/list.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; - -pub fn process_list(t: &[SyntaxKind], parser: &mut Parser) -> Result<(), ParserError> { - if parser.peek() == None { - return Err(ParserError::UnexpectedEof); - } - - while let Some(token) = parser.peek() { - if token == COMMA { - parser.next(); - - if !t.contains(&parser.peek().unwrap_or(EMPTY)) { - return Err(ParserError::TrailingComma); - } - - continue; - } - - if !t.contains(&token) { - break; - } - - parser.bump(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/loop.rs b/parser/src/parser/grammar/utils/loop.rs deleted file mode 100644 index 7e7e895..0000000 --- a/parser/src/parser/grammar/utils/loop.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_loop( - child: &Grammar, - stop: SyntaxKind, - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - while parser.peek() != Some(stop) && parser.peek() != None { - process_rule(&child, father, parser)?; - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/mod.rs b/parser/src/parser/grammar/utils/mod.rs deleted file mode 100644 index f7d9ee9..0000000 --- a/parser/src/parser/grammar/utils/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub mod children; -pub mod combo; -pub mod expect; -pub mod gtype; -pub mod list; -pub mod r#loop; -pub mod optional; -pub mod template; diff --git a/parser/src/parser/grammar/utils/optional.rs b/parser/src/parser/grammar/utils/optional.rs deleted file mode 100644 index dfbc49f..0000000 --- a/parser/src/parser/grammar/utils/optional.rs +++ /dev/null @@ -1,11 +0,0 @@ -use 
crate::parser::ast::{Parser, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_optional(rule: &Grammar, father: SyntaxKind, parser: &mut Parser) { - let iter = parser.iter.clone(); - - process_rule(rule, father, parser).unwrap_or_else(|_| { - parser.iter = iter; - }); -} diff --git a/parser/src/parser/grammar/utils/template.rs b/parser/src/parser/grammar/utils/template.rs deleted file mode 100644 index 7e7abe6..0000000 --- a/parser/src/parser/grammar/utils/template.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::GrammarType; - -#[derive(Debug)] -/// Configuration for the Template grammar rule -pub struct TemplateConfig { - /// It is the data type that will wrap everything in the template. - pub father: SyntaxKind, - /// Indicates a node to be ignored (almost always a derivative of the parent - /// node). - pub ignore: SyntaxKind, -} - -pub fn process_template( - template: &[GrammarType], - config: &TemplateConfig, - parser: &mut Parser, -) -> Result<(), ParserError> { - let tokens = { - let mut tokens = Vec::new(); - for rule in template { - if parser.peek() == Some(config.ignore) { - parser.next(); - continue; - } - - if *rule == parser.peek().unwrap_or(EMPTY) { - let peek = parser.peek_with_content().unwrap(); - tokens.push((peek.0, peek.1.clone())); - parser.next(); - continue; - } - - return Err(ParserError::UnexpectedNode(parser.peek().unwrap_or(EMPTY))); - } - tokens - }; - - { - parser.builder.start_node_at(parser.builder.checkpoint(), config.father.into()); - - for t in tokens.iter() { - parser.builder.token(t.0.into(), &t.1); - } - - parser.builder.finish_node(); - } - - Ok(()) -} diff --git a/parser/src/parser/lexer.rs b/parser/src/parser/lexer.rs index 944308d..6ad6f09 100644 --- a/parser/src/parser/lexer.rs +++ b/parser/src/parser/lexer.rs @@ -1,7 +1,5 @@ use logos::Logos; 
-use super::ast::SyntaxKind; - /// Possible tokens when parsing the Sql code #[derive(Logos, Debug)] #[logos(skip r"[ \t\n\f]+")] @@ -91,39 +89,43 @@ pub enum Token { ParenthesesEnd, } +///to_syntax method won't b used in the parser because some tokens +///depend of the context for having a certain syntax kind. The method +///its commented for a posible boilerplate reutilization for methods +///that use this enum impl Token { - #[inline] - /// Function used to take a token and its content (if any) and convert it - /// into a SyntaxKind (enum used by the Parser). - pub(crate) fn to_syntax(&self) -> (SyntaxKind, String) { - match &self { - Token::All => (SyntaxKind::ALL, String::from("*")), - Token::Select => (SyntaxKind::SELECT, String::from("SELECT")), - Token::Identifier(i) => (SyntaxKind::IDENTIFIER, String::from(i)), - Token::From => (SyntaxKind::FROM, String::from("FROM")), - Token::Text(t) => (SyntaxKind::TEXT, String::from(t)), - Token::Number(n) => (SyntaxKind::NUMBER, String::from(n)), - Token::Comma => (SyntaxKind::COMMA, String::from(",")), - Token::Semicolon => (SyntaxKind::SEMICOLON, String::from(";")), - Token::Create => (SyntaxKind::CREATE, String::from("CREATE")), - Token::Table => (SyntaxKind::TABLE, String::from("TABLE")), - Token::ParenthesesStart => (SyntaxKind::PARENTHESES_START, String::from("(")), - Token::ParenthesesEnd => (SyntaxKind::PARENTHESES_END, String::from(")")), - Token::Equal => (SyntaxKind::EQUAL, String::from("=")), - Token::Where => (SyntaxKind::WHERE, String::from("WHERE")), - Token::LT => (SyntaxKind::LT, String::from("<")), - Token::GT => (SyntaxKind::GT, String::from(">")), - Token::LEQT => (SyntaxKind::LT, String::from("<=")), - Token::GEQT => (SyntaxKind::GT, String::from(">=")), - Token::And => (SyntaxKind::AND, String::from("AND")), - Token::Or => (SyntaxKind::OR, String::from("OR")), - Token::Not => (SyntaxKind::NOT, String::from("NOT")), - Token::Group => (SyntaxKind::GROUP, String::from("GROUP")), - Token::By => 
(SyntaxKind::BY, String::from("BY")), - Token::Desc => (SyntaxKind::DESC, String::from("DESC")), - Token::ASC => (SyntaxKind::ASC, String::from("ASC")), - Token::Order => (SyntaxKind::ORDER, String::from("ORDER")), - Token::Distinct => (SyntaxKind::DISTINCT, String::from("DISTINCT")), - } - } + // #[inline] + // /// Function used to take a token and its content (if any) and convert it + // /// into a SyntaxKind (enum used by the Parser). + // pub(crate) fn to_syntax(&self) -> (SyntaxKind, String) { + // match &self { + // Token::All => (SyntaxKind::ALL, String::from("*")), + // Token::Select => (SyntaxKind::SELECT, String::from("SELECT")), + // Token::Identifier(i) => (SyntaxKind::IDENTIFIER, String::from(i)), + // Token::From => (SyntaxKind::FROM, String::from("FROM")), + // Token::Text(t) => (SyntaxKind::TEXT, String::from(t)), + // Token::Number(n) => (SyntaxKind::NUMBER, String::from(n)), + // Token::Comma => (SyntaxKind::COMMA, String::from(",")), + // Token::Semicolon => (SyntaxKind::SEMICOLON, String::from(";")), + // Token::Create => (SyntaxKind::CREATE, String::from("CREATE")), + // Token::Table => (SyntaxKind::TABLE, String::from("TABLE")), + // Token::ParenthesesStart => (SyntaxKind::PARENTHESES_START, String::from("(")), + // Token::ParenthesesEnd => (SyntaxKind::PARENTHESES_END, String::from(")")), + // Token::Equal => (SyntaxKind::EQUAL, String::from("=")), + // Token::Where => (SyntaxKind::WHERE, String::from("WHERE")), + // Token::LT => (SyntaxKind::LT, String::from("<")), + // Token::GT => (SyntaxKind::GT, String::from(">")), + // Token::LEQT => (SyntaxKind::LT, String::from("<=")), + // Token::GEQT => (SyntaxKind::GT, String::from(">=")), + // Token::And => (SyntaxKind::AND, String::from("AND")), + // Token::Or => (SyntaxKind::OR, String::from("OR")), + // Token::Not => (SyntaxKind::NOT, String::from("NOT")), + // Token::Group => (SyntaxKind::GROUP, String::from("GROUP")), + // Token::By => (SyntaxKind::BY, String::from("BY")), + // Token::Desc => 
(SyntaxKind::DESC, String::from("DESC")), + // Token::ASC => (SyntaxKind::ASC, String::from("ASC")), + // Token::Order => (SyntaxKind::ORDER, String::from("ORDER")), + // Token::Distinct => (SyntaxKind::DISTINCT, String::from("DISTINCT")), + // } + // } } diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index fe62780..748b7fc 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1,4 +1,4 @@ pub mod ast; pub mod grammar; pub mod lexer; -pub mod sql; +pub mod parser; diff --git a/parser/src/parser/parser/errors.rs b/parser/src/parser/parser/errors.rs new file mode 100644 index 0000000..ffb24dc --- /dev/null +++ b/parser/src/parser/parser/errors.rs @@ -0,0 +1,35 @@ +use crate::parser::ast::syntax_kind::SyntaxKind; +use thiserror::Error; + +/// Possible errors at the time of generating the ast +#[derive(Error, Debug)] +pub enum ParserError { + // TODO: this error should be in LexerError and not in ParserError + #[error("Invalid Token {0}")] + InvalidToken(String), + + /// This error is triggered when a trailing comma is left at the time of + /// enumeration, e.g. 1, 2, + #[error("Trailing Comma is not allowed")] + TrailingComma, + + /// This error is triggered when it expects a specific node and receives a + /// node of another type. + #[error("Expected {0} found {1}")] + ExpectedType(SyntaxKind, SyntaxKind), + + /// This error is triggered when the node does not match with the expected + /// by the context + #[error("Unexpected Node {0}")] + UnexpectedNode(SyntaxKind), + + /// This error is triggered when the definition of the context is incomplete + /// e.g.: SELECT; (without passing any body) + #[error("Expected Body for {0}")] + ExpectedBodyFor(SyntaxKind), + + /// this error appears when the content is finished but the current grammar + /// rule needs more content to complete. 
+ #[error("Unexpected EOF")] + UnexpectedEof, +} diff --git a/parser/src/parser/parser/mod.rs b/parser/src/parser/parser/mod.rs new file mode 100644 index 0000000..629e98f --- /dev/null +++ b/parser/src/parser/parser/mod.rs @@ -0,0 +1 @@ +pub mod errors; diff --git a/parser/src/parser/sql/boolean.rs b/parser/src/parser/sql/boolean.rs deleted file mode 100644 index bcffa22..0000000 --- a/parser/src/parser/sql/boolean.rs +++ /dev/null @@ -1,22 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, Combo, Template}; -use crate::parser::grammar::GrammarType::*; -use crate::parser::grammar::TemplateConfig; - -pub const CMP_GRAMMAR: Grammar = Combo( - true, - &[ - Template( - &[Multi(&[NUMBER, IDENTIFIER, TEXT]), Type(EQUAL), Multi(&[NUMBER, IDENTIFIER, TEXT])], - TemplateConfig { father: COMPARE, ignore: EQUAL }, - ), - Template( - &[Multi(&[NUMBER, IDENTIFIER]), Type(GT), Multi(&[NUMBER, IDENTIFIER])], - TemplateConfig { father: GREATER, ignore: GT }, - ), - Template( - &[Multi(&[NUMBER, IDENTIFIER]), Type(LT), Multi(&[NUMBER, IDENTIFIER])], - TemplateConfig { father: LESS, ignore: LT }, - ), - ], -); diff --git a/parser/src/parser/sql/from.rs b/parser/src/parser/sql/from.rs deleted file mode 100644 index be520b2..0000000 --- a/parser/src/parser/sql/from.rs +++ /dev/null @@ -1,4 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; - -pub const FROM_GRAMMAR: Grammar = Children(&[FROM], FROM, &[List(&[IDENTIFIER])]); diff --git a/parser/src/parser/sql/group_by.rs b/parser/src/parser/sql/group_by.rs deleted file mode 100644 index 9c78e05..0000000 --- a/parser/src/parser/sql/group_by.rs +++ /dev/null @@ -1,4 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; - -pub const GROUP_BY_GRAMMAR: Grammar = Children(&[GROUP, BY], GROUP_BY, &[List(&[IDENTIFIER])]); diff --git a/parser/src/parser/sql/mod.rs b/parser/src/parser/sql/mod.rs deleted file mode 
100644 index de57df5..0000000 --- a/parser/src/parser/sql/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! SQL definitions, not much to say here. - -pub mod order_by; -pub mod boolean; -pub mod from; -pub mod group_by; -pub mod select; -pub mod swhere; diff --git a/parser/src/parser/sql/order_by.rs b/parser/src/parser/sql/order_by.rs deleted file mode 100644 index 3e7f3e0..0000000 --- a/parser/src/parser/sql/order_by.rs +++ /dev/null @@ -1,6 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; -use crate::parser::grammar::GrammarType::*; - -pub const ORDER_BY_GRAMMAR: Grammar = Children(&[ORDER, BY], ORDER_BY, &[List(&[IDENTIFIER]), Optional(>ype(Multi(&[ASC, DESC])))]); - diff --git a/parser/src/parser/sql/select.rs b/parser/src/parser/sql/select.rs deleted file mode 100644 index 8fbf408..0000000 --- a/parser/src/parser/sql/select.rs +++ /dev/null @@ -1,12 +0,0 @@ -use super::from::FROM_GRAMMAR; -use super::group_by::GROUP_BY_GRAMMAR; -use super::order_by::ORDER_BY_GRAMMAR; -use super::swhere::WHERE_GRAMMAR; -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; - -pub const SELECT_GRAMMAR: &[Grammar] = &[ - Optional(&Expect(DISTINCT, true)), - List(&[IDENTIFIER, ALL]), - Loop(&Combo(true, &[FROM_GRAMMAR, WHERE_GRAMMAR, GROUP_BY_GRAMMAR, ORDER_BY_GRAMMAR]), SEMICOLON), -]; diff --git a/parser/src/parser/sql/swhere.rs b/parser/src/parser/sql/swhere.rs deleted file mode 100644 index 6bb989b..0000000 --- a/parser/src/parser/sql/swhere.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::boolean::CMP_GRAMMAR; -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, Children}; - -pub const WHERE_GRAMMAR: Grammar = Children(&[WHERE], WHERE, &[CMP_GRAMMAR]); From f430350ba5e429e2ce1ea1cf70d150c413bff483 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Wed, 30 Jul 2025 20:41:20 -0400 Subject: [PATCH 05/11] ok, another merge message --- parser/src/parser/ast/ast_trait.rs | 27 
+++++++++++++++++++++++++++ parser/src/parser/ast/errors.rs | 10 ++++++++++ parser/src/parser/ast/mod.rs | 16 ++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/parser/src/parser/ast/ast_trait.rs b/parser/src/parser/ast/ast_trait.rs index 1bc6d18..561304e 100644 --- a/parser/src/parser/ast/ast_trait.rs +++ b/parser/src/parser/ast/ast_trait.rs @@ -1,14 +1,41 @@ +<<<<<<< HEAD use super::errors::AstErr; use super::nodes::TreeNode; use crate::parser::grammar::dialect::Dialect; +======= +<<<<<<< HEAD +use crate::parser::ast::ast::SyntaxKind; +use crate::parser::ast::errors::AstErr; +use crate::parser::lexer::Token; +use std::fmt::Display; +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) pub trait Ast where D: Dialect, { +<<<<<<< HEAD +======= + fn diff(&self, other: &Self) -> Self; + fn as_node(&self) -> N; + fn from(root: &N) -> Self; +======= +use super::errors::AstErr; +use super::nodes::TreeNode; +use crate::parser::grammar::dialect::Dialect; + +pub trait Ast +where + D: Dialect, +{ +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) fn root(&self) -> Option>>; fn take_root(&mut self, node: TreeNode) -> Result<(), AstErr>; fn diff(&self, other: &Self) -> Vec>; fn from_node(seed: TreeNode) -> Self; fn new() -> Self; +<<<<<<< HEAD +======= +>>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) } diff --git a/parser/src/parser/ast/errors.rs b/parser/src/parser/ast/errors.rs index 2ceaad4..65fc499 100644 --- a/parser/src/parser/ast/errors.rs +++ b/parser/src/parser/ast/errors.rs @@ -1,7 +1,17 @@ use thiserror::Error; #[derive(Error, Debug)] +<<<<<<< HEAD +======= +<<<<<<< HEAD +pub enum AstErr {} +======= +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) 
pub enum AstErr { #[error("Terminal node: {0} cannot have children")] TerminalNodeChildAdition(String), } +<<<<<<< HEAD +======= +>>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) diff --git a/parser/src/parser/ast/mod.rs b/parser/src/parser/ast/mod.rs index 6a7e139..0fcf377 100644 --- a/parser/src/parser/ast/mod.rs +++ b/parser/src/parser/ast/mod.rs @@ -1,5 +1,21 @@ +<<<<<<< HEAD pub mod ast_builder; pub mod ast_trait; pub mod errors; pub mod nodes; pub mod syntax_kind; +======= +<<<<<<< HEAD +pub mod ast; +pub mod ast_trait; +pub mod errors; +pub mod nodes; +pub mod operators; +======= +pub mod ast_builder; +pub mod ast_trait; +pub mod errors; +pub mod nodes; +pub mod syntax_kind; +>>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) +>>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) From a16882dae4866df6a2427143536c5db0b087c905 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Fri, 8 Aug 2025 01:24:47 -0400 Subject: [PATCH 06/11] feat: traits for ast, builder, nodes and dialect defined --- parser/src/parser/ast/ast_trait.rs | 27 --------------------------- parser/src/parser/ast/errors.rs | 10 ---------- parser/src/parser/ast/mod.rs | 16 ---------------- 3 files changed, 53 deletions(-) diff --git a/parser/src/parser/ast/ast_trait.rs b/parser/src/parser/ast/ast_trait.rs index 561304e..1bc6d18 100644 --- a/parser/src/parser/ast/ast_trait.rs +++ b/parser/src/parser/ast/ast_trait.rs @@ -1,41 +1,14 @@ -<<<<<<< HEAD use super::errors::AstErr; use super::nodes::TreeNode; use crate::parser::grammar::dialect::Dialect; -======= -<<<<<<< HEAD -use crate::parser::ast::ast::SyntaxKind; -use crate::parser::ast::errors::AstErr; -use crate::parser::lexer::Token; -use std::fmt::Display; ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) 
pub trait Ast where D: Dialect, { -<<<<<<< HEAD -======= - fn diff(&self, other: &Self) -> Self; - fn as_node(&self) -> N; - fn from(root: &N) -> Self; -======= -use super::errors::AstErr; -use super::nodes::TreeNode; -use crate::parser::grammar::dialect::Dialect; - -pub trait Ast -where - D: Dialect, -{ ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) fn root(&self) -> Option>>; fn take_root(&mut self, node: TreeNode) -> Result<(), AstErr>; fn diff(&self, other: &Self) -> Vec>; fn from_node(seed: TreeNode) -> Self; fn new() -> Self; -<<<<<<< HEAD -======= ->>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) } diff --git a/parser/src/parser/ast/errors.rs b/parser/src/parser/ast/errors.rs index 65fc499..2ceaad4 100644 --- a/parser/src/parser/ast/errors.rs +++ b/parser/src/parser/ast/errors.rs @@ -1,17 +1,7 @@ use thiserror::Error; #[derive(Error, Debug)] -<<<<<<< HEAD -======= -<<<<<<< HEAD -pub enum AstErr {} -======= ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) pub enum AstErr { #[error("Terminal node: {0} cannot have children")] TerminalNodeChildAdition(String), } -<<<<<<< HEAD -======= ->>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) 
diff --git a/parser/src/parser/ast/mod.rs b/parser/src/parser/ast/mod.rs index 0fcf377..6a7e139 100644 --- a/parser/src/parser/ast/mod.rs +++ b/parser/src/parser/ast/mod.rs @@ -1,21 +1,5 @@ -<<<<<<< HEAD pub mod ast_builder; pub mod ast_trait; pub mod errors; pub mod nodes; pub mod syntax_kind; -======= -<<<<<<< HEAD -pub mod ast; -pub mod ast_trait; -pub mod errors; -pub mod nodes; -pub mod operators; -======= -pub mod ast_builder; -pub mod ast_trait; -pub mod errors; -pub mod nodes; -pub mod syntax_kind; ->>>>>>> 6613d25 (chore!: rowan removed from the project. Parser implementation needed.) ->>>>>>> a22607f (chore!: rowan removed from the project. Parser implementation needed.) From f78def944913c9e0bf9e3bb9f632bb6a1106eb3b Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Wed, 13 Aug 2025 00:43:53 -0400 Subject: [PATCH 07/11] feat: parser trait added --- parser/src/parser/parser/mod.rs | 1 + parser/src/parser/parser/parser.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 parser/src/parser/parser/parser.rs diff --git a/parser/src/parser/parser/mod.rs b/parser/src/parser/parser/mod.rs index 629e98f..4590435 100644 --- a/parser/src/parser/parser/mod.rs +++ b/parser/src/parser/parser/mod.rs @@ -1 +1,2 @@ pub mod errors; +pub mod parser; diff --git a/parser/src/parser/parser/parser.rs b/parser/src/parser/parser/parser.rs new file mode 100644 index 0000000..d10b59b --- /dev/null +++ b/parser/src/parser/parser/parser.rs @@ -0,0 +1,29 @@ +use crate::parser::grammar::dialect::Dialect; +use crate::parser::ast::ast_trait::Ast; +use chumsky::extra::Err as CE; +use chumsky::error::Rich; +use logos::Logos; + + + +trait Parser<'src, A, D, Token> +where + D: Dialect, + A: Ast, + Token: Logos<'src> + Clone +{ + + fn parse(&self) -> Result>>; + fn new(source: &'src str) -> Self; +} + +pub fn parse<'src, P, T, D, A>(text: &'src str) -> Result>> +where + D: Dialect, + T: Logos<'src> + Clone, + A: Ast, + P: Parser<'src , A , D, T> + 'src +{ + 
let parser = P::new(text); + parser.parse() +} \ No newline at end of file From a51d68792c581ea9148fad743c33223e402691b3 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Thu, 21 Aug 2025 02:26:18 -0400 Subject: [PATCH 08/11] feat(lexer): error token added. Clone & PartialEq traits implemented --- parser/src/parser/lexer.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parser/src/parser/lexer.rs b/parser/src/parser/lexer.rs index 6ad6f09..c723f80 100644 --- a/parser/src/parser/lexer.rs +++ b/parser/src/parser/lexer.rs @@ -1,7 +1,7 @@ use logos::Logos; /// Possible tokens when parsing the Sql code -#[derive(Logos, Debug)] +#[derive(Logos,Clone, Debug, PartialEq)] #[logos(skip r"[ \t\n\f]+")] pub enum Token { #[regex("(?i)SELECT")] @@ -87,6 +87,8 @@ pub enum Token { #[token(")")] ParenthesesEnd, + + Error } ///to_syntax method won't b used in the parser because some tokens From efb8d3e01f083401fd579d204421dd40218e6a34 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Thu, 21 Aug 2025 02:32:34 -0400 Subject: [PATCH 09/11] chore(grammar): dialect checking method commented temporaly --- parser/src/parser/grammar/dialect.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/parser/src/parser/grammar/dialect.rs b/parser/src/parser/grammar/dialect.rs index 86e7c0a..31380d9 100644 --- a/parser/src/parser/grammar/dialect.rs +++ b/parser/src/parser/grammar/dialect.rs @@ -1,8 +1,7 @@ -use crate::parser::ast::errors::AstErr; -use crate::parser::ast::nodes::TreeNode; +//use crate::parser::ast::errors::AstErr; ///This trait deals with the grammar of the SQL dialect we want to parse. ///We proly need to add more methods in the future. 
-pub trait Dialect: Sized { - fn are_compatible(child: &TreeNode, parent: &TreeNode) -> Result<(), AstErr>; +pub trait Dialect: Sized { + //fn are_compatible(child: &N, parent: &N) -> Result<(), AstErr>; } From abf16cf8cb64cce1190fb9b2660988155fc02fef Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Thu, 21 Aug 2025 12:59:13 -0400 Subject: [PATCH 10/11] refactor(ast)! : Nodes father attribute is commented for now for a more comfortable development. Nodes constructors implemented. Also Ast and Nodes are now printable. --- parser/src/parser/ast/ast_builder.rs | 12 ++-- parser/src/parser/ast/ast_trait.rs | 17 ++--- parser/src/parser/ast/nodes.rs | 92 ++++++++++++++++++---------- 3 files changed, 71 insertions(+), 50 deletions(-) diff --git a/parser/src/parser/ast/ast_builder.rs b/parser/src/parser/ast/ast_builder.rs index bc3f961..e29a95e 100644 --- a/parser/src/parser/ast/ast_builder.rs +++ b/parser/src/parser/ast/ast_builder.rs @@ -1,17 +1,15 @@ use super::ast_trait::Ast; use super::errors::AstErr; use super::nodes::TreeNode; -use crate::parser::grammar::dialect::Dialect; -trait AstBuilder +trait AstBuilder where - D: Dialect, - T: Ast, + T: Ast, { - fn add(&self, node: TreeNode) -> Result<(), AstErr>; + fn add(&self, node: N) -> Result<(), AstErr>; fn tree(&self) -> &T; fn build(&self) -> T; - fn checkpoint(&self) -> Option<&TreeNode>; + fn checkpoint(&self) -> Option<&TreeNode>; fn new() -> Self; - fn back(&self) -> Option<&TreeNode>; + fn back(&self) -> Option<&TreeNode>; } diff --git a/parser/src/parser/ast/ast_trait.rs b/parser/src/parser/ast/ast_trait.rs index 1bc6d18..38c0777 100644 --- a/parser/src/parser/ast/ast_trait.rs +++ b/parser/src/parser/ast/ast_trait.rs @@ -1,14 +1,9 @@ -use super::errors::AstErr; -use super::nodes::TreeNode; -use crate::parser::grammar::dialect::Dialect; +//use super::errors::AstErr; +use std::fmt::Display; -pub trait Ast -where - D: Dialect, -{ - fn root(&self) -> Option>>; - fn take_root(&mut self, node: TreeNode) -> Result<(), 
AstErr>; - fn diff(&self, other: &Self) -> Vec>; - fn from_node(seed: TreeNode) -> Self; +pub trait Ast: Display { + fn root(&self) -> &N; + //fn diff(&self, other: &Self) -> Vec>; + fn from_node(seed: N) -> Self; fn new() -> Self; } diff --git a/parser/src/parser/ast/nodes.rs b/parser/src/parser/ast/nodes.rs index 118c1bb..f5439d5 100644 --- a/parser/src/parser/ast/nodes.rs +++ b/parser/src/parser/ast/nodes.rs @@ -1,39 +1,36 @@ use std::cell::RefCell; use std::fmt::Display; -use std::marker::PhantomData; use std::rc::Rc; -use std::rc::Weak; use super::ast_trait::Ast; use super::errors::AstErr; use super::syntax_kind::SyntaxKind; -use crate::parser::grammar::dialect::Dialect; + + ///Represents the nodes of our AST ///in the simplest and generic way. ///The Dialect trait will deal with the grammar ///and compatibility between Nodes -#[derive(Debug)] -pub enum TreeNode { +#[derive(Debug, Clone)] +pub enum TreeNode { Terminal { //😭 - parent: RefCell>>>, + //parent: Option>>, kind: SyntaxKind, text: String, - _pd: PhantomData, }, NonTerminal { //😭 - parent: RefCell>>>, + //parent: Option>>>, kind: SyntaxKind, //😭 - children: Vec>>>, - _pd: PhantomData, + children: Vec>>>, }, } -impl TreeNode { +impl TreeNode { pub fn get_kind(&self) -> SyntaxKind { match self { TreeNode::Terminal { ref kind, .. } => kind.clone(), @@ -41,40 +38,71 @@ impl TreeNode { } } + /*pub fn get_parent(&mut self) -> &mut Option>> + { + match self { + TreeNode::NonTerminal{ref mut parent,..} => parent.borrow_mut(), + TreeNode::Terminal{ref mut parent,..} => parent.borrow_mut(), + } + }*/ + pub fn to_tree(self) -> A where - A: Ast, + A: Ast, { A::from_node(self) } - pub(super) fn add(&mut self, child: Self) -> Result<(), AstErr> { - D::are_compatible(&child, self)?; + pub fn add(&mut self, child: TreeNode) -> Result<(), AstErr> { match self { TreeNode::Terminal { .. } => Err(AstErr::TerminalNodeChildAdition(self.to_string())), - TreeNode::NonTerminal { ref mut children, .. 
} => { - children.push(RefCell::new(Rc::new(child))); + TreeNode::NonTerminal {ref mut children, .. } => { + /*if children.is_empty() { + child.get_parent().borrow_mut() = Rc::new(RefCell::new(self)); + } else { + child.get_parent = children[0]?.get_parent().clone(); + }*/ + children.push(Rc::new(RefCell::new(Box::new(child)))); Ok(()) }, } } -} - -impl Display for TreeNode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Terminal { ref kind, ref text, .. } => write!(f, "{:?} - {}", kind, text), - Self::NonTerminal { kind, children, .. } => { - write!( - f, - "{:?} - {:?} ", - kind, - children - .iter() - .map(|child| (**child.borrow()).get_kind()) - .collect::>() - ) + pub fn new_term(kind: SyntaxKind, text: String) -> TreeNode{ + TreeNode::Terminal{ + //parent: None, + kind, + text + } + } + pub fn new_no_term(kind: SyntaxKind, children: Vec> ) -> TreeNode{ + TreeNode::NonTerminal{ + //parent: None, + kind, + children: children.into_iter().map(|member| Rc::new(RefCell::new(member))).collect::>() + } + } + + fn print(indent: usize, node: &TreeNode, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for _ in 0..indent{ + write!(f,"\t"); + }; + match node { + Self::NonTerminal{ref kind, ref children} => { + write!(f, "• {:?}", kind); + for child in children { + write!(f,"\n"); + Self::print(indent + 1, &(**child.borrow()), f); + }; + std::fmt::Result::Ok(()) }, + Self::Terminal { ref kind, ref text} => write!(f, "• {:?}: {}", kind, text) } } } + +impl Display for TreeNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Self::print(0, self, f); + std::fmt::Result::Ok(()) + } +} \ No newline at end of file From 193c3e25e20d6bda16ca29d470274b3f5a172275 Mon Sep 17 00:00:00 2001 From: El-PowerPtr Date: Thu, 21 Aug 2025 13:01:40 -0400 Subject: [PATCH 11/11] feature(parser): Parser integrated with chumsky --- parser/src/parser/parser/mod.rs | 1 + parser/src/parser/parser/parser.rs | 25 
++++++++++++++----------- parser/src/parser/parser/utils.rs | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 parser/src/parser/parser/utils.rs diff --git a/parser/src/parser/parser/mod.rs b/parser/src/parser/parser/mod.rs index 4590435..bd125fd 100644 --- a/parser/src/parser/parser/mod.rs +++ b/parser/src/parser/parser/mod.rs @@ -1,2 +1,3 @@ pub mod errors; pub mod parser; +pub mod utils; \ No newline at end of file diff --git a/parser/src/parser/parser/parser.rs b/parser/src/parser/parser/parser.rs index d10b59b..3b029c8 100644 --- a/parser/src/parser/parser/parser.rs +++ b/parser/src/parser/parser/parser.rs @@ -1,29 +1,32 @@ use crate::parser::grammar::dialect::Dialect; use crate::parser::ast::ast_trait::Ast; -use chumsky::extra::Err as CE; use chumsky::error::Rich; use logos::Logos; +use chumsky::prelude::ParseResult; -trait Parser<'src, A, D, Token> +pub trait Parser<'src,N, D, Token> where - D: Dialect, - A: Ast, + D: Dialect, Token: Logos<'src> + Clone { - fn parse(&self) -> Result>>; + fn parse(&mut self) -> ParseResult>; fn new(source: &'src str) -> Self; } -pub fn parse<'src, P, T, D, A>(text: &'src str) -> Result>> +pub fn parse<'src,N, P, T, D, A>(text: &'src str) -> Result>> where - D: Dialect, + D: Dialect, T: Logos<'src> + Clone, - A: Ast, - P: Parser<'src , A , D, T> + 'src + A: Ast, + P: Parser<'src, N, D, T> + 'src { - let parser = P::new(text); - parser.parse() + let mut parser = P::new(text); + let output = parser.parse(); + match output.into_result(){ + Ok(x) => Ok(A::from_node(x)), + Err(e) => Err(e) + } } \ No newline at end of file diff --git a/parser/src/parser/parser/utils.rs b/parser/src/parser/parser/utils.rs new file mode 100644 index 0000000..f21cdd2 --- /dev/null +++ b/parser/src/parser/parser/utils.rs @@ -0,0 +1,19 @@ +use crate::parser::ast::nodes::TreeNode; +use chumsky::container::Container; +use crate::parser::ast::syntax_kind::SyntaxKind; + +impl Container for TreeNode{ + fn push(&mut 
self, item: TreeNode){ + let _ = self.add(item).unwrap(); + } +} + +impl Default for TreeNode{ + fn default() -> Self{ + TreeNode::NonTerminal{ + //parent: None, + kind: SyntaxKind::ROOT, + children: vec![], + } + } +} \ No newline at end of file