diff --git a/Cargo.lock b/Cargo.lock index 1581196..47ee920 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,11 +2,25 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "atern" version = "0.1.0" dependencies = [ - "rowan", "sql_parser", ] @@ -17,10 +31,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" [[package]] -name = "countme" -version = "3.0.1" +name = "cc" +version = "1.2.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "chumsky" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14377e276b2c8300513dff55ba4cc4142b44e5d6de6d00eb5b2307d650bb4ec1" +dependencies = [ + "hashbrown", + "regex-automata", + "serde", + "stacker", + "unicode-ident", + "unicode-segmentation", +] + +[[package]] +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "fnv" @@ -28,11 +71,22 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "lazy_static" @@ -40,6 +94,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "libc" +version = "0.2.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" + [[package]] name = "logos" version = "0.15.0" @@ -60,7 +120,7 @@ dependencies = [ "lazy_static", "proc-macro2", "quote", - "regex-syntax", + "regex-syntax 0.8.5", "rustc_version", "syn", ] @@ -74,6 +134,12 @@ dependencies = [ "logos-codegen", ] +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + [[package]] 
name = "proc-macro2" version = "1.0.94" @@ -83,6 +149,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.40" @@ -93,28 +168,27 @@ dependencies = [ ] [[package]] -name = "regex-syntax" -version = "0.8.5" +name = "regex-automata" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.5", +] [[package]] -name = "rowan" -version = "0.16.1" +name = "regex-syntax" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "417a3a9f582e349834051b8a10c8d71ca88da4211e4093528e36b9845f6b5f21" -dependencies = [ - "countme", - "hashbrown", - "rustc-hash", - "text-size", -] +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" [[package]] -name = "rustc-hash" -version = "1.1.0" +name = "regex-syntax" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustc_version" @@ -131,15 +205,54 @@ version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "sql_parser" version = "0.1.0" dependencies = [ + "chumsky", "logos", - "rowan", "thiserror", ] +[[package]] +name = "stacker" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys", +] + [[package]] name = "syn" version = "2.0.100" @@ -151,12 +264,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "text-size" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f18aa187839b2bdb1ad2fa35ead8c4c2976b64e4363c386d45ac0f7ee85c9233" - [[package]] name = "thiserror" version = "2.0.12" @@ -182,3 +289,82 @@ name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c4a93b8..e6ae975 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -4,5 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -rowan = "0.16.1" sql_parser = { path = "../parser" } diff --git a/cli/src/main.rs b/cli/src/main.rs index b1f2366..4f7a68e 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,26 +1,27 @@ -use rowan::NodeOrToken; -use sql_parser::parser::ast::{ParserError, SyntaxElement, SyntaxKind}; -use sql_parser::SqlBuilder; +// fn print(indent: usize, element: SyntaxElement) { +// let kind: SyntaxKind = element.kind(); +// print!("{:indent$}", "", indent = indent); +// match element { +// NodeOrToken::Node(node) => { +// println!("- {:?}", kind); +// for child in node.children_with_tokens() { +// print(indent + 2, child); +// } +// }, +// +// NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(), kind), +// } +// } +// +// fn main() -> Result<(), ParserError> { +// let ast = +// SqlBuilder::from("SELECT DISTINCT FROM Users WHERE Country='USA' ORDER BY age DESC".to_string()).build()?; +// print(0, ast.into()); +// +// Ok(()) +// 
} +// -fn print(indent: usize, element: SyntaxElement) { - let kind: SyntaxKind = element.kind(); - print!("{:indent$}", "", indent = indent); - match element { - NodeOrToken::Node(node) => { - println!("- {:?}", kind); - for child in node.children_with_tokens() { - print(indent + 2, child); - } - }, - - NodeOrToken::Token(token) => println!("- {:?} {:?}", token.text(), kind), - } -} - -fn main() -> Result<(), ParserError> { - let ast = - SqlBuilder::from("SELECT DISTINCT FROM Users WHERE Country='USA' ORDER BY age DESC".to_string()).build()?; - print(0, ast.into()); - - Ok(()) +fn main() { + println!("Parser has to be implemented!") } diff --git a/parser/Cargo.toml b/parser/Cargo.toml index 110049f..e674e57 100644 --- a/parser/Cargo.toml +++ b/parser/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] +chumsky = "0.10.1" logos = "0.15.0" -rowan = "0.16.1" thiserror = "2.0.12" diff --git a/parser/src/builder.rs b/parser/src/builder.rs deleted file mode 100644 index a106f6d..0000000 --- a/parser/src/builder.rs +++ /dev/null @@ -1,23 +0,0 @@ -/// ! Shortcut to parse sql code -use logos::Logos; - -use super::parser::ast::{Parser, ParserError, SyntaxNode}; -use super::parser::lexer::Token; - -/// This structure is an abstraction of using the lexer and passing that value -/// to the parser. Receives a String and returns a node of the ast -pub struct SqlBuilder { - code: String, -} - -impl SqlBuilder { - pub fn build(self) -> Result { - Ok(Parser::from_tokens(&mut Token::lexer(&self.code))?.parse()?) - } -} - -impl From for SqlBuilder { - fn from(code: String) -> Self { - Self { code } - } -} diff --git a/parser/src/lib.rs b/parser/src/lib.rs index cd295a9..67c567f 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,4 +1 @@ -mod builder; pub mod parser; - -pub use builder::SqlBuilder; diff --git a/parser/src/parser/ast.rs b/parser/src/parser/ast.rs deleted file mode 100644 index 424d95c..0000000 --- a/parser/src/parser/ast.rs +++ /dev/null @@ -1,209 +0,0 @@ -// Select -// All - * -// From -// Identifier - Table -use std::iter::Peekable; - -use logos::Lexer; -use rowan::{GreenNodeBuilder, NodeOrToken}; -use thiserror::Error; - -use super::grammar::process_grammar; -use super::lexer::Token; -use super::sql::select::SELECT_GRAMMAR; - -/// Possible errors at the time of generating the ast -#[derive(Error, Debug)] -pub enum ParserError { - // TODO: this error should be in LexerError and not in ParserError - #[error("Invalid Token {0}")] - InvalidToken(String), - - /// This error is triggered when a trailing comma is left at the time of - /// enumeration, e.g. 1, 2, - #[error("Trailing Comma is not allowed")] - TrailingComma, - - /// This error is triggered when it expects a specific node and receives a - /// node of another type. - #[error("Expected {0} found {1}")] - ExpectedType(SyntaxKind, SyntaxKind), - - /// This error is triggered when the node does not match with the expected - /// by the context - #[error("Unexpected Node {0}")] - UnexpectedNode(SyntaxKind), - - /// This error is triggered when the definition of the context is incomplete - /// e.g.: SELECT; (without passing any body) - #[error("Expected Body for {0}")] - ExpectedBodyFor(SyntaxKind), - - /// this error appears when the content is finished but the current grammar - /// rule needs more content to complete. - #[error("Unexpected EOF")] - UnexpectedEof, -} - -/// Nodes that the ast can have. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[allow(non_camel_case_types)] -#[repr(u16)] -pub enum SyntaxKind { - WHITESPACE = 0, - - SELECT, - FROM, - WHERE, - ORDER_BY, - GROUP_BY, - - CREATE, - - TABLE, - - IDENTIFIER, - TEXT, - ALL, - COMMA, - NUMBER, - SEMICOLON, - PARENTHESES_START, - PARENTHESES_END, - VALUES, - DEFINITION, - EQUAL, - GT, - LT, - AND, - OR, - GROUP, - ORDER, - BY, - DESC, - ASC, - DISTINCT, - - COMPARE, - GREATER, - LESS, - EMPTY, - ROOT, -} - -impl SyntaxKind { - pub fn is_dql(&self) -> bool { - (2..=3).contains(&(*self as u16)) - } - pub fn is_ddl(&self) -> bool { - (4..=4).contains(&(*self as u16)) - } -} - -impl std::fmt::Display for SyntaxKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) - } -} - -use SyntaxKind::*; - -impl From for rowan::SyntaxKind { - fn from(kind: SyntaxKind) -> Self { - Self(kind as u16) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub enum Lang {} -impl rowan::Language for Lang { - type Kind = SyntaxKind; - fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind { - assert!(raw.0 <= ROOT as u16); - unsafe { std::mem::transmute::(raw.0) } - } - fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind { - kind.into() - } -} - -pub type SyntaxNode = rowan::SyntaxNode; -pub type SyntaxToken = rowan::SyntaxToken; -pub type SyntaxElement = NodeOrToken; - -/// This struct symbolizes the parser, receives a Peekable that is used to -/// iterate over the nodes and has a GreenNodeBuilder that symbolizes the ast -pub struct Parser { - pub builder: GreenNodeBuilder<'static>, - pub iter: Peekable>, -} - -impl Parser { - pub fn peek(&mut self) -> Option { - while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) { - self.bump(); - } - self.iter.peek().map(|&(t, _)| t) - } - pub fn peek_with_content(&mut self) -> Option<(SyntaxKind, &String)> { - while self.iter.peek().map(|&(t, _)| t == WHITESPACE).unwrap_or(false) { - self.bump(); - } - self.iter.peek().map(|&(t, ref c)| (t, c)) - } - pub fn next(&mut self) { - self.iter.next(); - } - pub fn bump(&mut self) { - if let Some((token, string)) = self.iter.next() { - self.builder.token(token.into(), string.as_str()); - } - } - fn handle_val(&mut self) -> Result<(), ParserError> { - match self.peek().unwrap() { - SELECT => { - process_grammar(self, SELECT, SELECT_GRAMMAR)?; - }, - CREATE => { - process_grammar(self, CREATE, &[])?; - }, - SEMICOLON => { - self.next(); - }, - n => return Err(ParserError::UnexpectedNode(n)), - } - - Ok(()) - } - /// Parses the entire contents of the iter and returns an ast - pub fn parse(mut self) -> Result { - self.builder.start_node(ROOT.into()); - - while let Some(_) = self.peek() { - self.handle_val()?; - } - - self.builder.finish_node(); - - Ok(SyntaxNode::new_root(self.builder.finish())) - } - /// Receives a lexer and iterates its tokens (making sure they are not an - /// Err) and returns a Parser. 
- pub fn from_tokens(lex: &mut Lexer<'_, Token>) -> Result { - let mut nodes = Vec::new(); - - while let Some(token) = lex.next() { - match token { - Ok(t) => { - nodes.push(t.to_syntax()); - }, - Err(_) => return Err(ParserError::InvalidToken(lex.slice().to_string())), - } - } - - Ok(Parser { - builder: GreenNodeBuilder::new(), - iter: nodes.into_iter().peekable(), - }) - } -} diff --git a/parser/src/parser/ast/ast_builder.rs b/parser/src/parser/ast/ast_builder.rs new file mode 100644 index 0000000..e29a95e --- /dev/null +++ b/parser/src/parser/ast/ast_builder.rs @@ -0,0 +1,15 @@ +use super::ast_trait::Ast; +use super::errors::AstErr; +use super::nodes::TreeNode; + +trait AstBuilder +where + T: Ast, +{ + fn add(&self, node: N) -> Result<(), AstErr>; + fn tree(&self) -> &T; + fn build(&self) -> T; + fn checkpoint(&self) -> Option<&TreeNode>; + fn new() -> Self; + fn back(&self) -> Option<&TreeNode>; +} diff --git a/parser/src/parser/ast/ast_trait.rs b/parser/src/parser/ast/ast_trait.rs new file mode 100644 index 0000000..38c0777 --- /dev/null +++ b/parser/src/parser/ast/ast_trait.rs @@ -0,0 +1,9 @@ +//use super::errors::AstErr; +use std::fmt::Display; + +pub trait Ast: Display { + fn root(&self) -> &N; + //fn diff(&self, other: &Self) -> Vec>; + fn from_node(seed: N) -> Self; + fn new() -> Self; +} diff --git a/parser/src/parser/ast/errors.rs b/parser/src/parser/ast/errors.rs new file mode 100644 index 0000000..2ceaad4 --- /dev/null +++ b/parser/src/parser/ast/errors.rs @@ -0,0 +1,7 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum AstErr { + #[error("Terminal node: {0} cannot have children")] + TerminalNodeChildAdition(String), +} diff --git a/parser/src/parser/ast/mod.rs b/parser/src/parser/ast/mod.rs new file mode 100644 index 0000000..6a7e139 --- /dev/null +++ b/parser/src/parser/ast/mod.rs @@ -0,0 +1,5 @@ +pub mod ast_builder; +pub mod ast_trait; +pub mod errors; +pub mod nodes; +pub mod syntax_kind; diff --git a/parser/src/parser/ast/nodes.rs b/parser/src/parser/ast/nodes.rs new file mode 100644 index 0000000..f5439d5 --- /dev/null +++ b/parser/src/parser/ast/nodes.rs @@ -0,0 +1,108 @@ +use std::cell::RefCell; +use std::fmt::Display; +use std::rc::Rc; + +use super::ast_trait::Ast; +use super::errors::AstErr; +use super::syntax_kind::SyntaxKind; + + + +///Represents the nodes of our AST +///in the simplest and generic way. +///The Dialect trait will deal with the grammar +///and compatibility between Nodes + +#[derive(Debug, Clone)] +pub enum TreeNode { + Terminal { + //😭 + //parent: Option>>, + kind: SyntaxKind, + text: String, + }, + NonTerminal { + //😭 + //parent: Option>>>, + kind: SyntaxKind, + //😭 + children: Vec>>>, + }, +} + +impl TreeNode { + pub fn get_kind(&self) -> SyntaxKind { + match self { + TreeNode::Terminal { ref kind, .. } => kind.clone(), + TreeNode::NonTerminal { ref kind, .. } => kind.clone(), + } + } + + /*pub fn get_parent(&mut self) -> &mut Option>> + { + match self { + TreeNode::NonTerminal{ref mut parent,..} => parent.borrow_mut(), + TreeNode::Terminal{ref mut parent,..} => parent.borrow_mut(), + } + }*/ + + pub fn to_tree(self) -> A + where + A: Ast, + { + A::from_node(self) + } + + pub fn add(&mut self, child: TreeNode) -> Result<(), AstErr> { + match self { + TreeNode::Terminal { .. } => Err(AstErr::TerminalNodeChildAdition(self.to_string())), + TreeNode::NonTerminal {ref mut children, .. 
} => { + /*if children.is_empty() { + child.get_parent().borrow_mut() = Rc::new(RefCell::new(self)); + } else { + child.get_parent = children[0]?.get_parent().clone(); + }*/ + children.push(Rc::new(RefCell::new(Box::new(child)))); + Ok(()) + }, + } + } + pub fn new_term(kind: SyntaxKind, text: String) -> TreeNode{ + TreeNode::Terminal{ + //parent: None, + kind, + text + } + } + pub fn new_no_term(kind: SyntaxKind, children: Vec> ) -> TreeNode{ + TreeNode::NonTerminal{ + //parent: None, + kind, + children: children.into_iter().map(|member| Rc::new(RefCell::new(member))).collect::>() + } + } + + fn print(indent: usize, node: &TreeNode, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for _ in 0..indent{ + write!(f,"\t"); + }; + match node { + Self::NonTerminal{ref kind, ref children} => { + write!(f, "• {:?}", kind); + for child in children { + write!(f,"\n"); + Self::print(indent + 1, &(**child.borrow()), f); + }; + std::fmt::Result::Ok(()) + }, + Self::Terminal { ref kind, ref text} => write!(f, "• {:?}: {}", kind, text) + } + } +} + +impl Display for TreeNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Self::print(0, self, f); + std::fmt::Result::Ok(()) + } +} \ No newline at end of file diff --git a/parser/src/parser/ast/syntax_kind.rs b/parser/src/parser/ast/syntax_kind.rs new file mode 100644 index 0000000..2f9e775 --- /dev/null +++ b/parser/src/parser/ast/syntax_kind.rs @@ -0,0 +1,78 @@ +/// Nodes that the ast can have. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[allow(non_camel_case_types)] +#[repr(u16)] +pub enum SyntaxKind { + WHITESPACE = 0, + + SELECT, + FROM, + WHERE, + ORDER_BY, + GROUP_BY, + + CREATE, + ALTER, + DROP, + TRUNCATE, + + TABLE, + + IDENTIFIER, + TEXT, + ALL, + COMMA, + NUMBER, + SEMICOLON, + PARENTHESES_START, + PARENTHESES_END, + VALUES, + DEFINITION, + + EQUAL, + GT, + LT, + LEQT, + GEQT, + + AND, + OR, + NOT, + + TRUE, + FALSE, + BOOLEAN_OP, + + GROUP, + ORDER, + BY, + DESC, + ASC, + DISTINCT, + + COMPARISON, + EMPTY, + ROOT, +} + +use SyntaxKind::*; + +impl SyntaxKind { + pub fn is_dql(&self) -> bool { + //(2..=3).contains(&(*self as u16)) + *self == SyntaxKind::SELECT + } + pub fn is_ddl(&self) -> bool { + //(4..=4).contains(&(*self as u16)) + match *self { + CREATE | ALTER | DROP | TRUNCATE => true, + _ => false, + } + } +} + +impl std::fmt::Display for SyntaxKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} diff --git a/parser/src/parser/grammar/dialect.rs b/parser/src/parser/grammar/dialect.rs new file mode 100644 index 0000000..31380d9 --- /dev/null +++ b/parser/src/parser/grammar/dialect.rs @@ -0,0 +1,7 @@ +//use crate::parser::ast::errors::AstErr; + +///This trait deals with the grammar of the SQL dialect we want to parse. +///We proly need to add more methods in the future. +pub trait Dialect: Sized { + //fn are_compatible(child: &N, parent: &N) -> Result<(), AstErr>; +} diff --git a/parser/src/parser/grammar/mod.rs b/parser/src/parser/grammar/mod.rs index 5d546c1..4dfc869 100644 --- a/parser/src/parser/grammar/mod.rs +++ b/parser/src/parser/grammar/mod.rs @@ -1,59 +1 @@ -mod process; -mod utils; - -pub use process::process_grammar; -pub use utils::template::TemplateConfig; - -use super::ast::SyntaxKind; - -/// Possible grammar rules to be parsed -pub enum Grammar { - /// Takes a list of nodes that can be in a list, does not support trailing - /// commas, e.g. 
List(&[IDENTIFIER, TEXT]), symbolizes a list that can - /// contain identifiers or text - List(&'static [SyntaxKind]), // separated by comma - /// It expects a GrammarType and if it is satisfied it wraps everything in - /// that type, besides executing more grammar rules. - Children(&'static [SyntaxKind], SyntaxKind, &'static [Grammar]), - /// It takes several rules, if a rule gives error it passes with the next - /// one, if it does not give error it applies it and finishes its execution - /// if the first parameter is true at least one rule has to be fulfilled, if - /// it is false all of them can fail. - Combo(bool, &'static [Grammar]), - /// It takes a node and verifies that the current node of the parser matches - /// the node passed in the rule definition, the next parameter is a boolean - /// that indicates if you want to consume that parameter (add it to the ast) - /// or not. - Expect(SyntaxKind, bool), - /// It takes a template and will compare that template with the current - /// definition. - Template(&'static [GrammarType], TemplateConfig), - /// Execute a rule several times until a specific node at which it will stop - /// executing that rule (e.g. a semicolon). - Loop(&'static Grammar, SyntaxKind), - - Optional(&'static Grammar), - - GType(GrammarType), -} - -#[derive(Debug, Clone)] -/// It represents a more flexible way of classifying nodes. -pub enum GrammarType { - /// checks if the node belongs to the Data Query Language group - Dql, - /// Checks if a node is equal to the node that was passed in the definition. - Type(SyntaxKind), - /// Checks if the node is in the defined node list. - Multi(&'static [SyntaxKind]), -} - -impl PartialEq for GrammarType { - fn eq(&self, other: &SyntaxKind) -> bool { - match self { - Self::Dql => other.is_ddl(), - Self::Type(t) => t == other, - Self::Multi(l) => l.contains(other), - } - } -} +pub mod dialect; diff --git a/parser/src/parser/grammar/process.rs b/parser/src/parser/grammar/process.rs deleted file mode 100644 index 364ca85..0000000 --- a/parser/src/parser/grammar/process.rs +++ /dev/null @@ -1,66 +0,0 @@ -use super::utils::children::process_children; -use super::utils::combo::process_combo; -use super::utils::expect::process_expect; -use super::utils::gtype::process_gtype; -use super::utils::list::process_list; -use super::utils::optional::process_optional; -use super::utils::r#loop::process_loop; -use super::utils::template::process_template; -use super::Grammar; -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; - -/// Takes an instance of the Parser, a parent node, and a list with definitions -/// of the grammar to parse, returns nothing but modifies the parser. -pub fn process_grammar( - parser: &mut Parser, - father: SyntaxKind, - grammar: &[Grammar], -) -> Result<(), ParserError> { - parser.builder.start_node_at(parser.builder.checkpoint(), father.into()); - parser.next(); - - for rule in grammar { - process_rule(&rule, father, parser)?; - } - - parser.builder.finish_node(); - - Ok(()) -} - -/// Takes a rule to be processed and executes the function that does the -/// corresponding function depending on the definition of that rule. 
-pub fn process_rule( - rule: &Grammar, - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - match rule { - Grammar::List(t) => { - process_list(t, parser)?; - }, - Grammar::Loop(child, stop) => { - process_loop(child, *stop, father, parser)?; - }, - Grammar::Combo(optional, children) => { - process_combo(*optional, children, father, parser)?; - }, - Grammar::Children(start, node_father, body) => { - process_children(start, *node_father, body, father, parser)?; - }, - Grammar::Expect(token, consume) => { - process_expect(*token, *consume, parser)?; - }, - Grammar::Template(template, config) => { - process_template(template, config, parser)?; - }, - Grammar::Optional(rule) => { - process_optional(rule, father, parser); - }, - Grammar::GType(t) => { - process_gtype(&t, parser)?; - }, - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/children.rs b/parser/src/parser/grammar/utils/children.rs deleted file mode 100644 index e4fdeed..0000000 --- a/parser/src/parser/grammar/utils/children.rs +++ /dev/null @@ -1,30 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_children( - start: &[SyntaxKind], - node_father: SyntaxKind, - body: &[Grammar], - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - for rule in start { - if parser.peek() != Some(*rule) { - return Err(ParserError::UnexpectedNode(parser.peek().unwrap_or(EMPTY))); - } - - parser.next(); - } - - parser.builder.start_node_at(parser.builder.checkpoint(), node_father.into()); - - for rule in body { - process_rule(&rule, father, parser)?; - } - - parser.builder.finish_node(); - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/combo.rs b/parser/src/parser/grammar/utils/combo.rs deleted file mode 100644 index de054da..0000000 --- a/parser/src/parser/grammar/utils/combo.rs +++ /dev/null @@ -1,37 +0,0 @@ -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_combo( - optional: bool, - children: &[Grammar], - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - let mut good = true; - let iter = parser.iter.clone(); - - for child in children { - match process_rule(&child, father, parser) { - Ok(_) => { - good = true; - break; - }, - Err(_) => { - good = false; - // restore iter - parser.iter = iter.clone(); - }, - }; - } - - if !good && !optional { - return Err(ParserError::ExpectedBodyFor(father)); - } - - if !good { - parser.next(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/expect.rs b/parser/src/parser/grammar/utils/expect.rs deleted file mode 100644 index 0d5f905..0000000 --- a/parser/src/parser/grammar/utils/expect.rs +++ /dev/null @@ -1,20 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; - -pub fn process_expect( - token: SyntaxKind, - consume: bool, - parser: &mut Parser, -) -> Result<(), ParserError> { - if parser.peek() != Some(token) { - return Err(ParserError::ExpectedType(token, parser.peek().unwrap_or(EMPTY))); - } - - if consume { - parser.bump(); - } else { - parser.next(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/gtype.rs b/parser/src/parser/grammar/utils/gtype.rs deleted file mode 100644 index 5c30d7b..0000000 --- a/parser/src/parser/grammar/utils/gtype.rs +++ /dev/null 
@@ -1,16 +0,0 @@ -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::GrammarType; - -pub fn process_gtype(gtype: &GrammarType, parser: &mut Parser) -> Result<(), ParserError> { - if parser.peek().is_none() { - return Err(ParserError::UnexpectedEof); - } - - if gtype != &parser.peek().unwrap() { - return Err(ParserError::UnexpectedNode(parser.peek().unwrap())); - } - - parser.bump(); - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/list.rs b/parser/src/parser/grammar/utils/list.rs deleted file mode 100644 index cf4c8b5..0000000 --- a/parser/src/parser/grammar/utils/list.rs +++ /dev/null @@ -1,28 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; - -pub fn process_list(t: &[SyntaxKind], parser: &mut Parser) -> Result<(), ParserError> { - if parser.peek() == None { - return Err(ParserError::UnexpectedEof); - } - - while let Some(token) = parser.peek() { - if token == COMMA { - parser.next(); - - if !t.contains(&parser.peek().unwrap_or(EMPTY)) { - return Err(ParserError::TrailingComma); - } - - continue; - } - - if !t.contains(&token) { - break; - } - - parser.bump(); - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/loop.rs b/parser/src/parser/grammar/utils/loop.rs deleted file mode 100644 index 7e7e895..0000000 --- a/parser/src/parser/grammar/utils/loop.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::parser::ast::{Parser, ParserError, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_loop( - child: &Grammar, - stop: SyntaxKind, - father: SyntaxKind, - parser: &mut Parser, -) -> Result<(), ParserError> { - while parser.peek() != Some(stop) && parser.peek() != None { - process_rule(&child, father, parser)?; - } - - Ok(()) -} diff --git a/parser/src/parser/grammar/utils/mod.rs b/parser/src/parser/grammar/utils/mod.rs deleted file mode 100644 index f7d9ee9..0000000 --- a/parser/src/parser/grammar/utils/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub mod children; -pub mod combo; -pub mod expect; -pub mod gtype; -pub mod list; -pub mod r#loop; -pub mod optional; -pub mod template; diff --git a/parser/src/parser/grammar/utils/optional.rs b/parser/src/parser/grammar/utils/optional.rs deleted file mode 100644 index dfbc49f..0000000 --- a/parser/src/parser/grammar/utils/optional.rs +++ /dev/null @@ -1,11 +0,0 @@ -use crate::parser::ast::{Parser, SyntaxKind}; -use crate::parser::grammar::process::process_rule; -use crate::parser::grammar::Grammar; - -pub fn process_optional(rule: &Grammar, father: SyntaxKind, parser: &mut Parser) { - let iter = parser.iter.clone(); - - process_rule(rule, father, parser).unwrap_or_else(|_| { - parser.iter = iter; - }); -} diff --git a/parser/src/parser/grammar/utils/template.rs b/parser/src/parser/grammar/utils/template.rs deleted file mode 100644 index 7e7abe6..0000000 --- a/parser/src/parser/grammar/utils/template.rs +++ /dev/null @@ -1,51 +0,0 @@ -use crate::parser::ast::SyntaxKind::{self, *}; -use crate::parser::ast::{Parser, ParserError}; -use crate::parser::grammar::GrammarType; - -#[derive(Debug)] -/// Configuration for the Template grammar rule -pub struct TemplateConfig { - /// It is the data type that will wrap everything in the template. - pub father: SyntaxKind, - /// Indicates a node to be ignored (almost always a derivative of the parent - /// node). 
- pub ignore: SyntaxKind, } -pub fn process_template( - template: &[GrammarType], - config: &TemplateConfig, - parser: &mut Parser, -) -> Result<(), ParserError> { - let tokens = { - let mut tokens = Vec::new(); - for rule in template { - if parser.peek() == Some(config.ignore) { - parser.next(); - continue; - } - - if *rule == parser.peek().unwrap_or(EMPTY) { - let peek = parser.peek_with_content().unwrap(); - tokens.push((peek.0, peek.1.clone())); - parser.next(); - continue; - } - - return Err(ParserError::UnexpectedNode(parser.peek().unwrap_or(EMPTY))); - } - tokens - }; - - { - parser.builder.start_node_at(parser.builder.checkpoint(), config.father.into()); - - for t in tokens.iter() { - parser.builder.token(t.0.into(), &t.1); - } - - parser.builder.finish_node(); - } - - Ok(()) -} diff --git a/parser/src/parser/lexer.rs b/parser/src/parser/lexer.rs index 06a4751..c723f80 100644 --- a/parser/src/parser/lexer.rs +++ b/parser/src/parser/lexer.rs @@ -1,9 +1,7 @@ use logos::Logos; -use super::ast::SyntaxKind; - /// Possible tokens when parsing the Sql code -#[derive(Logos, Debug)] +#[derive(Logos, Clone, Debug, PartialEq)] #[logos(skip r"[ \t\n\f]+")] pub enum Token { #[regex("(?i)SELECT")] Select, @@ -27,6 +25,9 @@ pub enum Token { #[regex("(?i)OR")] Or, + #[regex("(?i)NOT")] + Not, + #[regex("(?i)GROUP")] Group, @@ -66,6 +67,12 @@ pub enum Token { #[token("=")] Equal, + #[token(">=")] + GEQT, + + #[token("<=")] + LEQT, + #[token(">")] GT, @@ -80,38 +87,47 @@ pub enum Token { #[token(")")] ParenthesesEnd, + + Error } +/// The to_syntax method won't be used in the parser because some tokens +/// depend on the context to determine their syntax kind. The method +/// is kept commented out as possible boilerplate to reuse in methods +/// that work with this enum impl Token { - #[inline] - /// Function used to take a token and its content (if any) and convert it - /// into a SyntaxKind (enum used by the Parser). 
- pub(crate) fn to_syntax(&self) -> (SyntaxKind, String) { - match &self { - Token::All => (SyntaxKind::ALL, String::from("*")), - Token::Select => (SyntaxKind::SELECT, String::from("SELECT")), - Token::Identifier(i) => (SyntaxKind::IDENTIFIER, String::from(i)), - Token::From => (SyntaxKind::FROM, String::from("FROM")), - Token::Text(t) => (SyntaxKind::TEXT, String::from(t)), - Token::Number(n) => (SyntaxKind::NUMBER, String::from(n)), - Token::Comma => (SyntaxKind::COMMA, String::from(",")), - Token::Semicolon => (SyntaxKind::SEMICOLON, String::from(";")), - Token::Create => (SyntaxKind::CREATE, String::from("CREATE")), - Token::Table => (SyntaxKind::TABLE, String::from("TABLE")), - Token::ParenthesesStart => (SyntaxKind::PARENTHESES_START, String::from("(")), - Token::ParenthesesEnd => (SyntaxKind::PARENTHESES_END, String::from(")")), - Token::Equal => (SyntaxKind::EQUAL, String::from("=")), - Token::Where => (SyntaxKind::WHERE, String::from("WHERE")), - Token::LT => (SyntaxKind::LT, String::from("<")), - Token::GT => (SyntaxKind::GT, String::from(">")), - Token::And => (SyntaxKind::AND, String::from("AND")), - Token::Or => (SyntaxKind::OR, String::from("OR")), - Token::Group => (SyntaxKind::GROUP, String::from("GROUP")), - Token::By => (SyntaxKind::BY, String::from("BY")), - Token::Desc => (SyntaxKind::DESC, String::from("DESC")), - Token::ASC => (SyntaxKind::ASC, String::from("ASC")), - Token::Order => (SyntaxKind::ORDER, String::from("ORDER")), - Token::Distinct => (SyntaxKind::DISTINCT, String::from("DISTINCT")), - } - } + // #[inline] + // /// Function used to take a token and its content (if any) and convert it + // /// into a SyntaxKind (enum used by the Parser). + // pub(crate) fn to_syntax(&self) -> (SyntaxKind, String) { + // match &self { + // Token::All => (SyntaxKind::ALL, String::from("*")), + // Token::Select => (SyntaxKind::SELECT, String::from("SELECT")), + // Token::Identifier(i) => (SyntaxKind::IDENTIFIER, String::from(i)), + // Token::From => (SyntaxKind::FROM, String::from("FROM")), + // Token::Text(t) => (SyntaxKind::TEXT, String::from(t)), + // Token::Number(n) => (SyntaxKind::NUMBER, String::from(n)), + // Token::Comma => (SyntaxKind::COMMA, String::from(",")), + // Token::Semicolon => (SyntaxKind::SEMICOLON, String::from(";")), + // Token::Create => (SyntaxKind::CREATE, String::from("CREATE")), + // Token::Table => (SyntaxKind::TABLE, String::from("TABLE")), + // Token::ParenthesesStart => (SyntaxKind::PARENTHESES_START, String::from("(")), + // Token::ParenthesesEnd => (SyntaxKind::PARENTHESES_END, String::from(")")), + // Token::Equal => (SyntaxKind::EQUAL, String::from("=")), + // Token::Where => (SyntaxKind::WHERE, String::from("WHERE")), + // Token::LT => (SyntaxKind::LT, String::from("<")), + // Token::GT => (SyntaxKind::GT, String::from(">")), + // Token::LEQT => (SyntaxKind::LT, String::from("<=")), + // Token::GEQT => (SyntaxKind::GT, String::from(">=")), + // Token::And => (SyntaxKind::AND, String::from("AND")), + // Token::Or => (SyntaxKind::OR, String::from("OR")), + // Token::Not => (SyntaxKind::NOT, String::from("NOT")), + // Token::Group => (SyntaxKind::GROUP, String::from("GROUP")), + // Token::By => (SyntaxKind::BY, String::from("BY")), + // Token::Desc => (SyntaxKind::DESC, String::from("DESC")), + // Token::ASC => (SyntaxKind::ASC, String::from("ASC")), + // Token::Order => (SyntaxKind::ORDER, String::from("ORDER")), + // Token::Distinct => (SyntaxKind::DISTINCT, String::from("DISTINCT")), + // } + // } } diff --git a/parser/src/parser/mod.rs 
b/parser/src/parser/mod.rs index fe62780..748b7fc 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1,4 +1,4 @@ pub mod ast; pub mod grammar; pub mod lexer; -pub mod sql; +pub mod parser; diff --git a/parser/src/parser/parser/errors.rs b/parser/src/parser/parser/errors.rs new file mode 100644 index 0000000..ffb24dc --- /dev/null +++ b/parser/src/parser/parser/errors.rs @@ -0,0 +1,35 @@ +use crate::parser::ast::syntax_kind::SyntaxKind; +use thiserror::Error; + +/// Possible errors at the time of generating the ast +#[derive(Error, Debug)] +pub enum ParserError { + // TODO: this error should be in LexerError and not in ParserError + #[error("Invalid Token {0}")] + InvalidToken(String), + + /// This error is triggered when a trailing comma is left at the time of + /// enumeration, e.g. 1, 2, + #[error("Trailing Comma is not allowed")] + TrailingComma, + + /// This error is triggered when it expects a specific node and receives a + /// node of another type. + #[error("Expected {0} found {1}")] + ExpectedType(SyntaxKind, SyntaxKind), + + /// This error is triggered when the node does not match with the expected + /// by the context + #[error("Unexpected Node {0}")] + UnexpectedNode(SyntaxKind), + + /// This error is triggered when the definition of the context is incomplete + /// e.g.: SELECT; (without passing any body) + #[error("Expected Body for {0}")] + ExpectedBodyFor(SyntaxKind), + + /// this error appears when the content is finished but the current grammar + /// rule needs more content to complete. + #[error("Unexpected EOF")] + UnexpectedEof, +} diff --git a/parser/src/parser/parser/mod.rs b/parser/src/parser/parser/mod.rs new file mode 100644 index 0000000..bd125fd --- /dev/null +++ b/parser/src/parser/parser/mod.rs @@ -0,0 +1,3 @@ +pub mod errors; +pub mod parser; +pub mod utils; \ No newline at end of file diff --git a/parser/src/parser/parser/parser.rs b/parser/src/parser/parser/parser.rs new file mode 100644 index 0000000..3b029c8 --- /dev/null +++ b/parser/src/parser/parser/parser.rs @@ -0,0 +1,32 @@ +use crate::parser::grammar::dialect::Dialect; +use crate::parser::ast::ast_trait::Ast; +use chumsky::error::Rich; +use logos::Logos; +use chumsky::prelude::ParseResult; + + + +pub trait Parser<'src,N, D, Token> +where + D: Dialect, + Token: Logos<'src> + Clone +{ + + fn parse(&mut self) -> ParseResult>; + fn new(source: &'src str) -> Self; +} + +pub fn parse<'src,N, P, T, D, A>(text: &'src str) -> Result>> +where + D: Dialect, + T: Logos<'src> + Clone, + A: Ast, + P: Parser<'src, N, D, T> + 'src +{ + let mut parser = P::new(text); + let output = parser.parse(); + match output.into_result(){ + Ok(x) => Ok(A::from_node(x)), + Err(e) => Err(e) + } +} \ No newline at end of file diff --git a/parser/src/parser/parser/utils.rs b/parser/src/parser/parser/utils.rs new file mode 100644 index 0000000..f21cdd2 --- /dev/null +++ b/parser/src/parser/parser/utils.rs @@ -0,0 +1,19 @@ +use crate::parser::ast::nodes::TreeNode; +use chumsky::container::Container; +use crate::parser::ast::syntax_kind::SyntaxKind; + +impl Container for TreeNode{ + fn push(&mut self, item: TreeNode){ + let _ = self.add(item).unwrap(); + } +} + +impl Default for TreeNode{ + fn default() -> Self{ + TreeNode::NonTerminal{ + //parent: None, + kind: SyntaxKind::ROOT, + children: vec![], + } + } +} \ No newline at end of file diff --git a/parser/src/parser/sql/boolean.rs b/parser/src/parser/sql/boolean.rs deleted file mode 100644 index bcffa22..0000000 --- a/parser/src/parser/sql/boolean.rs +++ 
/dev/null @@ -1,22 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, Combo, Template}; -use crate::parser::grammar::GrammarType::*; -use crate::parser::grammar::TemplateConfig; - -pub const CMP_GRAMMAR: Grammar = Combo( - true, - &[ - Template( - &[Multi(&[NUMBER, IDENTIFIER, TEXT]), Type(EQUAL), Multi(&[NUMBER, IDENTIFIER, TEXT])], - TemplateConfig { father: COMPARE, ignore: EQUAL }, - ), - Template( - &[Multi(&[NUMBER, IDENTIFIER]), Type(GT), Multi(&[NUMBER, IDENTIFIER])], - TemplateConfig { father: GREATER, ignore: GT }, - ), - Template( - &[Multi(&[NUMBER, IDENTIFIER]), Type(LT), Multi(&[NUMBER, IDENTIFIER])], - TemplateConfig { father: LESS, ignore: LT }, - ), - ], -); diff --git a/parser/src/parser/sql/from.rs b/parser/src/parser/sql/from.rs deleted file mode 100644 index be520b2..0000000 --- a/parser/src/parser/sql/from.rs +++ /dev/null @@ -1,4 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; - -pub const FROM_GRAMMAR: Grammar = Children(&[FROM], FROM, &[List(&[IDENTIFIER])]); diff --git a/parser/src/parser/sql/group_by.rs b/parser/src/parser/sql/group_by.rs deleted file mode 100644 index 4feab9e..0000000 --- a/parser/src/parser/sql/group_by.rs +++ /dev/null @@ -1,6 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; -use crate::parser::grammar::GrammarType::*; - -pub const GROUP_BY_GRAMMAR: Grammar = - Children(&[GROUP, BY], GROUP_BY, &[List(&[IDENTIFIER])]); diff --git a/parser/src/parser/sql/mod.rs b/parser/src/parser/sql/mod.rs deleted file mode 100644 index de57df5..0000000 --- a/parser/src/parser/sql/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! SQL definitions, not much to say here. - -pub mod order_by; -pub mod boolean; -pub mod from; -pub mod group_by; -pub mod select; -pub mod swhere; diff --git a/parser/src/parser/sql/order_by.rs b/parser/src/parser/sql/order_by.rs deleted file mode 100644 index 3e7f3e0..0000000 --- a/parser/src/parser/sql/order_by.rs +++ /dev/null @@ -1,6 +0,0 @@ -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; -use crate::parser::grammar::GrammarType::*; - -pub const ORDER_BY_GRAMMAR: Grammar = Children(&[ORDER, BY], ORDER_BY, &[List(&[IDENTIFIER]), Optional(>ype(Multi(&[ASC, DESC])))]); - diff --git a/parser/src/parser/sql/select.rs b/parser/src/parser/sql/select.rs deleted file mode 100644 index 8fbf408..0000000 --- a/parser/src/parser/sql/select.rs +++ /dev/null @@ -1,12 +0,0 @@ -use super::from::FROM_GRAMMAR; -use super::group_by::GROUP_BY_GRAMMAR; -use super::order_by::ORDER_BY_GRAMMAR; -use super::swhere::WHERE_GRAMMAR; -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, *}; - -pub const SELECT_GRAMMAR: &[Grammar] = &[ - Optional(&Expect(DISTINCT, true)), - List(&[IDENTIFIER, ALL]), - Loop(&Combo(true, &[FROM_GRAMMAR, WHERE_GRAMMAR, GROUP_BY_GRAMMAR, ORDER_BY_GRAMMAR]), SEMICOLON), -]; diff --git a/parser/src/parser/sql/swhere.rs b/parser/src/parser/sql/swhere.rs deleted file mode 100644 index 6bb989b..0000000 --- a/parser/src/parser/sql/swhere.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::boolean::CMP_GRAMMAR; -use crate::parser::ast::SyntaxKind::*; -use crate::parser::grammar::Grammar::{self, Children}; - -pub const WHERE_GRAMMAR: Grammar = Children(&[WHERE], WHERE, &[CMP_GRAMMAR]);
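A minimal sketch (not part of the patch) of how the TreeNode API introduced in parser/src/parser/ast/nodes.rs can be exercised once this diff is applied. Module paths follow the new mod declarations (sql_parser::parser::ast::{nodes, syntax_kind}); the empty-children call to new_no_term sidesteps the child container type, whose generic parameters are not fully visible in the flattened diff, so treat the details below as assumptions rather than documented usage.

use sql_parser::parser::ast::nodes::TreeNode;
use sql_parser::parser::ast::syntax_kind::SyntaxKind;

fn main() {
    // A SELECT NonTerminal with no children yet; children are filled in via `add`.
    let mut select = TreeNode::new_no_term(SyntaxKind::SELECT, vec![]);

    // Terminal leaves carry a SyntaxKind plus the matched source text.
    select
        .add(TreeNode::new_term(SyntaxKind::ALL, "*".to_string()))
        .expect("NonTerminal nodes accept children");
    select
        .add(TreeNode::new_term(SyntaxKind::IDENTIFIER, "Users".to_string()))
        .expect("NonTerminal nodes accept children");

    // Adding a child to a Terminal is the failure case modelled by
    // AstErr::TerminalNodeChildAdition.
    let mut leaf = TreeNode::new_term(SyntaxKind::FROM, "FROM".to_string());
    assert!(leaf.add(TreeNode::new_term(SyntaxKind::ALL, "*".to_string())).is_err());

    // Display walks the tree, printing "• KIND" per node with one extra
    // indentation level per depth, and "• KIND: text" for terminals.
    println!("{select}");
}

The sketch only touches the data structure; wiring the chumsky-based Parser trait and the Container impl for TreeNode together is what the placeholder main in cli/src/main.rs still leaves open.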