diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1a416e4df..43b5042c5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any { Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => { Ok(p!(DoubleColon)) } + Token::Colon => match parser.peek_nth_token(1).token { + // When colon is followed by a string or a number, it's usually in MAP syntax. + Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()), + // In other cases, it's used in semi-structured data traversal like in variant or JSON + // string columns. See `JsonAccess`. + _ => Ok(p!(Colon)), + }, Token::Arrow | Token::LongArrow | Token::HashArrow @@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any { Precedence::Ampersand => 23, Precedence::Caret => 22, Precedence::Pipe => 21, + Precedence::Colon => 21, Precedence::Between => 20, Precedence::Eq => 20, Precedence::Like => 19, @@ -1232,6 +1240,7 @@ pub enum Precedence { Ampersand, Caret, Pipe, + Colon, Between, Eq, Like, diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index faf3402c2..a28545250 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect { None } } + + fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> { + let token = parser.peek_token(); + match token.token { + // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), + _ => None, + } + } } impl MsSqlDialect { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index e861cc515..991233fb8 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect { | Token::ShiftRight | Token::ShiftLeft | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)), + // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), _ => None, } } diff --git 
a/src/parser/mod.rs b/src/parser/mod.rs index d1c4fe05b..61252b122 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3847,7 +3847,8 @@ impl<'a> Parser<'a> { let lower_bound = if self.consume_token(&Token::Colon) { None } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end @@ -3875,7 +3876,8 @@ impl<'a> Parser<'a> { stride: None, }); } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end @@ -3892,7 +3894,8 @@ impl<'a> Parser<'a> { let stride = if self.consume_token(&Token::RBracket) { None } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; if stride.is_some() { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f549e4d0..8bdbb2ced 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17972,3 +17972,126 @@ fn parse_select_parenthesized_wildcard() { assert_eq!(select2.projection.len(), 1); assert!(matches!(select2.projection[0], SelectItem::Wildcard(_))); } + +// https://docs.snowflake.com/en/user-guide/querying-semistructured +#[test] +fn parse_semi_structured_data_traversal() { + let dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(SnowflakeDialect {}), + ]); + + // most basic case + let sql = "SELECT a:b FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "b".to_owned(), + quoted: false + }] + }, + }), + select.projection[0] + ); + + // identifier can be quoted + let sql = r#"SELECT 
a:"my long object key name" FROM t"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "my long object key name".to_owned(), + quoted: true + }] + }, + }), + select.projection[0] + ); + + dialects.verified_stmt("SELECT a:b::INT FROM t"); + + // unquoted keywords are permitted in the object key + let sql = "SELECT a:select, a:from FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![ + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "select".to_owned(), + quoted: false + }] + }, + }), + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "from".to_owned(), + quoted: false + }] + }, + }) + ], + select.projection + ); + + // multiple levels can be traversed + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo."bar".baz"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: true, + }, + JsonPathElem::Dot { + key: "baz".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); + + // dot and bracket notation can be mixed (starting with : case) + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo[0].bar"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: 
Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::value(number("0")), + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 37e9f8cb4..5889b2bd0 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() { // https://docs.snowflake.com/en/user-guide/querying-semistructured #[test] fn parse_semi_structured_data_traversal() { - // most basic case - let sql = "SELECT a:b FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "b".to_owned(), - quoted: false - }] - }, - }), - select.projection[0] - ); - - // identifier can be quoted - let sql = r#"SELECT a:"my long object key name" FROM t"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "my long object key name".to_owned(), - quoted: true - }] - }, - }), - select.projection[0] - ); + // see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test + // cases. This test only has Snowflake-specific syntax like array access. 
// expressions are allowed in bracket notation let sql = r#"SELECT a[2 + 2] FROM t"#; @@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() { select.projection[0] ); - snowflake().verified_stmt("SELECT a:b::INT FROM t"); - - // unquoted keywords are permitted in the object key - let sql = "SELECT a:select, a:from FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![ - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "select".to_owned(), - quoted: false - }] - }, - }), - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "from".to_owned(), - quoted: false - }] - }, - }) - ], - select.projection - ); - - // multiple levels can be traversed - // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo."bar".baz"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: true, - }, - JsonPathElem::Dot { - key: "baz".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - - // dot and bracket notation can be mixed (starting with : case) - // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo[0].bar"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::value(number("0")), - 
}, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - // dot and bracket notation can be mixed (starting with bracket case) // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation let sql = r#"SELECT a[0].foo.bar"#;