sqltk_parser/dialect/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18mod ansi;
19mod bigquery;
20mod clickhouse;
21mod databricks;
22mod duckdb;
23mod generic;
24mod hive;
25mod mssql;
26mod mysql;
27mod postgresql;
28mod redshift;
29mod snowflake;
30mod sqlite;
31
32use core::any::{Any, TypeId};
33use core::fmt::Debug;
34use core::iter::Peekable;
35use core::str::Chars;
36
37use log::debug;
38
39pub use self::ansi::AnsiDialect;
40pub use self::bigquery::BigQueryDialect;
41pub use self::clickhouse::ClickHouseDialect;
42pub use self::databricks::DatabricksDialect;
43pub use self::duckdb::DuckDbDialect;
44pub use self::generic::GenericDialect;
45pub use self::hive::HiveDialect;
46pub use self::mssql::MsSqlDialect;
47pub use self::mysql::MySqlDialect;
48pub use self::postgresql::PostgreSqlDialect;
49pub use self::redshift::RedshiftSqlDialect;
50pub use self::snowflake::SnowflakeDialect;
51pub use self::sqlite::SQLiteDialect;
52use crate::ast::{ColumnOption, Expr, Statement};
53pub use crate::keywords;
54use crate::keywords::Keyword;
55use crate::parser::{Parser, ParserError};
56use crate::tokenizer::Token;
57
58#[cfg(not(feature = "std"))]
59use alloc::boxed::Box;
60
/// Convenience check if a [`Parser`] uses a certain dialect.
///
/// Note: when possible, please use the new style instead: add a method to the
/// [`Dialect`] trait rather than using this macro.
///
/// The benefits of adding a method on `Dialect` over this macro are:
/// 1. user defined [`Dialect`]s can customize the parsing behavior
/// 2. The differences between dialects can be clearly documented in the trait
///
/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates
/// to `true` if `parser.dialect` is one of the [`Dialect`]s specified.
///
/// The expansion calls `.is::<T>()` on `parser.dialect` once for every listed
/// type and joins the results with `||`.
macro_rules! dialect_of {
    ( $parsed_dialect: ident is $($dialect_type: ty)|+ ) => {
        ($($parsed_dialect.dialect.is::<$dialect_type>())||+)
    };
}
77
// Similar to `dialect_of!`, but applied directly to a dialect value instead of
// to a struct with a member named `dialect`. This avoids lifetime issues when
// mixing match guards and token references.
//
// `dialect_is!(d is A | B)` expands to `(d.is::<A>() || d.is::<B>())`.
macro_rules! dialect_is {
    ($dialect:ident is $($dialect_type:ty)|+) => {
        ($($dialect.is::<$dialect_type>())||+)
    }
}
86
87/// Encapsulates the differences between SQL implementations.
88///
89/// # SQL Dialects
90///
91/// SQL implementations deviate from one another, either due to
92/// custom extensions or various historical reasons. This trait
93/// encapsulates the parsing differences between dialects.
94///
95/// [`GenericDialect`] is the most permissive dialect, and parses the union of
96/// all the other dialects, when there is no ambiguity. However, it does not
97/// currently allow `CREATE TABLE` statements without types specified for all
98/// columns; use [`SQLiteDialect`] if you require that.
99///
100/// # Examples
101/// Most users create a [`Dialect`] directly, as shown on the [module
102/// level documentation]:
103///
104/// ```
105/// # use sqltk_parser::dialect::AnsiDialect;
106/// let dialect = AnsiDialect {};
107/// ```
108///
109/// It is also possible to dynamically create a [`Dialect`] from its
110/// name. For example:
111///
112/// ```
113/// # use sqltk_parser::dialect::{AnsiDialect, dialect_from_str};
114/// let dialect = dialect_from_str("ansi").unwrap();
115///
116/// // Parsed dialect is an instance of `AnsiDialect`:
117/// assert!(dialect.is::<AnsiDialect>());
118/// ```
119///
120/// [module level documentation]: crate
121pub trait Dialect: Debug + Any {
    /// Determine the [`TypeId`] of this dialect.
    ///
    /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overridden
    /// by dialects that behave like other dialects
    /// (for example when wrapping a dialect).
    ///
    /// The default implementation simply forwards to `self.type_id()`.
    fn dialect(&self) -> TypeId {
        self.type_id()
    }
130
131 /// Determine if a character starts a quoted identifier. The default
132 /// implementation, accepting "double quoted" ids is both ANSI-compliant
133 /// and appropriate for most dialects (with the notable exception of
134 /// MySQL, MS SQL, and sqlite). You can accept one of characters listed
135 /// in `Word::matching_end_quote` here
136 fn is_delimited_identifier_start(&self, ch: char) -> bool {
137 ch == '"' || ch == '`'
138 }
139
    /// Determine if a character starts a potential nested quoted identifier.
    ///
    /// Example: RedShift supports the following quote styles to all mean the same thing:
    /// ```sql
    /// SELECT 1 AS foo;
    /// SELECT 1 AS "foo";
    /// SELECT 1 AS [foo];
    /// SELECT 1 AS ["foo"];
    /// ```
    ///
    /// Defaults to `false` for every character.
    fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
        false
    }
151
    /// Only applicable whenever [`Self::is_nested_delimited_identifier_start`] returns true.
    ///
    /// If the next sequence of characters potentially represents a nested identifier, then
    /// this method returns a tuple containing the outer quote style and, if present, the
    /// inner (nested) quote style.
    ///
    /// Example (Redshift):
    /// ```text
    /// `["foo"]` => Some(`[`, Some(`"`))
    /// `[foo]` => Some(`[`, None)
    /// `[0]` => None
    /// `"foo"` => None
    /// ```
    ///
    /// The iterator is taken by value, so advancing it here does not affect the
    /// caller's position. The default implementation ignores it and returns `None`.
    fn peek_nested_delimited_identifier_quotes(
        &self,
        mut _chars: Peekable<Chars<'_>>,
    ) -> Option<(char, Option<char>)> {
        None
    }
169
    /// Return the character used to quote identifiers.
    ///
    /// The default implementation ignores `_identifier` and returns `None`.
    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
        None
    }
174
    /// Determine if a character is a valid start character for an unquoted identifier.
    ///
    /// Required method: there is no default, every dialect must implement it.
    fn is_identifier_start(&self, ch: char) -> bool;
177
    /// Determine if a character is a valid unquoted identifier character.
    ///
    /// Required method: there is no default, every dialect must implement it.
    fn is_identifier_part(&self, ch: char) -> bool;
180
    /// Determine if a character may be part of a custom operator.
    ///
    /// Most dialects do not have custom operators. Override this method to provide
    /// custom operators. Defaults to `false` for every character.
    fn is_custom_operator_part(&self, _ch: char) -> bool {
        false
    }
185
    /// Determine if the dialect supports escaping characters via '\' in string literals.
    ///
    /// Some dialects like BigQuery and Snowflake support this while others like
    /// Postgres do not. Such that the following is accepted by the former but
    /// rejected by the latter.
    /// ```sql
    /// SELECT 'ab\'cd';
    /// ```
    ///
    /// Conversely, such dialects reject the following statement which
    /// otherwise would be valid in the other dialects.
    /// ```sql
    /// SELECT '\';
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_string_literal_backslash_escape(&self) -> bool {
        false
    }
203
    /// Determine whether the dialect keeps the backslash when a LIKE wildcard (`%`, `_`) is
    /// escaped in a string literal, i.e. whether it ignores wildcard escapes.
    ///
    /// [MySQL] has a special case when escaping single quoted strings which leaves these unescaped
    /// so they can be used in LIKE patterns without double-escaping (as is necessary in other
    /// escaping dialects, such as [Snowflake]). Generally, special characters have escaping rules
    /// causing them to be replaced with a different byte sequences (e.g. `'\0'` becoming the zero
    /// byte), and the default if an escaped character does not have a specific escaping rule is to
    /// strip the backslash (e.g. there is no rule for `h`, so `'\h' = 'h'`). MySQL's special case
    /// for ignoring LIKE wildcard escapes is to *not* strip the backslash, so that `'\%' = '\\%'`.
    /// This applies to all string literals though, not just those used in LIKE patterns.
    ///
    /// ```text
    /// mysql> select '\_', hex('\\'), hex('_'), hex('\_');
    /// +----+-----------+----------+-----------+
    /// | \_ | hex('\\') | hex('_') | hex('\_') |
    /// +----+-----------+----------+-----------+
    /// | \_ | 5C | 5F | 5C5F |
    /// +----+-----------+----------+-----------+
    /// 1 row in set (0.00 sec)
    /// ```
    ///
    /// Defaults to `false`.
    ///
    /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
    /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/functions/like#usage-notes
    fn ignores_wildcard_escapes(&self) -> bool {
        false
    }
230
    /// Determine if the dialect supports string literals with `U&` prefix.
    ///
    /// This is used to specify Unicode code points in string literals.
    /// For example, in PostgreSQL, the following is a valid string literal:
    /// ```sql
    /// SELECT U&'\0061\0062\0063';
    /// ```
    /// This is equivalent to the string literal `'abc'`.
    ///
    /// Defaults to `false`.
    ///
    /// See
    /// - [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE)
    /// - [H2 docs](http://www.h2database.com/html/grammar.html#string)
    fn supports_unicode_string_literal(&self) -> bool {
        false
    }
244
    /// Does the dialect support `FILTER (WHERE expr)` for aggregate queries?
    ///
    /// Defaults to `false`.
    fn supports_filter_during_aggregation(&self) -> bool {
        false
    }
249
    /// Returns true if the dialect supports referencing another named window
    /// within a window clause declaration.
    ///
    /// Example
    /// ```sql
    /// SELECT * FROM mytable
    /// WINDOW mynamed_window AS another_named_window
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_window_clause_named_window_reference(&self) -> bool {
        false
    }
261
    /// Returns true if the dialect supports `ARRAY_AGG() [WITHIN GROUP (ORDER BY)]` expressions.
    /// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause,
    /// e.g. [`ANSI`].
    ///
    /// Defaults to `false`.
    ///
    /// [`ANSI`]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#array-aggregate-function
    fn supports_within_after_array_aggregation(&self) -> bool {
        false
    }
269
    /// Returns true if the dialect supports `GROUPING SETS`, `ROLLUP`, or `CUBE`
    /// expressions in `GROUP BY`.
    ///
    /// Defaults to `false`.
    fn supports_group_by_expr(&self) -> bool {
        false
    }
274
    /// Returns true if the dialect supports `GROUP BY` modifiers prefixed by a `WITH` keyword.
    /// Example: `GROUP BY value WITH ROLLUP`.
    ///
    /// Defaults to `false`.
    fn supports_group_by_with_modifier(&self) -> bool {
        false
    }
280
    /// Returns true if the dialect supports the `(+)` syntax for OUTER JOIN.
    ///
    /// Defaults to `false`.
    fn supports_outer_join_operator(&self) -> bool {
        false
    }
285
    /// Returns true if the dialect supports `CONNECT BY`.
    ///
    /// Defaults to `false`.
    fn supports_connect_by(&self) -> bool {
        false
    }
290
    /// Returns true if the dialect supports `EXECUTE IMMEDIATE` statements.
    ///
    /// Defaults to `false`.
    fn supports_execute_immediate(&self) -> bool {
        false
    }
295
    /// Returns true if the dialect supports the `MATCH_RECOGNIZE` operation.
    ///
    /// Defaults to `false`.
    fn supports_match_recognize(&self) -> bool {
        false
    }
300
    /// Returns true if the dialect supports `(NOT) IN ()` expressions,
    /// i.e. an `IN` with an empty list.
    ///
    /// Defaults to `false`.
    fn supports_in_empty_list(&self) -> bool {
        false
    }
305
    /// Returns true if the dialect supports
    /// `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE | TRY | CATCH} [TRANSACTION]` statements.
    ///
    /// Defaults to `false`.
    fn supports_start_transaction_modifier(&self) -> bool {
        false
    }
310
    /// Returns true if the dialect supports `END {TRY | CATCH}` statements.
    ///
    /// Defaults to `false`.
    fn supports_end_transaction_modifier(&self) -> bool {
        false
    }
315
    /// Returns true if the dialect supports named arguments of the form `FUN(a = '1', b = '2')`.
    ///
    /// Defaults to `false`.
    fn supports_named_fn_args_with_eq_operator(&self) -> bool {
        false
    }
320
    /// Returns true if the dialect supports named arguments of the form `FUN(a : '1', b : '2')`.
    ///
    /// Defaults to `false`.
    fn supports_named_fn_args_with_colon_operator(&self) -> bool {
        false
    }
325
    /// Returns true if the dialect supports named arguments of the form `FUN(a := '1', b := '2')`.
    ///
    /// Defaults to `false`.
    fn supports_named_fn_args_with_assignment_operator(&self) -> bool {
        false
    }
330
    /// Returns true if the dialect supports named arguments of the form `FUN(a => '1', b => '2')`.
    ///
    /// Unlike the other named-argument operators, this one defaults to `true`.
    fn supports_named_fn_args_with_rarrow_operator(&self) -> bool {
        true
    }
335
    /// Returns true if the dialect supports an arbitrary expression as the argument name,
    /// e.g. `FUN(LOWER('a'):'1', b:'2')`.
    ///
    /// Such function arguments are represented in the AST by the `FunctionArg::ExprNamed`
    /// variant; otherwise the `FunctionArg::Named` variant is used (for compatibility).
    ///
    /// Defaults to `false`.
    fn supports_named_fn_args_with_expr_name(&self) -> bool {
        false
    }
343
    /// Returns true if the dialect supports identifiers starting with a numeric
    /// prefix such as tables named `59901_user_login`.
    ///
    /// Defaults to `false`.
    fn supports_numeric_prefix(&self) -> bool {
        false
    }
349
    /// Returns true if the dialect supports numbers containing underscores, e.g. `10_000_000`.
    ///
    /// Defaults to `false`.
    fn supports_numeric_literal_underscores(&self) -> bool {
        false
    }
354
    /// Returns true if the dialect supports specifying null treatment
    /// as part of a window function's parameter list as opposed
    /// to after the parameter list.
    ///
    /// i.e. a dialect accepting the following syntax returns true
    /// ```sql
    /// FIRST_VALUE(a IGNORE NULLS) OVER ()
    /// ```
    /// while a dialect accepting only the following syntax returns false
    /// ```sql
    /// FIRST_VALUE(a) IGNORE NULLS OVER ()
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_window_function_null_treatment_arg(&self) -> bool {
        false
    }
370
    /// Returns true if the dialect supports defining structs or objects using a
    /// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
    ///
    /// Defaults to `false`.
    fn supports_dictionary_syntax(&self) -> bool {
        false
    }
376
    /// Returns true if the dialect supports defining an object using a
    /// syntax like `Map {1: 10, 2: 20}`.
    ///
    /// Defaults to `false`.
    fn support_map_literal_syntax(&self) -> bool {
        false
    }
382
    /// Returns true if the dialect supports lambda functions, for example:
    ///
    /// ```sql
    /// SELECT transform(array(1, 2, 3), x -> x + 1); -- returns [2,3,4]
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_lambda_functions(&self) -> bool {
        false
    }
391
    /// Returns true if the dialect supports multiple variable assignment
    /// using parentheses in a `SET` variable declaration.
    ///
    /// ```sql
    /// SET (variable[, ...]) = (expression[, ...]);
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_parenthesized_set_variables(&self) -> bool {
        false
    }
401
    /// Returns true if the dialect supports multiple comma-separated
    /// assignments in a single `SET` statement.
    ///
    /// ```sql
    /// SET variable = expression [, variable = expression];
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_comma_separated_set_assignments(&self) -> bool {
        false
    }
411
    /// Returns true if the dialect supports an `EXCEPT` clause following a
    /// wildcard in a select list.
    ///
    /// For example
    /// ```sql
    /// SELECT * EXCEPT order_id FROM orders;
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_select_wildcard_except(&self) -> bool {
        false
    }
422
    /// Returns true if the dialect has a `CONVERT` function which accepts a type first
    /// and an expression second, e.g. `CONVERT(varchar, 1)`.
    ///
    /// Defaults to `false`.
    fn convert_type_before_value(&self) -> bool {
        false
    }
428
    /// Returns true if the dialect supports triple quoted strings,
    /// e.g. `"""abc"""`.
    ///
    /// Defaults to `false`.
    fn supports_triple_quoted_string(&self) -> bool {
        false
    }
434
    /// Dialect-specific prefix parser override.
    ///
    /// If `None` is returned, falls back to the default behavior. The default
    /// implementation always returns `None`.
    fn parse_prefix(&self, _parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
        // return None to fall back to the default behavior
        None
    }
440
    /// Does the dialect support trailing commas around the query?
    ///
    /// Defaults to `false`. The default of
    /// [`Self::supports_projection_trailing_commas`] follows this value.
    fn supports_trailing_commas(&self) -> bool {
        false
    }
445
    /// Does the dialect support parsing `LIMIT 1, 2` as `LIMIT 2 OFFSET 1`?
    ///
    /// Defaults to `false`.
    fn supports_limit_comma(&self) -> bool {
        false
    }
450
    /// Does the dialect support trailing commas in the projection list?
    ///
    /// Defaults to whatever [`Self::supports_trailing_commas`] returns.
    fn supports_projection_trailing_commas(&self) -> bool {
        self.supports_trailing_commas()
    }
455
    /// Returns true if the dialect supports trailing commas in the `FROM` clause of a
    /// `SELECT` statement.
    /// Example: `SELECT 1 FROM T, U, LIMIT 1`
    ///
    /// Defaults to `false`.
    fn supports_from_trailing_commas(&self) -> bool {
        false
    }
461
    /// Returns true if the dialect supports trailing commas in the
    /// column definitions list of a `CREATE` statement.
    /// Example: `CREATE TABLE T (x INT, y TEXT,)`
    ///
    /// Defaults to `false`.
    fn supports_column_definition_trailing_commas(&self) -> bool {
        false
    }
468
    /// Returns true if the dialect supports double dot notation for object names.
    ///
    /// Example
    /// ```sql
    /// SELECT * FROM db_name..table_name
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_object_name_double_dot_notation(&self) -> bool {
        false
    }
478
    /// Returns true if the dialect supports the `STRUCT` literal.
    ///
    /// Example
    /// ```sql
    /// SELECT STRUCT(1 as one, 'foo' as foo, false)
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_struct_literal(&self) -> bool {
        false
    }
488
    /// Returns true if the dialect supports empty projections in `SELECT` statements.
    ///
    /// Example
    /// ```sql
    /// SELECT from table_name
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_empty_projections(&self) -> bool {
        false
    }
498
    /// Returns true if the dialect supports wildcard expansion on
    /// arbitrary expressions in projections.
    ///
    /// Example:
    /// ```sql
    /// SELECT STRUCT<STRING>('foo').* FROM T
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_select_expr_star(&self) -> bool {
        false
    }
509
    /// Returns true if the dialect supports "FROM-first" selects.
    ///
    /// Example:
    /// ```sql
    /// FROM table
    /// SELECT *
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_from_first_select(&self) -> bool {
        false
    }
520
    /// Returns true if the dialect supports the pipe operator (`|>`).
    ///
    /// Example:
    /// ```sql
    /// SELECT *
    /// FROM table
    /// |> limit 1
    /// ```
    ///
    /// Defaults to `false`.
    ///
    /// See <https://cloud.google.com/bigquery/docs/pipe-syntax-guide#basic_syntax>
    fn supports_pipe_operator(&self) -> bool {
        false
    }
534
    /// Does the dialect support MySQL-style `'user'@'host'` grantee syntax?
    ///
    /// Defaults to `false`.
    fn supports_user_host_grantee(&self) -> bool {
        false
    }
539
    /// Does the dialect support the `MATCH() AGAINST()` syntax?
    ///
    /// Defaults to `false`.
    fn supports_match_against(&self) -> bool {
        false
    }
544
    /// Dialect-specific infix parser override.
    ///
    /// This method is called to parse the next infix expression.
    ///
    /// If `None` is returned, falls back to the default behavior. The default
    /// implementation ignores all of its arguments and always returns `None`.
    fn parse_infix(
        &self,
        _parser: &mut Parser,
        _expr: &Expr,
        _precedence: u8,
    ) -> Option<Result<Expr, ParserError>> {
        // return None to fall back to the default behavior
        None
    }
559
    /// Dialect-specific precedence override.
    ///
    /// This method is called to get the precedence of the next token.
    /// [`Self::get_next_precedence_default`] consults it first and returns its
    /// result unchanged when it is `Some`.
    ///
    /// If `None` is returned, falls back to the default behavior. The default
    /// implementation always returns `None`.
    fn get_next_precedence(&self, _parser: &Parser) -> Option<Result<u8, ParserError>> {
        // return None to fall back to the default behavior
        None
    }
569
570 /// Get the precedence of the next token, looking at the full token stream.
571 ///
572 /// A higher number => higher precedence
573 ///
574 /// See [`Self::get_next_precedence`] to override the behavior for just the
575 /// next token.
576 ///
577 /// The default implementation is used for many dialects, but can be
578 /// overridden to provide dialect-specific behavior.
579 fn get_next_precedence_default(&self, parser: &Parser) -> Result<u8, ParserError> {
580 if let Some(precedence) = self.get_next_precedence(parser) {
581 return precedence;
582 }
583 macro_rules! p {
584 ($precedence:ident) => {
585 self.prec_value(Precedence::$precedence)
586 };
587 }
588
589 let token = parser.peek_token();
590 debug!("get_next_precedence_full() {:?}", token);
591 match token.token {
592 Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
593 Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
594 Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
595
596 Token::Word(w) if w.keyword == Keyword::AT => {
597 match (
598 parser.peek_nth_token(1).token,
599 parser.peek_nth_token(2).token,
600 ) {
601 (Token::Word(w), Token::Word(w2))
602 if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
603 {
604 Ok(p!(AtTz))
605 }
606 _ => Ok(self.prec_unknown()),
607 }
608 }
609
610 Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
611 // The precedence of NOT varies depending on keyword that
612 // follows it. If it is followed by IN, BETWEEN, or LIKE,
613 // it takes on the precedence of those tokens. Otherwise, it
614 // is not an infix operator, and therefore has zero
615 // precedence.
616 Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
617 Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
618 Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
619 Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
620 Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
621 Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
622 Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
623 _ => Ok(self.prec_unknown()),
624 },
625 Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
626 Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
627 Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
628 Token::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(p!(Between)),
629 Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
630 Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
631 Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
632 Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
633 Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
634 Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
635 Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
636 Token::Period => Ok(p!(Period)),
637 Token::Assignment
638 | Token::Eq
639 | Token::Lt
640 | Token::LtEq
641 | Token::Neq
642 | Token::Gt
643 | Token::GtEq
644 | Token::DoubleEq
645 | Token::Tilde
646 | Token::TildeAsterisk
647 | Token::ExclamationMarkTilde
648 | Token::ExclamationMarkTildeAsterisk
649 | Token::DoubleTilde
650 | Token::DoubleTildeAsterisk
651 | Token::ExclamationMarkDoubleTilde
652 | Token::ExclamationMarkDoubleTildeAsterisk
653 | Token::Spaceship => Ok(p!(Eq)),
654 Token::Pipe
655 | Token::QuestionMarkDash
656 | Token::DoubleSharp
657 | Token::Overlap
658 | Token::AmpersandLeftAngleBracket
659 | Token::AmpersandRightAngleBracket
660 | Token::QuestionMarkDashVerticalBar
661 | Token::AmpersandLeftAngleBracketVerticalBar
662 | Token::VerticalBarAmpersandRightAngleBracket
663 | Token::TwoWayArrow
664 | Token::LeftAngleBracketCaret
665 | Token::RightAngleBracketCaret
666 | Token::QuestionMarkSharp
667 | Token::QuestionMarkDoubleVerticalBar
668 | Token::QuestionPipe
669 | Token::TildeEqual
670 | Token::AtSign
671 | Token::ShiftLeftVerticalBar
672 | Token::VerticalBarShiftRight => Ok(p!(Pipe)),
673 Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
674 Token::Ampersand => Ok(p!(Ampersand)),
675 Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
676 Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
677 Ok(p!(MulDivModOp))
678 }
679 Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
680 Ok(p!(DoubleColon))
681 }
682 Token::Arrow
683 | Token::LongArrow
684 | Token::HashArrow
685 | Token::HashLongArrow
686 | Token::AtArrow
687 | Token::ArrowAt
688 | Token::HashMinus
689 | Token::AtQuestion
690 | Token::AtAt
691 | Token::Question
692 | Token::QuestionAnd
693 | Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
694 _ => Ok(self.prec_unknown()),
695 }
696 }
697
    /// Dialect-specific statement parser override.
    ///
    /// This method is called to parse the next statement.
    ///
    /// If `None` is returned, falls back to the default behavior. The default
    /// implementation always returns `None`.
    fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
        // return None to fall back to the default behavior
        None
    }
707
    /// Dialect-specific column option parser override.
    ///
    /// This method is called to parse the next column option.
    ///
    /// If `Ok(None)` is returned, falls back to the default behavior. The default
    /// implementation always returns `Ok(None)`.
    ///
    /// NOTE(review): how the caller distinguishes the outer `Err` from an inner
    /// `Some(Err(..))` is decided in the parser and is not visible here — confirm
    /// against the call site before relying on either.
    fn parse_column_option(
        &self,
        _parser: &mut Parser,
    ) -> Result<Option<Result<Option<ColumnOption>, ParserError>>, ParserError> {
        // return Ok(None) to fall back to the default behavior
        Ok(None)
    }
720
721 /// Decide the lexical Precedence of operators.
722 ///
723 /// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
724 fn prec_value(&self, prec: Precedence) -> u8 {
725 match prec {
726 Precedence::Period => 100,
727 Precedence::DoubleColon => 50,
728 Precedence::AtTz => 41,
729 Precedence::MulDivModOp => 40,
730 Precedence::PlusMinus => 30,
731 Precedence::Xor => 24,
732 Precedence::Ampersand => 23,
733 Precedence::Caret => 22,
734 Precedence::Pipe => 21,
735 Precedence::Between => 20,
736 Precedence::Eq => 20,
737 Precedence::Like => 19,
738 Precedence::Is => 17,
739 Precedence::PgOther => 16,
740 Precedence::UnaryNot => 15,
741 Precedence::And => 10,
742 Precedence::Or => 5,
743 }
744 }
745
    /// Returns the precedence when the precedence is otherwise unknown.
    ///
    /// Defaults to `0`, which is lower than every value returned by the default
    /// [`Self::prec_value`].
    fn prec_unknown(&self) -> u8 {
        0
    }
750
    /// Returns true if this dialect requires the `TABLE` keyword after `DESCRIBE`.
    ///
    /// Defaults to `false`.
    ///
    /// If true, the following statement is valid: `DESCRIBE TABLE my_table`
    /// If false, the following statements are valid: `DESCRIBE my_table` and `DESCRIBE table`
    fn describe_requires_table_keyword(&self) -> bool {
        false
    }
760
    /// Returns true if this dialect allows the `EXTRACT` function to accept words
    /// other than a [`Keyword`] as the part being extracted.
    ///
    /// Defaults to `false`.
    fn allow_extract_custom(&self) -> bool {
        false
    }
765
    /// Returns true if this dialect allows the `EXTRACT` function to use single quotes
    /// in the part being extracted.
    ///
    /// Defaults to `false`.
    fn allow_extract_single_quotes(&self) -> bool {
        false
    }
770
    /// Returns true if this dialect allows dollar placeholders,
    /// e.g. `SELECT $var` (SQLite).
    ///
    /// Defaults to `false`.
    fn supports_dollar_placeholder(&self) -> bool {
        false
    }
776
    /// Does the dialect support a `WITH` clause in the `CREATE INDEX` statement?
    /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)`
    ///
    /// Defaults to `false`.
    fn supports_create_index_with_clause(&self) -> bool {
        false
    }
782
    /// Whether `INTERVAL` expressions require units (called "qualifiers" in the ANSI SQL spec)
    /// to be specified, e.g. `INTERVAL 1 DAY` vs `INTERVAL 1`.
    ///
    /// Expressions within intervals (e.g. `INTERVAL '1' + '1' DAY`) are only allowed when units
    /// are required.
    ///
    /// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1398> for more information.
    ///
    /// When `true`:
    /// * `INTERVAL '1' DAY` is VALID
    /// * `INTERVAL 1 + 1 DAY` is VALID
    /// * `INTERVAL '1' + '1' DAY` is VALID
    /// * `INTERVAL '1'` is INVALID
    ///
    /// When `false`:
    /// * `INTERVAL '1'` is VALID
    /// * `INTERVAL '1' DAY` is VALID — unit is not required, but still allowed
    /// * `INTERVAL 1 + 1 DAY` is INVALID
    ///
    /// Defaults to `false`.
    fn require_interval_qualifier(&self) -> bool {
        false
    }
803
    /// Presumably reports whether `EXPLAIN` statements may carry utility options
    /// (inferred from the method name only — TODO confirm against the parser).
    ///
    /// Defaults to `false`.
    fn supports_explain_with_utility_options(&self) -> bool {
        false
    }
807
    /// Presumably reports whether `ASC` / `DESC` are accepted inside a column
    /// definition (inferred from the method name only — TODO confirm against the parser).
    ///
    /// Defaults to `false`.
    fn supports_asc_desc_in_column_definition(&self) -> bool {
        false
    }
811
    /// Returns true if the dialect supports `a!` (postfix factorial) expressions.
    ///
    /// Defaults to `false`.
    fn supports_factorial_operator(&self) -> bool {
        false
    }
816
    /// Returns true if the dialect supports nested comments,
    /// e.g. `/* /* nested */ */`.
    ///
    /// Defaults to `false`.
    fn supports_nested_comments(&self) -> bool {
        false
    }
822
    /// Returns true if this dialect supports treating the equals operator `=` within a
    /// `SelectItem` as an alias assignment operator, rather than a boolean expression.
    ///
    /// For example, the following statements are equivalent for such a dialect
    /// (the name on the left of `=` is the alias):
    /// ```sql
    /// SELECT col_alias = col FROM tbl;
    /// SELECT col AS col_alias FROM tbl;
    /// ```
    ///
    /// Defaults to `false`.
    fn supports_eq_alias_assignment(&self) -> bool {
        false
    }
833
    /// Returns true if this dialect supports the `TRY_CONVERT` function.
    ///
    /// Defaults to `false`.
    fn supports_try_convert(&self) -> bool {
        false
    }
838
    /// Returns true if the dialect supports `!a` syntax for boolean `NOT` expressions.
    ///
    /// Defaults to `false`.
    fn supports_bang_not_operator(&self) -> bool {
        false
    }
843
    /// Returns true if the dialect supports the `LISTEN`, `UNLISTEN` and `NOTIFY` statements.
    ///
    /// Defaults to `false`.
    fn supports_listen_notify(&self) -> bool {
        false
    }
848
    /// Returns true if the dialect supports the `LOAD DATA` statement.
    ///
    /// Defaults to `false`.
    fn supports_load_data(&self) -> bool {
        false
    }
853
    /// Returns true if the dialect supports the `LOAD extension` statement.
    ///
    /// Defaults to `false`.
    fn supports_load_extension(&self) -> bool {
        false
    }
858
    /// Returns true if this dialect expects the `TOP` option
    /// before the `ALL`/`DISTINCT` options in a `SELECT` statement.
    ///
    /// Defaults to `false`.
    fn supports_top_before_distinct(&self) -> bool {
        false
    }
864
    /// Returns true if the dialect supports boolean literals (`true` and `false`).
    /// For example, in MSSQL these are treated as identifiers rather than boolean literals.
    ///
    /// Defaults to `true`.
    fn supports_boolean_literals(&self) -> bool {
        true
    }
870
    /// Returns true if this dialect supports the `LIKE 'pattern'` option in
    /// a `SHOW` statement before the `IN` option.
    ///
    /// Defaults to `false`.
    fn supports_show_like_before_in(&self) -> bool {
        false
    }
876
    /// Returns true if this dialect supports the `COMMENT` statement.
    ///
    /// Defaults to `false`.
    fn supports_comment_on(&self) -> bool {
        false
    }
881
    /// Returns true if the dialect supports the `CREATE TABLE SELECT` statement.
    ///
    /// Defaults to `false`.
    fn supports_create_table_select(&self) -> bool {
        false
    }
886
    /// Returns true if the dialect supports PartiQL for querying semi-structured data.
    /// <https://partiql.org/index.html>
    ///
    /// Defaults to `false`.
    fn supports_partiql(&self) -> bool {
        false
    }
892
893 /// Returns true if the specified keyword is reserved and cannot be
894 /// used as an identifier without special handling like quoting.
895 fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
896 keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
897 }
898
    /// Returns reserved keywords when looking to parse a `TableFactor`.
    /// See [Self::supports_from_trailing_commas]
    ///
    /// Defaults to `keywords::RESERVED_FOR_TABLE_FACTOR`.
    fn get_reserved_keywords_for_table_factor(&self) -> &[Keyword] {
        keywords::RESERVED_FOR_TABLE_FACTOR
    }
904
    /// Returns reserved keywords that may prefix a select item expression,
    /// e.g. `SELECT CONNECT_BY_ROOT name FROM Tbl2` (Snowflake).
    ///
    /// Defaults to an empty slice.
    fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] {
        &[]
    }
910
    /// Returns true if this dialect supports the `TABLESAMPLE` option
    /// before the table alias option. For example:
    ///
    /// Table sample before alias: `SELECT * FROM tbl TABLESAMPLE (10) AS t`
    /// Table sample after alias: `SELECT * FROM tbl AS t TABLESAMPLE (10)`
    ///
    /// Defaults to `false`.
    ///
    /// <https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#_7_6_table_reference>
    fn supports_table_sample_before_alias(&self) -> bool {
        false
    }
921
    /// Returns true if this dialect supports the `INSERT INTO ... SET col1 = 1, ...` syntax.
    ///
    /// Defaults to `false`.
    ///
    /// MySQL: <https://dev.mysql.com/doc/refman/8.4/en/insert.html>
    fn supports_insert_set(&self) -> bool {
        false
    }
928
/// Returns true if the dialect supports a table function as the target of
/// an insertion.
///
/// The default implementation returns `false`.
fn supports_insert_table_function(&self) -> bool {
    false
}
933
/// Returns true if the dialect supports insert formats,
/// e.g. `INSERT INTO ... FORMAT <format>`.
///
/// The default implementation returns `false`.
fn supports_insert_format(&self) -> bool {
    false
}
938
/// Returns true if this dialect supports `SET` statements without an explicit
/// assignment operator such as `=`. For example: `SET SHOWPLAN_XML ON`.
///
/// The default implementation returns `false`.
fn supports_set_stmt_without_operator(&self) -> bool {
    false
}
944
945 /// Returns true if the specified keyword should be parsed as a column identifier.
946 /// See [keywords::RESERVED_FOR_COLUMN_ALIAS]
947 fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool {
948 !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw)
949 }
950
951 /// Returns true if the specified keyword should be parsed as a select item alias.
952 /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
953 /// to enable looking ahead if needed.
954 fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool {
955 explicit || self.is_column_alias(kw, parser)
956 }
957
958 /// Returns true if the specified keyword should be parsed as a table factor alias.
959 /// When explicit is true, the keyword is preceded by an `AS` word. Parser is provided
960 /// to enable looking ahead if needed.
961 fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, _parser: &mut Parser) -> bool {
962 explicit || !keywords::RESERVED_FOR_TABLE_ALIAS.contains(kw)
963 }
964
/// Returns true if this dialect supports querying historical table data
/// by specifying which version of the data to query.
///
/// The default implementation returns `false`.
fn supports_timestamp_versioning(&self) -> bool {
    false
}
970
/// Returns true if this dialect supports the `E'...'` syntax for string literals.
///
/// Postgres: <https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-ESCAPE>
///
/// The default implementation returns `false`.
fn supports_string_escape_constant(&self) -> bool {
    false
}
977
/// Returns true if the dialect supports the table hints in the `FROM` clause.
///
/// The default implementation returns `false`.
fn supports_table_hints(&self) -> bool {
    false
}
982
/// Returns true if this dialect requires a whitespace character after `--` to start a single line comment.
///
/// MySQL: <https://dev.mysql.com/doc/refman/8.4/en/ansi-diff-comments.html>
///
/// For example, in the statement below the `--` in `balance--1` is not
/// followed by whitespace, so it will be interpreted as two minus signs
/// instead of a comment:
///
/// ```sql
/// UPDATE account SET balance=balance--1
/// WHERE account_id=5752
/// ```
///
/// The default implementation returns `false`.
fn requires_single_line_comment_whitespace(&self) -> bool {
    false
}
991
/// Returns true if the dialect supports array type definition with brackets with
/// an optional size. For example:
///
/// - `CREATE TABLE my_table (arr1 INT[], arr2 INT[3])`
/// - `SELECT x::INT[]`
///
/// The default implementation returns `false`.
fn supports_array_typedef_with_brackets(&self) -> bool {
    false
}
/// Returns true if the dialect supports geometric types.
///
/// Postgres: <https://www.postgresql.org/docs/9.5/functions-geometry.html>
/// e.g. `@@ circle '((0,0),10)'`
///
/// The default implementation returns `false`.
fn supports_geometric_types(&self) -> bool {
    false
}
1006
/// Returns true if the dialect supports `ORDER BY ALL`,
/// where `ALL` means all columns of the `SELECT` clause.
///
/// For example: `SELECT * FROM addresses ORDER BY ALL;`.
///
/// The default implementation returns `false`.
fn supports_order_by_all(&self) -> bool {
    false
}
1014
/// Returns true if the dialect supports `SET NAMES <charset_name> [COLLATE <collation_name>]`.
///
/// - [MySQL](https://dev.mysql.com/doc/refman/8.4/en/set-names.html)
/// - [PostgreSQL](https://www.postgresql.org/docs/17/sql-set.html)
///
/// Note: Postgres doesn't support the `COLLATE` clause, but we permissively parse it anyway.
///
/// The default implementation returns `false`.
fn supports_set_names(&self) -> bool {
    false
}
1024}
1025
/// This represents the operators for which precedence must be defined.
///
/// higher number -> higher precedence
// NOTE(review): the variants carry no numeric values themselves; the numbers
// referred to above are presumably assigned wherever a dialect maps a variant
// to its precedence value — confirm against the `Dialect` trait.
#[derive(Debug, Clone, Copy)]
pub enum Precedence {
    Period,
    DoubleColon,
    AtTz,
    MulDivModOp,
    PlusMinus,
    Xor,
    Ampersand,
    Caret,
    Pipe,
    Between,
    Eq,
    Like,
    Is,
    PgOther,
    UnaryNot,
    And,
    Or,
}
1049
1050impl dyn Dialect {
1051 #[inline]
1052 pub fn is<T: Dialect>(&self) -> bool {
1053 // borrowed from `Any` implementation
1054 TypeId::of::<T>() == self.dialect()
1055 }
1056}
1057
1058/// Returns the built in [`Dialect`] corresponding to `dialect_name`.
1059///
1060/// See [`Dialect`] documentation for an example.
1061pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect>> {
1062 let dialect_name = dialect_name.as_ref();
1063 match dialect_name.to_lowercase().as_str() {
1064 "generic" => Some(Box::new(GenericDialect)),
1065 "mysql" => Some(Box::new(MySqlDialect {})),
1066 "postgresql" | "postgres" => Some(Box::new(PostgreSqlDialect {})),
1067 "hive" => Some(Box::new(HiveDialect {})),
1068 "sqlite" => Some(Box::new(SQLiteDialect {})),
1069 "snowflake" => Some(Box::new(SnowflakeDialect)),
1070 "redshift" => Some(Box::new(RedshiftSqlDialect {})),
1071 "mssql" => Some(Box::new(MsSqlDialect {})),
1072 "clickhouse" => Some(Box::new(ClickHouseDialect {})),
1073 "bigquery" => Some(Box::new(BigQueryDialect)),
1074 "ansi" => Some(Box::new(AnsiDialect {})),
1075 "duckdb" => Some(Box::new(DuckDbDialect {})),
1076 "databricks" => Some(Box::new(DatabricksDialect {})),
1077 _ => None,
1078 }
1079}
1080
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal type with a `dialect` field, used as the subject of the
    /// `dialect_of!` macro in these tests.
    struct DialectHolder<'a> {
        dialect: &'a dyn Dialect,
    }

    /// Resolves a built in dialect by name, panicking when the name is unknown.
    fn parse_dialect(v: &str) -> Box<dyn Dialect> {
        dialect_from_str(v).unwrap()
    }

    /// Asserts that each of `names` resolves to the dialect type `T`.
    fn assert_resolves_to<T: Dialect>(names: &[&str]) {
        for name in names {
            assert!(parse_dialect(name).is::<T>());
        }
    }

    #[test]
    fn test_is_dialect() {
        let generic_holder = DialectHolder {
            dialect: &GenericDialect {},
        };
        let ansi_holder = DialectHolder {
            dialect: &AnsiDialect {},
        };

        // Holder of the generic dialect.
        assert!(dialect_of!(generic_holder is GenericDialect | AnsiDialect));
        assert!(!dialect_of!(generic_holder is AnsiDialect));

        // Holder of the ANSI dialect.
        assert!(dialect_of!(ansi_holder is AnsiDialect));
        assert!(dialect_of!(ansi_holder is GenericDialect | AnsiDialect));
        assert!(!dialect_of!(ansi_holder is GenericDialect | MsSqlDialect));
    }

    #[test]
    fn test_dialect_from_str() {
        assert_resolves_to::<GenericDialect>(&["generic"]);
        assert_resolves_to::<MySqlDialect>(&["mysql", "MySql"]);
        assert_resolves_to::<PostgreSqlDialect>(&["postgresql", "postgres"]);
        assert_resolves_to::<HiveDialect>(&["hive"]);
        assert_resolves_to::<SQLiteDialect>(&["sqlite"]);
        assert_resolves_to::<SnowflakeDialect>(&["snowflake", "SnowFlake"]);
        assert_resolves_to::<MsSqlDialect>(&["MsSql"]);
        assert_resolves_to::<ClickHouseDialect>(&["clickhouse", "ClickHouse"]);
        assert_resolves_to::<BigQueryDialect>(&["bigquery", "BigQuery"]);
        assert_resolves_to::<AnsiDialect>(&["ansi", "ANSI"]);
        assert_resolves_to::<DuckDbDialect>(&["duckdb", "DuckDb"]);
        assert_resolves_to::<DatabricksDialect>(&["DataBricks", "databricks"]);

        // error cases
        for unknown in ["Unknown", ""] {
            assert!(dialect_from_str(unknown).is_none());
        }
    }

    #[test]
    fn identifier_quote_style() {
        let cases: &[(&dyn Dialect, &str, Option<char>)] = &[
            (&GenericDialect {}, "id", None),
            (&SQLiteDialect {}, "id", Some('`')),
            (&PostgreSqlDialect {}, "id", Some('"')),
        ];

        for &(dialect, ident, expected) in cases {
            assert_eq!(dialect.identifier_quote_style(ident), expected);
        }
    }

    #[test]
    fn parse_with_wrapped_dialect() {
        /// Wrapper for a dialect. In a real-world example, this wrapper
        /// would tweak the behavior of the dialect. For the test case,
        /// it forwards each overridden method to the inner dialect unaltered.
        #[derive(Debug)]
        struct WrappedDialect(MySqlDialect);

        impl Dialect for WrappedDialect {
            fn dialect(&self) -> TypeId {
                self.0.dialect()
            }

            // Identifier handling.

            fn is_identifier_start(&self, ch: char) -> bool {
                self.0.is_identifier_start(ch)
            }

            fn is_identifier_part(&self, ch: char) -> bool {
                self.0.is_identifier_part(ch)
            }

            fn is_delimited_identifier_start(&self, ch: char) -> bool {
                self.0.is_delimited_identifier_start(ch)
            }

            fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
                self.0.is_nested_delimited_identifier_start(ch)
            }

            fn peek_nested_delimited_identifier_quotes(
                &self,
                chars: Peekable<Chars<'_>>,
            ) -> Option<(char, Option<char>)> {
                self.0.peek_nested_delimited_identifier_quotes(chars)
            }

            fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
                self.0.identifier_quote_style(identifier)
            }

            // Feature flags.

            fn supports_string_literal_backslash_escape(&self) -> bool {
                self.0.supports_string_literal_backslash_escape()
            }

            fn supports_filter_during_aggregation(&self) -> bool {
                self.0.supports_filter_during_aggregation()
            }

            fn supports_within_after_array_aggregation(&self) -> bool {
                self.0.supports_within_after_array_aggregation()
            }

            fn supports_group_by_expr(&self) -> bool {
                self.0.supports_group_by_expr()
            }

            fn supports_in_empty_list(&self) -> bool {
                self.0.supports_in_empty_list()
            }

            fn convert_type_before_value(&self) -> bool {
                self.0.convert_type_before_value()
            }

            // Parser hooks.

            fn parse_prefix(&self, parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
                self.0.parse_prefix(parser)
            }

            fn parse_infix(
                &self,
                parser: &mut Parser,
                expr: &Expr,
                precedence: u8,
            ) -> Option<Result<Expr, ParserError>> {
                self.0.parse_infix(parser, expr, precedence)
            }

            fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
                self.0.get_next_precedence(parser)
            }

            fn parse_statement(
                &self,
                parser: &mut Parser,
            ) -> Option<Result<Statement, ParserError>> {
                self.0.parse_statement(parser)
            }
        }

        // The raw string keeps the backslash escape in the SQL literal readable.
        #[allow(clippy::needless_raw_string_hashes)]
        let statement = r#"SELECT 'Wayne\'s World'"#;

        let plain = Parser::parse_sql(&MySqlDialect {}, statement);
        let wrapped = Parser::parse_sql(&WrappedDialect(MySqlDialect {}), statement);

        // The wrapper must parse exactly like the dialect it wraps.
        assert!(plain.is_ok());
        assert_eq!(plain, wrapped);
    }
}