sqltk_parser/dialect/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18mod ansi;
19mod bigquery;
20mod clickhouse;
21mod databricks;
22mod duckdb;
23mod generic;
24mod hive;
25mod mssql;
26mod mysql;
27mod postgresql;
28mod redshift;
29mod snowflake;
30mod sqlite;
31
32use core::any::{Any, TypeId};
33use core::fmt::Debug;
34use core::iter::Peekable;
35use core::str::Chars;
36
37use log::debug;
38
39pub use self::ansi::AnsiDialect;
40pub use self::bigquery::BigQueryDialect;
41pub use self::clickhouse::ClickHouseDialect;
42pub use self::databricks::DatabricksDialect;
43pub use self::duckdb::DuckDbDialect;
44pub use self::generic::GenericDialect;
45pub use self::hive::HiveDialect;
46pub use self::mssql::MsSqlDialect;
47pub use self::mysql::MySqlDialect;
48pub use self::postgresql::PostgreSqlDialect;
49pub use self::redshift::RedshiftSqlDialect;
50pub use self::snowflake::SnowflakeDialect;
51pub use self::sqlite::SQLiteDialect;
52use crate::ast::{ColumnOption, Expr, Statement};
53pub use crate::keywords;
54use crate::keywords::Keyword;
55use crate::parser::{Parser, ParserError};
56use crate::tokenizer::Token;
57
58#[cfg(not(feature = "std"))]
59use alloc::boxed::Box;
60
/// Shorthand for testing whether a [`Parser`] is running with one of several
/// dialects.
///
/// Note: when possible please use the new style instead, i.e. add a method to
/// the [`Dialect`] trait rather than reaching for this macro.
///
/// Adding a method on `Dialect` has two advantages over this macro:
/// 1. user defined [`Dialect`]s can customize the parsing behavior
/// 2. the differences between dialects can be clearly documented in the trait
///
/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` expands to a chain
/// of `parser.dialect.is::<..>()` calls joined with `||`, so it evaluates to
/// `true` if `parser.dialect` is any one of the listed [`Dialect`]s.
macro_rules! dialect_of {
    ( $holder: ident is $($candidate: ty)|+ ) => {
        ($($holder.dialect.is::<$candidate>())||+)
    };
}
77
78/// Encapsulates the differences between SQL implementations.
79///
80/// # SQL Dialects
81///
82/// SQL implementations deviate from one another, either due to
83/// custom extensions or various historical reasons. This trait
84/// encapsulates the parsing differences between dialects.
85///
86/// [`GenericDialect`] is the most permissive dialect, and parses the union of
87/// all the other dialects, when there is no ambiguity. However, it does not
88/// currently allow `CREATE TABLE` statements without types specified for all
89/// columns; use [`SQLiteDialect`] if you require that.
90///
91/// # Examples
92/// Most users create a [`Dialect`] directly, as shown on the [module
93/// level documentation]:
94///
95/// ```
96/// # use sqltk_parser::dialect::AnsiDialect;
97/// let dialect = AnsiDialect {};
98/// ```
99///
100/// It is also possible to dynamically create a [`Dialect`] from its
101/// name. For example:
102///
103/// ```
104/// # use sqltk_parser::dialect::{AnsiDialect, dialect_from_str};
105/// let dialect = dialect_from_str("ansi").unwrap();
106///
107/// // Parsed dialect is an instance of `AnsiDialect`:
108/// assert!(dialect.is::<AnsiDialect>());
109/// ```
110///
111/// [module level documentation]: crate
112pub trait Dialect: Debug + Any {
113    /// Determine the [`TypeId`] of this dialect.
114    ///
115    /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overridden
116    /// by dialects that behave like other dialects
117    /// (for example when wrapping a dialect).
118    fn dialect(&self) -> TypeId {
119        self.type_id()
120    }
121
122    /// Determine if a character starts a quoted identifier. The default
123    /// implementation, accepting "double quoted" ids is both ANSI-compliant
124    /// and appropriate for most dialects (with the notable exception of
125    /// MySQL, MS SQL, and sqlite). You can accept one of characters listed
126    /// in `Word::matching_end_quote` here
127    fn is_delimited_identifier_start(&self, ch: char) -> bool {
128        ch == '"' || ch == '`'
129    }
130
131    /// Return the character used to quote identifiers.
132    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
133        None
134    }
135
136    /// Determine if quoted characters are proper for identifier
137    fn is_proper_identifier_inside_quotes(&self, mut _chars: Peekable<Chars<'_>>) -> bool {
138        true
139    }
140
141    /// Determine if a character is a valid start character for an unquoted identifier
142    fn is_identifier_start(&self, ch: char) -> bool;
143
144    /// Determine if a character is a valid unquoted identifier character
145    fn is_identifier_part(&self, ch: char) -> bool;
146
147    /// Most dialects do not have custom operators. Override this method to provide custom operators.
148    fn is_custom_operator_part(&self, _ch: char) -> bool {
149        false
150    }
151
152    /// Determine if the dialect supports escaping characters via '\' in string literals.
153    ///
154    /// Some dialects like BigQuery and Snowflake support this while others like
155    /// Postgres do not. Such that the following is accepted by the former but
156    /// rejected by the latter.
157    /// ```sql
158    /// SELECT 'ab\'cd';
159    /// ```
160    ///
161    /// Conversely, such dialects reject the following statement which
162    /// otherwise would be valid in the other dialects.
163    /// ```sql
164    /// SELECT '\';
165    /// ```
166    fn supports_string_literal_backslash_escape(&self) -> bool {
167        false
168    }
169
170    /// Determine if the dialect supports string literals with `U&` prefix.
171    /// This is used to specify Unicode code points in string literals.
172    /// For example, in PostgreSQL, the following is a valid string literal:
173    /// ```sql
174    /// SELECT U&'\0061\0062\0063';
175    /// ```
176    /// This is equivalent to the string literal `'abc'`.
177    /// See
178    ///  - [Postgres docs](https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE)
179    ///  - [H2 docs](http://www.h2database.com/html/grammar.html#string)
180    fn supports_unicode_string_literal(&self) -> bool {
181        false
182    }
183
184    /// Does the dialect support `FILTER (WHERE expr)` for aggregate queries?
185    fn supports_filter_during_aggregation(&self) -> bool {
186        false
187    }
188
189    /// Returns true if the dialect supports referencing another named window
190    /// within a window clause declaration.
191    ///
192    /// Example
193    /// ```sql
194    /// SELECT * FROM mytable
195    /// WINDOW mynamed_window AS another_named_window
196    /// ```
197    fn supports_window_clause_named_window_reference(&self) -> bool {
198        false
199    }
200
201    /// Returns true if the dialect supports `ARRAY_AGG() [WITHIN GROUP (ORDER BY)]` expressions.
202    /// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause, e.g. [`ANSI`]
203    ///
204    /// [`ANSI`]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#array-aggregate-function
205    fn supports_within_after_array_aggregation(&self) -> bool {
206        false
207    }
208
209    /// Returns true if the dialects supports `group sets, roll up, or cube` expressions.
210    fn supports_group_by_expr(&self) -> bool {
211        false
212    }
213
214    /// Returns true if the dialect supports CONNECT BY.
215    fn supports_connect_by(&self) -> bool {
216        false
217    }
218
219    /// Returns true if the dialect supports the MATCH_RECOGNIZE operation.
220    fn supports_match_recognize(&self) -> bool {
221        false
222    }
223
224    /// Returns true if the dialect supports `(NOT) IN ()` expressions
225    fn supports_in_empty_list(&self) -> bool {
226        false
227    }
228
229    /// Returns true if the dialect supports `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE} [TRANSACTION]` statements
230    fn supports_start_transaction_modifier(&self) -> bool {
231        false
232    }
233
234    /// Returns true if the dialect supports named arguments of the form `FUN(a = '1', b = '2')`.
235    fn supports_named_fn_args_with_eq_operator(&self) -> bool {
236        false
237    }
238
239    /// Returns true if the dialect supports named arguments of the form `FUN(a : '1', b : '2')`.
240    fn supports_named_fn_args_with_colon_operator(&self) -> bool {
241        false
242    }
243
244    /// Returns true if the dialect supports named arguments of the form `FUN(a := '1', b := '2')`.
245    fn supports_named_fn_args_with_assignment_operator(&self) -> bool {
246        false
247    }
248
249    /// Returns true if the dialect supports named arguments of the form `FUN(a => '1', b => '2')`.
250    fn supports_named_fn_args_with_rarrow_operator(&self) -> bool {
251        true
252    }
253
254    /// Returns true if dialect supports argument name as arbitrary expression.
255    /// e.g. `FUN(LOWER('a'):'1',  b:'2')`
256    /// Such function arguments are represented in the AST by the `FunctionArg::ExprNamed` variant,
257    /// otherwise use the `FunctionArg::Named` variant (compatible reason).
258    fn supports_named_fn_args_with_expr_name(&self) -> bool {
259        false
260    }
261
262    /// Returns true if the dialect supports identifiers starting with a numeric
263    /// prefix such as tables named `59901_user_login`
264    fn supports_numeric_prefix(&self) -> bool {
265        false
266    }
267
268    /// Returns true if the dialects supports specifying null treatment
269    /// as part of a window function's parameter list as opposed
270    /// to after the parameter list.
271    ///
272    /// i.e The following syntax returns true
273    /// ```sql
274    /// FIRST_VALUE(a IGNORE NULLS) OVER ()
275    /// ```
276    /// while the following syntax returns false
277    /// ```sql
278    /// FIRST_VALUE(a) IGNORE NULLS OVER ()
279    /// ```
280    fn supports_window_function_null_treatment_arg(&self) -> bool {
281        false
282    }
283
284    /// Returns true if the dialect supports defining structs or objects using a
285    /// syntax like `{'x': 1, 'y': 2, 'z': 3}`.
286    fn supports_dictionary_syntax(&self) -> bool {
287        false
288    }
289
290    /// Returns true if the dialect supports defining object using the
291    /// syntax like `Map {1: 10, 2: 20}`.
292    fn support_map_literal_syntax(&self) -> bool {
293        false
294    }
295
296    /// Returns true if the dialect supports lambda functions, for example:
297    ///
298    /// ```sql
299    /// SELECT transform(array(1, 2, 3), x -> x + 1); -- returns [2,3,4]
300    /// ```
301    fn supports_lambda_functions(&self) -> bool {
302        false
303    }
304
305    /// Returns true if the dialect supports method calls, for example:
306    ///
307    /// ```sql
308    /// SELECT (SELECT ',' + name FROM sys.objects  FOR XML PATH(''), TYPE).value('.','NVARCHAR(MAX)')
309    /// ```
310    fn supports_methods(&self) -> bool {
311        false
312    }
313
314    /// Returns true if the dialect supports multiple variable assignment
315    /// using parentheses in a `SET` variable declaration.
316    ///
317    /// ```sql
318    /// SET (variable[, ...]) = (expression[, ...]);
319    /// ```
320    fn supports_parenthesized_set_variables(&self) -> bool {
321        false
322    }
323
324    /// Returns true if the dialect supports an `EXCEPT` clause following a
325    /// wildcard in a select list.
326    ///
327    /// For example
328    /// ```sql
329    /// SELECT * EXCEPT order_id FROM orders;
330    /// ```
331    fn supports_select_wildcard_except(&self) -> bool {
332        false
333    }
334
335    /// Returns true if the dialect has a CONVERT function which accepts a type first
336    /// and an expression second, e.g. `CONVERT(varchar, 1)`
337    fn convert_type_before_value(&self) -> bool {
338        false
339    }
340
341    /// Returns true if the dialect supports triple quoted string
342    /// e.g. `"""abc"""`
343    fn supports_triple_quoted_string(&self) -> bool {
344        false
345    }
346
347    /// Dialect-specific prefix parser override
348    fn parse_prefix(&self, _parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
349        // return None to fall back to the default behavior
350        None
351    }
352
353    /// Does the dialect support trailing commas around the query?
354    fn supports_trailing_commas(&self) -> bool {
355        false
356    }
357
358    /// Does the dialect support parsing `LIMIT 1, 2` as `LIMIT 2 OFFSET 1`?
359    fn supports_limit_comma(&self) -> bool {
360        false
361    }
362
363    /// Does the dialect support trailing commas in the projection list?
364    fn supports_projection_trailing_commas(&self) -> bool {
365        self.supports_trailing_commas()
366    }
367
368    /// Returns true if the dialect supports double dot notation for object names
369    ///
370    /// Example
371    /// ```sql
372    /// SELECT * FROM db_name..table_name
373    /// ```
374    fn supports_object_name_double_dot_notation(&self) -> bool {
375        false
376    }
377
378    /// Return true if the dialect supports the STRUCT literal
379    ///
380    /// Example
381    /// ```sql
382    /// SELECT STRUCT(1 as one, 'foo' as foo, false)
383    /// ```
384    fn supports_struct_literal(&self) -> bool {
385        false
386    }
387
388    /// Dialect-specific infix parser override
389    ///
390    /// This method is called to parse the next infix expression.
391    ///
392    /// If `None` is returned, falls back to the default behavior.
393    fn parse_infix(
394        &self,
395        _parser: &mut Parser,
396        _expr: &Expr,
397        _precedence: u8,
398    ) -> Option<Result<Expr, ParserError>> {
399        // return None to fall back to the default behavior
400        None
401    }
402
403    /// Dialect-specific precedence override
404    ///
405    /// This method is called to get the precedence of the next token.
406    ///
407    /// If `None` is returned, falls back to the default behavior.
408    fn get_next_precedence(&self, _parser: &Parser) -> Option<Result<u8, ParserError>> {
409        // return None to fall back to the default behavior
410        None
411    }
412
413    /// Get the precedence of the next token, looking at the full token stream.
414    ///
415    /// A higher number => higher precedence
416    ///
417    /// See [`Self::get_next_precedence`] to override the behavior for just the
418    /// next token.
419    ///
420    /// The default implementation is used for many dialects, but can be
421    /// overridden to provide dialect-specific behavior.
422    fn get_next_precedence_default(&self, parser: &Parser) -> Result<u8, ParserError> {
423        if let Some(precedence) = self.get_next_precedence(parser) {
424            return precedence;
425        }
426        macro_rules! p {
427            ($precedence:ident) => {
428                self.prec_value(Precedence::$precedence)
429            };
430        }
431
432        let token = parser.peek_token();
433        debug!("get_next_precedence_full() {:?}", token);
434        match token.token {
435            Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
436            Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
437            Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
438
439            Token::Word(w) if w.keyword == Keyword::AT => {
440                match (
441                    parser.peek_nth_token(1).token,
442                    parser.peek_nth_token(2).token,
443                ) {
444                    (Token::Word(w), Token::Word(w2))
445                        if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
446                    {
447                        Ok(p!(AtTz))
448                    }
449                    _ => Ok(self.prec_unknown()),
450                }
451            }
452
453            Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
454                // The precedence of NOT varies depending on keyword that
455                // follows it. If it is followed by IN, BETWEEN, or LIKE,
456                // it takes on the precedence of those tokens. Otherwise, it
457                // is not an infix operator, and therefore has zero
458                // precedence.
459                Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
460                Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
461                Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
462                Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
463                Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
464                Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
465                Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
466                _ => Ok(self.prec_unknown()),
467            },
468            Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
469            Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
470            Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
471            Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
472            Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
473            Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
474            Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
475            Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
476            Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
477            Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
478            Token::Eq
479            | Token::Lt
480            | Token::LtEq
481            | Token::Neq
482            | Token::Gt
483            | Token::GtEq
484            | Token::DoubleEq
485            | Token::Tilde
486            | Token::TildeAsterisk
487            | Token::ExclamationMarkTilde
488            | Token::ExclamationMarkTildeAsterisk
489            | Token::DoubleTilde
490            | Token::DoubleTildeAsterisk
491            | Token::ExclamationMarkDoubleTilde
492            | Token::ExclamationMarkDoubleTildeAsterisk
493            | Token::Spaceship => Ok(p!(Eq)),
494            Token::Pipe => Ok(p!(Pipe)),
495            Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
496            Token::Ampersand => Ok(p!(Ampersand)),
497            Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
498            Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
499                Ok(p!(MulDivModOp))
500            }
501            Token::DoubleColon
502            | Token::ExclamationMark
503            | Token::LBracket
504            | Token::Overlap
505            | Token::CaretAt => Ok(p!(DoubleColon)),
506            Token::Arrow
507            | Token::LongArrow
508            | Token::HashArrow
509            | Token::HashLongArrow
510            | Token::AtArrow
511            | Token::ArrowAt
512            | Token::HashMinus
513            | Token::AtQuestion
514            | Token::AtAt
515            | Token::Question
516            | Token::QuestionAnd
517            | Token::QuestionPipe
518            | Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
519            _ => Ok(self.prec_unknown()),
520        }
521    }
522
523    /// Dialect-specific statement parser override
524    ///
525    /// This method is called to parse the next statement.
526    ///
527    /// If `None` is returned, falls back to the default behavior.
528    fn parse_statement(&self, _parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
529        // return None to fall back to the default behavior
530        None
531    }
532
533    /// Dialect-specific column option parser override
534    ///
535    /// This method is called to parse the next column option.
536    ///
537    /// If `None` is returned, falls back to the default behavior.
538    fn parse_column_option(
539        &self,
540        _parser: &mut Parser,
541    ) -> Result<Option<Result<Option<ColumnOption>, ParserError>>, ParserError> {
542        // return None to fall back to the default behavior
543        Ok(None)
544    }
545
546    /// Decide the lexical Precedence of operators.
547    ///
548    /// Uses (APPROXIMATELY) <https://www.postgresql.org/docs/7.0/operators.htm#AEN2026> as a reference
549    fn prec_value(&self, prec: Precedence) -> u8 {
550        match prec {
551            Precedence::DoubleColon => 50,
552            Precedence::AtTz => 41,
553            Precedence::MulDivModOp => 40,
554            Precedence::PlusMinus => 30,
555            Precedence::Xor => 24,
556            Precedence::Ampersand => 23,
557            Precedence::Caret => 22,
558            Precedence::Pipe => 21,
559            Precedence::Between => 20,
560            Precedence::Eq => 20,
561            Precedence::Like => 19,
562            Precedence::Is => 17,
563            Precedence::PgOther => 16,
564            Precedence::UnaryNot => 15,
565            Precedence::And => 10,
566            Precedence::Or => 5,
567        }
568    }
569
570    /// Returns the precedence when the precedence is otherwise unknown
571    fn prec_unknown(&self) -> u8 {
572        0
573    }
574
575    /// Returns true if this dialect requires the `TABLE` keyword after `DESCRIBE`
576    ///
577    /// Defaults to false.
578    ///
579    /// If true, the following statement is valid: `DESCRIBE TABLE my_table`
580    /// If false, the following statements are valid: `DESCRIBE my_table` and `DESCRIBE table`
581    fn describe_requires_table_keyword(&self) -> bool {
582        false
583    }
584
585    /// Returns true if this dialect allows the `EXTRACT` function to words other than [`Keyword`].
586    fn allow_extract_custom(&self) -> bool {
587        false
588    }
589
590    /// Returns true if this dialect allows the `EXTRACT` function to use single quotes in the part being extracted.
591    fn allow_extract_single_quotes(&self) -> bool {
592        false
593    }
594
595    /// Does the dialect support with clause in create index statement?
596    /// e.g. `CREATE INDEX idx ON t WITH (key = value, key2)`
597    fn supports_create_index_with_clause(&self) -> bool {
598        false
599    }
600
601    /// Whether `INTERVAL` expressions require units (called "qualifiers" in the ANSI SQL spec) to be specified,
602    /// e.g. `INTERVAL 1 DAY` vs `INTERVAL 1`.
603    ///
604    /// Expressions within intervals (e.g. `INTERVAL '1' + '1' DAY`) are only allowed when units are required.
605    ///
606    /// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1398> for more information.
607    ///
608    /// When `true`:
609    /// * `INTERVAL '1' DAY` is VALID
610    /// * `INTERVAL 1 + 1 DAY` is VALID
611    /// * `INTERVAL '1' + '1' DAY` is VALID
612    /// * `INTERVAL '1'` is INVALID
613    ///
614    /// When `false`:
615    /// * `INTERVAL '1'` is VALID
616    /// * `INTERVAL '1' DAY` is VALID — unit is not required, but still allowed
617    /// * `INTERVAL 1 + 1 DAY` is INVALID
618    fn require_interval_qualifier(&self) -> bool {
619        false
620    }
621
622    fn supports_explain_with_utility_options(&self) -> bool {
623        false
624    }
625
626    fn supports_asc_desc_in_column_definition(&self) -> bool {
627        false
628    }
629
630    /// Returns true if the dialect supports `a!` expressions
631    fn supports_factorial_operator(&self) -> bool {
632        false
633    }
634
635    /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
636    /// as an alias assignment operator, rather than a boolean expression.
637    /// For example: the following statements are equivalent for such a dialect:
638    /// ```sql
639    ///  SELECT col_alias = col FROM tbl;
640    ///  SELECT col_alias AS col FROM tbl;
641    /// ```
642    fn supports_eq_alias_assignment(&self) -> bool {
643        false
644    }
645
646    /// Returns true if this dialect supports the `TRY_CONVERT` function
647    fn supports_try_convert(&self) -> bool {
648        false
649    }
650
651    /// Returns true if the dialect supports `!a` syntax for boolean `NOT` expressions.
652    fn supports_bang_not_operator(&self) -> bool {
653        false
654    }
655
656    /// Returns true if the dialect supports the `LISTEN`, `UNLISTEN` and `NOTIFY` statements
657    fn supports_listen_notify(&self) -> bool {
658        false
659    }
660
661    /// Returns true if the dialect supports the `LOAD DATA` statement
662    fn supports_load_data(&self) -> bool {
663        false
664    }
665
666    /// Returns true if the dialect supports the `LOAD extension` statement
667    fn supports_load_extension(&self) -> bool {
668        false
669    }
670
671    /// Returns true if this dialect expects the `TOP` option
672    /// before the `ALL`/`DISTINCT` options in a `SELECT` statement.
673    fn supports_top_before_distinct(&self) -> bool {
674        false
675    }
676
677    /// Returns true if the dialect supports boolean literals (`true` and `false`).
678    /// For example, in MSSQL these are treated as identifiers rather than boolean literals.
679    fn supports_boolean_literals(&self) -> bool {
680        true
681    }
682
683    /// Returns true if this dialect supports the `LIKE 'pattern'` option in
684    /// a `SHOW` statement before the `IN` option
685    fn supports_show_like_before_in(&self) -> bool {
686        false
687    }
688
689    /// Returns true if this dialect supports the `COMMENT` statement
690    fn supports_comment_on(&self) -> bool {
691        false
692    }
693
694    /// Returns true if the dialect supports the `CREATE TABLE SELECT` statement
695    fn supports_create_table_select(&self) -> bool {
696        false
697    }
698
699    /// Returns true if the dialect supports PartiQL for querying semi-structured data
700    /// <https://partiql.org/index.html>
701    fn supports_partiql(&self) -> bool {
702        false
703    }
704
705    /// Returns true if the specified keyword is reserved and cannot be
706    /// used as an identifier without special handling like quoting.
707    fn is_reserved_for_identifier(&self, kw: Keyword) -> bool {
708        keywords::RESERVED_FOR_IDENTIFIER.contains(&kw)
709    }
710}
711
/// This represents the operators for which precedence must be defined
///
/// higher number -> higher precedence
///
/// The numeric value of each variant is decided by [`Dialect::prec_value`];
/// the variant-to-token mapping described below is the one used by
/// [`Dialect::get_next_precedence_default`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Precedence {
    /// `Token::DoubleColon`, `Token::ExclamationMark`, `Token::LBracket`,
    /// `Token::Overlap` and `Token::CaretAt`.
    DoubleColon,
    /// The `AT TIME ZONE` operator.
    AtTz,
    /// `Token::Mul`, `Token::Div`, `Token::DuckIntDiv`, `Token::Mod`,
    /// `Token::StringConcat` and the `DIV` keyword.
    MulDivModOp,
    /// `Token::Plus` and `Token::Minus`.
    PlusMinus,
    /// The `XOR` keyword.
    Xor,
    /// `Token::Ampersand`.
    Ampersand,
    /// `Token::Caret`, `Token::Sharp`, `Token::ShiftRight` and `Token::ShiftLeft`.
    Caret,
    /// `Token::Pipe`.
    Pipe,
    /// The `IN`, `BETWEEN` and `OPERATOR` keywords, plus `NOT IN` / `NOT BETWEEN`.
    Between,
    /// Comparison and pattern-match tokens such as `Token::Eq`, `Token::Lt`,
    /// `Token::Neq`, `Token::Tilde` and `Token::Spaceship`.
    Eq,
    /// The `LIKE`, `ILIKE`, `RLIKE`, `REGEXP` and `SIMILAR` keywords, plus
    /// their `NOT`-prefixed forms.
    Like,
    /// The `IS` keyword.
    Is,
    /// Arrow / question-mark style tokens (`Token::Arrow`, `Token::AtAt`,
    /// `Token::Question`, ...) and `Token::CustomBinaryOperator`.
    PgOther,
    /// Not produced by the default infix lookup.
    // NOTE(review): presumably used by the parser for prefix `NOT`; confirm.
    UnaryNot,
    /// The `AND` keyword.
    And,
    /// The `OR` keyword.
    Or,
}
734
735impl dyn Dialect {
736    #[inline]
737    pub fn is<T: Dialect>(&self) -> bool {
738        // borrowed from `Any` implementation
739        TypeId::of::<T>() == self.dialect()
740    }
741}
742
743/// Returns the built in [`Dialect`] corresponding to `dialect_name`.
744///
745/// See [`Dialect`] documentation for an example.
746pub fn dialect_from_str(dialect_name: impl AsRef<str>) -> Option<Box<dyn Dialect>> {
747    let dialect_name = dialect_name.as_ref();
748    match dialect_name.to_lowercase().as_str() {
749        "generic" => Some(Box::new(GenericDialect)),
750        "mysql" => Some(Box::new(MySqlDialect {})),
751        "postgresql" | "postgres" => Some(Box::new(PostgreSqlDialect {})),
752        "hive" => Some(Box::new(HiveDialect {})),
753        "sqlite" => Some(Box::new(SQLiteDialect {})),
754        "snowflake" => Some(Box::new(SnowflakeDialect)),
755        "redshift" => Some(Box::new(RedshiftSqlDialect {})),
756        "mssql" => Some(Box::new(MsSqlDialect {})),
757        "clickhouse" => Some(Box::new(ClickHouseDialect {})),
758        "bigquery" => Some(Box::new(BigQueryDialect)),
759        "ansi" => Some(Box::new(AnsiDialect {})),
760        "duckdb" => Some(Box::new(DuckDbDialect {})),
761        "databricks" => Some(Box::new(DatabricksDialect {})),
762        _ => None,
763    }
764}
765
766#[cfg(test)]
767mod tests {
768    use super::*;
769
770    struct DialectHolder<'a> {
771        dialect: &'a dyn Dialect,
772    }
773
774    #[test]
775    fn test_is_dialect() {
776        let generic_dialect: &dyn Dialect = &GenericDialect {};
777        let ansi_dialect: &dyn Dialect = &AnsiDialect {};
778
779        let generic_holder = DialectHolder {
780            dialect: generic_dialect,
781        };
782        let ansi_holder = DialectHolder {
783            dialect: ansi_dialect,
784        };
785
786        assert!(dialect_of!(generic_holder is GenericDialect |  AnsiDialect),);
787        assert!(!dialect_of!(generic_holder is  AnsiDialect));
788        assert!(dialect_of!(ansi_holder is AnsiDialect));
789        assert!(dialect_of!(ansi_holder is GenericDialect | AnsiDialect));
790        assert!(!dialect_of!(ansi_holder is GenericDialect | MsSqlDialect));
791    }
792
793    #[test]
794    fn test_dialect_from_str() {
795        assert!(parse_dialect("generic").is::<GenericDialect>());
796        assert!(parse_dialect("mysql").is::<MySqlDialect>());
797        assert!(parse_dialect("MySql").is::<MySqlDialect>());
798        assert!(parse_dialect("postgresql").is::<PostgreSqlDialect>());
799        assert!(parse_dialect("postgres").is::<PostgreSqlDialect>());
800        assert!(parse_dialect("hive").is::<HiveDialect>());
801        assert!(parse_dialect("sqlite").is::<SQLiteDialect>());
802        assert!(parse_dialect("snowflake").is::<SnowflakeDialect>());
803        assert!(parse_dialect("SnowFlake").is::<SnowflakeDialect>());
804        assert!(parse_dialect("MsSql").is::<MsSqlDialect>());
805        assert!(parse_dialect("clickhouse").is::<ClickHouseDialect>());
806        assert!(parse_dialect("ClickHouse").is::<ClickHouseDialect>());
807        assert!(parse_dialect("bigquery").is::<BigQueryDialect>());
808        assert!(parse_dialect("BigQuery").is::<BigQueryDialect>());
809        assert!(parse_dialect("ansi").is::<AnsiDialect>());
810        assert!(parse_dialect("ANSI").is::<AnsiDialect>());
811        assert!(parse_dialect("duckdb").is::<DuckDbDialect>());
812        assert!(parse_dialect("DuckDb").is::<DuckDbDialect>());
813        assert!(parse_dialect("DataBricks").is::<DatabricksDialect>());
814        assert!(parse_dialect("databricks").is::<DatabricksDialect>());
815
816        // error cases
817        assert!(dialect_from_str("Unknown").is_none());
818        assert!(dialect_from_str("").is_none());
819    }
820
821    fn parse_dialect(v: &str) -> Box<dyn Dialect> {
822        dialect_from_str(v).unwrap()
823    }
824
825    #[test]
826    fn identifier_quote_style() {
827        let tests: Vec<(&dyn Dialect, &str, Option<char>)> = vec![
828            (&GenericDialect {}, "id", None),
829            (&SQLiteDialect {}, "id", Some('`')),
830            (&PostgreSqlDialect {}, "id", Some('"')),
831        ];
832
833        for (dialect, ident, expected) in tests {
834            let actual = dialect.identifier_quote_style(ident);
835
836            assert_eq!(actual, expected);
837        }
838    }
839
840    #[test]
841    fn parse_with_wrapped_dialect() {
842        /// Wrapper for a dialect. In a real-world example, this wrapper
843        /// would tweak the behavior of the dialect. For the test case,
844        /// it wraps all methods unaltered.
845        #[derive(Debug)]
846        struct WrappedDialect(MySqlDialect);
847
848        impl Dialect for WrappedDialect {
849            fn dialect(&self) -> std::any::TypeId {
850                self.0.dialect()
851            }
852
853            fn is_identifier_start(&self, ch: char) -> bool {
854                self.0.is_identifier_start(ch)
855            }
856
857            fn is_delimited_identifier_start(&self, ch: char) -> bool {
858                self.0.is_delimited_identifier_start(ch)
859            }
860
861            fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
862                self.0.identifier_quote_style(identifier)
863            }
864
865            fn supports_string_literal_backslash_escape(&self) -> bool {
866                self.0.supports_string_literal_backslash_escape()
867            }
868
869            fn is_proper_identifier_inside_quotes(
870                &self,
871                chars: std::iter::Peekable<std::str::Chars<'_>>,
872            ) -> bool {
873                self.0.is_proper_identifier_inside_quotes(chars)
874            }
875
876            fn supports_filter_during_aggregation(&self) -> bool {
877                self.0.supports_filter_during_aggregation()
878            }
879
880            fn supports_within_after_array_aggregation(&self) -> bool {
881                self.0.supports_within_after_array_aggregation()
882            }
883
884            fn supports_group_by_expr(&self) -> bool {
885                self.0.supports_group_by_expr()
886            }
887
888            fn supports_in_empty_list(&self) -> bool {
889                self.0.supports_in_empty_list()
890            }
891
892            fn convert_type_before_value(&self) -> bool {
893                self.0.convert_type_before_value()
894            }
895
896            fn parse_prefix(
897                &self,
898                parser: &mut sqltk_parser::parser::Parser,
899            ) -> Option<Result<Expr, sqltk_parser::parser::ParserError>> {
900                self.0.parse_prefix(parser)
901            }
902
903            fn parse_infix(
904                &self,
905                parser: &mut sqltk_parser::parser::Parser,
906                expr: &Expr,
907                precedence: u8,
908            ) -> Option<Result<Expr, sqltk_parser::parser::ParserError>> {
909                self.0.parse_infix(parser, expr, precedence)
910            }
911
912            fn get_next_precedence(
913                &self,
914                parser: &sqltk_parser::parser::Parser,
915            ) -> Option<Result<u8, sqltk_parser::parser::ParserError>> {
916                self.0.get_next_precedence(parser)
917            }
918
919            fn parse_statement(
920                &self,
921                parser: &mut sqltk_parser::parser::Parser,
922            ) -> Option<Result<Statement, sqltk_parser::parser::ParserError>> {
923                self.0.parse_statement(parser)
924            }
925
926            fn is_identifier_part(&self, ch: char) -> bool {
927                self.0.is_identifier_part(ch)
928            }
929        }
930
931        #[allow(clippy::needless_raw_string_hashes)]
932        let statement = r#"SELECT 'Wayne\'s World'"#;
933        let res1 = Parser::parse_sql(&MySqlDialect {}, statement);
934        let res2 = Parser::parse_sql(&WrappedDialect(MySqlDialect {}), statement);
935        assert!(res1.is_ok());
936        assert_eq!(res1, res2);
937    }
938}
sqltk_parser/dialect/mod.rs

sqltk_parser/dialect/
mod.rs