// sqruff_lib_dialects/databricks.rs

use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
use crate::sparksql;
use sqruff_lib_core::helpers::Config;
use sqruff_lib_core::parser::grammar::anyof::one_of;
use sqruff_lib_core::parser::grammar::delimited::Delimited;
use sqruff_lib_core::parser::grammar::sequence::Bracketed;
use sqruff_lib_core::parser::matchable::MatchableTrait;
use sqruff_lib_core::parser::segments::meta::MetaSegment;
use sqruff_lib_core::{
    dialects::{Dialect, init::DialectKind},
    helpers::ToMatchable,
    parser::grammar::{Ref, sequence::Sequence},
    vec_of_erased,
};

pub fn dialect() -> Dialect {
    let raw_sparksql = sparksql::raw_dialect();

    let mut databricks = sparksql::raw_dialect();
    databricks.name = DialectKind::Databricks;

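    // Keywords: start from the SparkSQL sets, treat SparkSQL's reserved keywords as
    // unreserved, then apply the Databricks-specific reserved keyword list.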
    databricks
        .sets_mut("unreserved_keywords")
        .extend(UNRESERVED_KEYWORDS);
    databricks
        .sets_mut("unreserved_keywords")
        .extend(raw_sparksql.sets("reserved_keywords"));
    databricks
        .sets_mut("unreserved_keywords")
        .retain(|x| !RESERVED_KEYWORDS.contains(x));
    databricks.sets_mut("reserved_keywords").clear();
    databricks
        .sets_mut("reserved_keywords")
        .extend(RESERVED_KEYWORDS);

    databricks
        .sets_mut("date_part_function_name")
        .extend(["TIMEDIFF"]);

    databricks.add([
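        // A principal identifier: a user, group, or service principal name.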
        (
            "PrincipalIdentifierSegment".into(),
            one_of(vec_of_erased![
                Ref::new("NakedIdentifierSegment"),
                Ref::new("BackQuotedIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
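        // `[SET] OWNER TO principal`, used by `ALTER ...` statements to transfer ownership,
        // e.g. `ALTER CATALOG my_catalog SET OWNER TO my_group;`.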
        (
            "SetOwnerGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET").optional(),
                Ref::keyword("OWNER"),
                Ref::keyword("TO"),
                Ref::new("PrincipalIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A reference to a catalog.
        // https://docs.databricks.com/data-governance/unity-catalog/create-catalogs.html
        (
            "CatalogReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        // An `ALTER CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-catalog.html
        (
            "AlterCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("ALTER"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("SetOwnerGrammar"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `CREATE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-catalog.html
        (
            "CreateCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("CREATE"),
                Ref::keyword("CATALOG"),
                Ref::new("IfNotExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("CommentGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `DROP CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-drop-catalog.html
        (
            "DropCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DROP"),
                Ref::keyword("CATALOG"),
                Ref::new("IfExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("DropBehaviorGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-catalog.html
        (
            "UseCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE DATABASE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-usedb.html
        (
            "UseDatabaseStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                one_of(vec_of_erased![
                    Ref::keyword("DATABASE"),
                    Ref::keyword("SCHEMA"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::new("DatabaseReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `SET TIME ZONE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html
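        // e.g. `SET TIME ZONE LOCAL;` or `SET TIME ZONE '+02:00';`.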
        (
            "SetTimeZoneStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET"),
                Ref::keyword("TIME"),
                Ref::keyword("ZONE"),
                one_of(vec_of_erased![
                    Ref::keyword("LOCAL"),
                    Ref::new("QuotedLiteralSegment"),
                    Ref::new("IntervalExpressionSegment")
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // An `OPTIMIZE` statement.
        // https://docs.databricks.com/en/sql/language-manual/delta-optimize.html
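        // e.g. `OPTIMIZE my_table WHERE date >= '2024-01-01' ZORDER BY (event_type);`.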
        (
            "OptimizeTableStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("OPTIMIZE"),
                Ref::new("TableReferenceSegment"),
                Sequence::new(vec_of_erased![
                    Ref::keyword("WHERE"),
                    Ref::new("ExpressionSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("ZORDER"),
                    Ref::keyword("BY"),
                    Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new(
                        "ColumnReferenceSegment"
                    )])]),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // A reference to a database.
            "DatabaseReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a table, CTE, subquery or alias.
            "TableReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a schema.
            "SchemaReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
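        // An `IDENTIFIER(...)` clause, e.g. `IDENTIFIER(:mytab)`, which resolves its
        // argument to an object name.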
        (
            "IdentifierClauseSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("IDENTIFIER"),
                Bracketed::new(vec_of_erased![Ref::new("SingleIdentifierGrammar")]),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // Drop Volume Statement.
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-drop-volume.html
            "DropVolumeStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DROP"),
                Ref::keyword("VOLUME"),
                Ref::new("IfExistsGrammar").optional(),
                Ref::new("VolumeReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        (
            "VolumeReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-describe-volume.html
            "DescribeObjectGrammar".into(),
            sparksql::dialect()
                .grammar("DescribeObjectGrammar")
                .copy(
                    Some(vec_of_erased![Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUME"),
                        Ref::new("VolumeReferenceSegment"),
                    ])]),
                    Some(0),
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        (
            // A `DECLARE [OR REPLACE] VARIABLE` statement.
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-declare-variable.html
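            // e.g. `DECLARE OR REPLACE VARIABLE my_var INT DEFAULT 5;`.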
245            "DeclareOrReplaceVariableStatementSegment".into(),
246            Sequence::new(vec_of_erased![
247                Ref::keyword("DECLARE"),
248                Ref::new("OrReplaceGrammar").optional(),
249                Ref::keyword("VARIABLE").optional(),
250                Ref::new("SingleIdentifierGrammar"),
251                Ref::new("DatatypeSegment").optional(),
252                Sequence::new(vec_of_erased![
253                    one_of(vec_of_erased![
254                        Ref::keyword("DEFAULT"),
255                        Ref::new("EqualsSegment")
256                    ]),
257                    Ref::new("ExpressionSegment"),
258                ])
259                .config(|config| {
260                    config.optional();
261                }),
262            ])
263            .to_matchable()
264            .into(),
265        ),
266        // `COMMENT ON` statement.
267        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html
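        // e.g. `COMMENT ON TABLE my_table IS 'A useful comment.';`.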
        (
            "CommentOnStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("COMMENT"),
                Ref::keyword("ON"),
                one_of(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        Ref::keyword("CATALOG"),
                        Ref::new("CatalogReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("DATABASE"),
                            Ref::keyword("SCHEMA")
                        ]),
                        Ref::new("DatabaseReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("TABLE"),
                        Ref::new("TableReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUME"),
                        Ref::new("VolumeReferenceSegment"),
                    ]),
                    // TODO Split out individual items if they have references
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("CONNECTION"),
                            Ref::keyword("PROVIDER"),
                            Ref::keyword("RECIPIENT"),
                            Ref::keyword("SHARE"),
                        ]),
                        Ref::new("ObjectReferenceSegment"),
                    ]),
                ]),
                Ref::keyword("IS"),
                one_of(vec_of_erased![
                    Ref::new("QuotedLiteralSegment"),
                    Ref::keyword("NULL"),
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-schemas.html
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the catalog level
        // - `LIKE` keyword is optional
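        // e.g. `SHOW SCHEMAS FROM my_catalog LIKE 'pay*';`.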
        (
            "ShowDatabasesSchemasGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("DATABASES"),
                    Ref::keyword("SCHEMAS"),
                ]),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // Show Functions Statement
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-functions.html
        //
        // Represents the grammar after the `SHOW` keyword.
        //
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the schema level
        // - `LIKE` keyword is optional
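        // e.g. `SHOW USER FUNCTIONS FROM my_schema LIKE 'co*';`.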
        (
            "ShowFunctionsGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("USER"),
                    Ref::keyword("SYSTEM"),
                    Ref::keyword("ALL"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::keyword("FUNCTIONS"),
                Sequence::new(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                        Ref::new("DatabaseReferenceSegment"),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("LIKE").optional(),
                        one_of(vec_of_erased![
                            // qualified function from a database
                            Sequence::new(vec_of_erased![
                                Ref::new("DatabaseReferenceSegment"),
                                Ref::new("DotSegment"),
                                Ref::new("FunctionNameSegment"),
                            ])
                            .config(|config| {
                                config.disallow_gaps();
                            }),
                            // non-qualified function
                            Ref::new("FunctionNameSegment"),
                            // Regex/like string
                            Ref::new("QuotedLiteralSegment"),
                        ]),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-tables.html
        // Differences between this and the SparkSQL version:
        // - `LIKE` keyword is optional
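        // e.g. `SHOW TABLES IN my_schema LIKE 'sales_*';`.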
        (
            "ShowTablesGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("TABLES"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html
        // Only difference between this and the SparkSQL version:
        // - `LIKE` keyword is optional
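        // e.g. `SHOW VIEWS FROM my_schema LIKE 'sales_*';`.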
        (
            "ShowViewsGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("VIEWS"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-volumes.html
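        // e.g. `SHOW VOLUMES IN my_schema LIKE 'raw_*';`.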
        (
            "ShowObjectGrammar".into(),
            sparksql::raw_dialect()
                .grammar("ShowObjectGrammar")
                .copy(
                    Some(vec_of_erased![Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUMES"),
                        Sequence::new(vec_of_erased![
                            one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                            Ref::new("DatabaseReferenceSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                        Sequence::new(vec_of_erased![
                            Ref::keyword("LIKE").optional(),
                            Ref::new("QuotedLiteralSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                    ])]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-dml-insert-into#insert-using-the-by-name-clause
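        // e.g. `INSERT INTO my_table BY NAME SELECT col_b, col_a FROM my_source;`.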
        (
            "InsertBracketedColumnReferenceListGrammar".into(),
            one_of(vec_of_erased![
                Ref::new("BracketedColumnReferenceListGrammar"),
                Sequence::new(vec_of_erased![Ref::keyword("BY"), Ref::keyword("NAME"),]),
            ])
            .to_matchable()
            .into(),
        ),
    ]);

    // A reference to an object.
    databricks.replace_grammar(
        "ObjectReferenceSegment",
        Delimited::new(vec_of_erased![
            one_of(vec_of_erased![
                Ref::new("SingleIdentifierGrammar"),
                Ref::new("IdentifierClauseSegment"),
            ]),
            Ref::new("ObjectReferenceDelimiterGrammar"),
        ])
        .config(|config| {
            config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
            config.terminators = vec_of_erased![Ref::new("ObjectReferenceTerminatorGrammar")];
            config.disallow_gaps();
        })
        .to_matchable(),
    );

    // The main table expression, e.g. within a FROM clause.
    // Enhanced to allow the additional clauses supported by Spark and Delta Lake.
    databricks.replace_grammar(
        "TableExpressionSegment",
        sparksql::dialect()
            .grammar("TableExpressionSegment")
            .match_grammar(&databricks)
            .unwrap()
            .copy(
                Some(vec_of_erased![Ref::new("IdentifierClauseSegment")]),
                None,
                Some(Ref::new("ValuesClauseSegment").to_matchable()),
                None,
                Vec::new(),
                false,
            ),
    );

    // Override the statement segment to include the Databricks-specific statements.
    databricks.replace_grammar(
        "StatementSegment",
        raw_sparksql
            .grammar("StatementSegment")
            .match_grammar(&databricks)
            .unwrap()
            .copy(
                Some(vec_of_erased![
                    Ref::new("AlterCatalogStatementSegment"),
                    Ref::new("CreateCatalogStatementSegment"),
                    Ref::new("DropCatalogStatementSegment"),
                    Ref::new("UseCatalogStatementSegment"),
                    Ref::new("DropVolumeStatementSegment"),
                    Ref::new("SetTimeZoneStatementSegment"),
                    Ref::new("OptimizeTableStatementSegment"),
                    Ref::new("CommentOnStatementSegment"),
                    Ref::new("DeclareOrReplaceVariableStatementSegment"),
                ]),
                None,
                None,
                None,
                Vec::new(),
                false,
            ),
    );

    // Enhance the `GROUP BY` clause, as in `SELECT`, to support `CUBE`, `ROLLUP`,
    // `GROUPING SETS`, and `ALL`.
    // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-groupby.html
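    // e.g. `GROUP BY ALL` or `GROUP BY CUBE (department, region)`.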
    databricks.replace_grammar(
        "GroupByClauseSegment",
        Sequence::new(vec_of_erased![
            Ref::keyword("GROUP"),
            Ref::keyword("BY"),
            MetaSegment::indent(),
            one_of(vec_of_erased![
                Ref::keyword("ALL"),
                Delimited::new(vec_of_erased![
                    Ref::new("CubeRollupClauseSegment"),
                    Ref::new("GroupingSetsClauseSegment"),
                    Ref::new("ColumnReferenceSegment"),
                    // Can `GROUP BY 1`
                    Ref::new("NumericLiteralSegment").optional(),
                    // Can `GROUP BY coalesce(col, 1)`
                    Ref::new("ExpressionSegment").optional(),
                ]),
                Sequence::new(vec_of_erased![
                    Delimited::new(vec_of_erased![
                        Ref::new("ColumnReferenceSegment"),
                        // Can `GROUP BY 1`
                        Ref::new("NumericLiteralSegment").optional(),
                        // Can `GROUP BY coalesce(col, 1)`
                        Ref::new("ExpressionSegment").optional(),
                    ]),
                    one_of(vec_of_erased![
                        Ref::new("WithCubeRollupClauseSegment"),
                        Ref::new("GroupingSetsClauseSegment"),
                    ]),
                ]),
            ]),
            MetaSegment::dedent(),
        ])
        .to_matchable(),
    );

    databricks.expand();
    databricks
}