sqruff_lib_dialects/databricks.rs

use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
use crate::sparksql;
use sqruff_lib_core::helpers::Config;
use sqruff_lib_core::parser::grammar::anyof::one_of;
use sqruff_lib_core::parser::grammar::delimited::Delimited;
use sqruff_lib_core::parser::grammar::sequence::Bracketed;
use sqruff_lib_core::parser::matchable::MatchableTrait;
use sqruff_lib_core::{
    dialects::{base::Dialect, init::DialectKind},
    helpers::ToMatchable,
    parser::grammar::{base::Ref, sequence::Sequence},
    vec_of_erased,
};

pub fn dialect() -> Dialect {
    let raw_sparksql = sparksql::raw_dialect();

    let mut databricks = sparksql::raw_dialect();
    databricks.name = DialectKind::Databricks;

    databricks
        .sets_mut("unreserved_keywords")
        .extend(UNRESERVED_KEYWORDS);
    databricks
        .sets_mut("unreserved_keywords")
        .extend(raw_sparksql.sets("reserved_keywords"));
    databricks
        .sets_mut("unreserved_keywords")
        .retain(|x| !RESERVED_KEYWORDS.contains(x));
    databricks.sets_mut("reserved_keywords").clear();
    databricks
        .sets_mut("reserved_keywords")
        .extend(RESERVED_KEYWORDS);

    databricks
        .sets_mut("date_part_function_name")
        .extend(["TIMEDIFF"]);

    databricks.add([
        (
            "PrincipalIdentifierSegment".into(),
            one_of(vec_of_erased![
                Ref::new("NakedIdentifierSegment"),
                Ref::new("BackQuotedIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        (
            "SetOwnerGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET").optional(),
                Ref::keyword("OWNER"),
                Ref::keyword("TO"),
                Ref::new("PrincipalIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A reference to a catalog.
        // https://docs.databricks.com/data-governance/unity-catalog/create-catalogs.html
        (
            "CatalogReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        // An `ALTER CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-catalog.html
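        // e.g. (illustrative): ALTER CATALOG some_catalog SET OWNER TO `someone@example.com`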
        (
            "AlterCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("ALTER"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
                one_of(vec_of_erased![Ref::new("SetOwnerGrammar")]),
            ])
            .to_matchable()
            .into(),
        ),
        // A `CREATE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-catalog.html
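        // e.g. (illustrative): CREATE CATALOG IF NOT EXISTS some_catalog COMMENT 'a catalog'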
        (
            "CreateCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("CREATE"),
                Ref::keyword("CATALOG"),
                Ref::new("IfNotExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("CommentGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `DROP CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-drop-catalog.html
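        // e.g. (illustrative): DROP CATALOG IF EXISTS some_catalog CASCADE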
        (
            "DropCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DROP"),
                Ref::keyword("CATALOG"),
                Ref::new("IfExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("DropBehaviorGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-catalog.html
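        // e.g. (illustrative): USE CATALOG some_catalog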
        (
            "UseCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE DATABASE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-usedb.html
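        // e.g. (illustrative): USE SCHEMA some_schema (the DATABASE/SCHEMA keyword is optional)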
        (
            "UseDatabaseStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                one_of(vec_of_erased![
                    Ref::keyword("DATABASE"),
                    Ref::keyword("SCHEMA"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::new("DatabaseReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `SET TIME ZONE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html
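        // e.g. (illustrative): SET TIME ZONE LOCAL, or SET TIME ZONE 'America/Los_Angeles'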
        (
            "SetTimeZoneStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET"),
                Ref::keyword("TIME"),
                Ref::keyword("ZONE"),
                one_of(vec_of_erased![
                    Ref::keyword("LOCAL"),
                    Ref::new("QuotedLiteralSegment"),
                    Ref::new("IntervalExpressionSegment")
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // An `OPTIMIZE` statement.
        // https://docs.databricks.com/en/sql/language-manual/delta-optimize.html
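        // e.g. (illustrative): OPTIMIZE some_table WHERE part_col = 5 ZORDER BY (col_a, col_b)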
        (
            "OptimizeTableStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("OPTIMIZE"),
                Ref::new("TableReferenceSegment"),
                Sequence::new(vec_of_erased![
                    Ref::keyword("WHERE"),
                    Ref::new("ExpressionSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("ZORDER"),
                    Ref::keyword("BY"),
                    Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new(
                        "ColumnReferenceSegment"
                    )])]),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // A reference to a database.
            "DatabaseReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a table, CTE, subquery, or alias.
            "TableReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a schema.
            "SchemaReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            "IdentifierClauseSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("IDENTIFIER"),
                Bracketed::new(vec_of_erased![Ref::new("SingleIdentifierGrammar")]),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // Drop Volume Statement.
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-drop-volume.html
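            // e.g. (illustrative): DROP VOLUME IF EXISTS some_schema.some_volume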
208            "DropVolumeStatementSegment".into(),
209            Sequence::new(vec_of_erased![
210                Ref::keyword("DROP"),
211                Ref::keyword("VOLUME"),
212                Ref::new("IfExistsGrammar").optional(),
213                Ref::new("VolumeReferenceSegment"),
214            ])
215            .to_matchable()
216            .into(),
217        ),
218        (
219            "VolumeReferenceSegment".into(),
220            Ref::new("ObjectReferenceSegment").to_matchable().into(),
221        ),
222        (
223            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-describe-volume.html
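            // Extends the SparkSQL grammar so that, e.g. (illustrative),
            // DESCRIBE VOLUME some_volume also parses.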
224            "DescribeObjectGrammar".into(),
225            sparksql::dialect()
226                .grammar("DescribeObjectGrammar")
227                .copy(
228                    Some(vec_of_erased![Sequence::new(vec_of_erased![
229                        Ref::keyword("VOLUME"),
230                        Ref::new("VolumeReferenceSegment"),
231                    ])]),
232                    Some(0),
233                    None,
234                    None,
235                    Vec::new(),
236                    false,
237                )
238                .into(),
239        ),
240        (
241            // A `DECLARE [OR REPLACE] VARIABLE` statement.
242            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-declare-variable.html
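            // e.g. (illustrative): DECLARE OR REPLACE VARIABLE some_var INT DEFAULT 5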
243            "DeclareOrReplaceVariableStatementSegment".into(),
244            Sequence::new(vec_of_erased![
245                Ref::keyword("DECLARE"),
246                Ref::new("OrReplaceGrammar").optional(),
247                Ref::keyword("VARIABLE").optional(),
248                Ref::new("SingleIdentifierGrammar"),
249                Ref::new("DatatypeSegment").optional(),
250                Sequence::new(vec_of_erased![
251                    one_of(vec_of_erased![
252                        Ref::keyword("DEFAULT"),
253                        Ref::new("EqualsSegment")
254                    ]),
255                    Ref::new("ExpressionSegment"),
256                ])
257                .config(|config| {
258                    config.optional();
259                }),
260            ])
261            .to_matchable()
262            .into(),
263        ),
264        // `COMMENT ON` statement.
265        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html
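        // e.g. (illustrative): COMMENT ON TABLE some_table IS 'a comment'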
        (
            "CommentOnStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("COMMENT"),
                Ref::keyword("ON"),
                one_of(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        Ref::keyword("CATALOG"),
                        Ref::new("CatalogReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("DATABASE"),
                            Ref::keyword("SCHEMA")
                        ]),
                        Ref::new("DatabaseReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("TABLE"),
                        Ref::new("TableReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUME"),
                        Ref::new("VolumeReferenceSegment"),
                    ]),
                    // TODO Split out individual items if they have references
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("CONNECTION"),
                            Ref::keyword("PROVIDER"),
                            Ref::keyword("RECIPIENT"),
                            Ref::keyword("SHARE"),
                        ]),
                        Ref::new("ObjectReferenceSegment"),
                    ]),
                ]),
                Ref::keyword("IS"),
                one_of(vec_of_erased![
                    Ref::new("QuotedLiteralSegment"),
                    Ref::keyword("NULL"),
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-schemas.html
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the catalog level
        // - `LIKE` keyword is optional
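        // e.g. (illustrative): SHOW SCHEMAS FROM some_catalog 'sch*'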
        (
            "ShowDatabasesSchemasGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("DATABASES"),
                    Ref::keyword("SCHEMAS"),
                ]),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // Show Functions Statement
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-functions.html
        //
        // Represents the grammar that follows the `SHOW` keyword.
        //
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the schema level
        // - `LIKE` keyword is optional
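        // e.g. (illustrative): SHOW USER FUNCTIONS IN some_schema LIKE 'fn*'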
        (
            "ShowFunctionsGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("USER"),
                    Ref::keyword("SYSTEM"),
                    Ref::keyword("ALL"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::keyword("FUNCTIONS"),
                Sequence::new(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                        Ref::new("DatabaseReferenceSegment"),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("LIKE").optional(),
                        one_of(vec_of_erased![
                            // qualified function from a database
                            Sequence::new(vec_of_erased![
                                Ref::new("DatabaseReferenceSegment"),
                                Ref::new("DotSegment"),
                                Ref::new("FunctionNameSegment"),
                            ])
                            .config(|config| {
                                config.disallow_gaps();
                            }),
                            // non-qualified function
                            Ref::new("FunctionNameSegment"),
                            // Regex/like string
                            Ref::new("QuotedLiteralSegment"),
                        ]),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-tables.html
        // Differences between this and the SparkSQL version:
        // - `LIKE` keyword is optional
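        // e.g. (illustrative): SHOW TABLES IN some_schema 'tbl*'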
        (
            "ShowTablesGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("TABLES"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html
        // Only difference between this and the SparkSQL version:
        // - `LIKE` keyword is optional
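        // e.g. (illustrative): SHOW VIEWS FROM some_schema 'v*'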
        (
            "ShowViewsGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("VIEWS"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-volumes.html
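        // e.g. (illustrative): SHOW VOLUMES IN some_schema LIKE 'vol*'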
        (
            "ShowObjectGrammar".into(),
            sparksql::raw_dialect()
                .grammar("ShowObjectGrammar")
                .copy(
                    Some(vec_of_erased![Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUMES"),
                        Sequence::new(vec_of_erased![
                            one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                            Ref::new("DatabaseReferenceSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                        Sequence::new(vec_of_erased![
                            Ref::keyword("LIKE").optional(),
                            Ref::new("QuotedLiteralSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                    ])]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-dml-insert-into#insert-using-the-by-name-clause
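        // e.g. (illustrative): INSERT INTO some_table BY NAME SELECT ...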
        (
            "InsertBracketedColumnReferenceListGrammar".into(),
            one_of(vec_of_erased![
                Ref::new("BracketedColumnReferenceListGrammar"),
                Sequence::new(vec_of_erased![Ref::keyword("BY"), Ref::keyword("NAME"),]),
            ])
            .to_matchable()
            .into(),
        ),
    ]);

    // A reference to an object.
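    // Each dot-separated part may be a plain identifier or an IDENTIFIER(...) clause,
    // e.g. (illustrative): some_catalog.some_schema.some_table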
    databricks.replace_grammar(
        "ObjectReferenceSegment",
        Delimited::new(vec_of_erased![
            one_of(vec_of_erased![
                Ref::new("SingleIdentifierGrammar"),
                Ref::new("IdentifierClauseSegment"),
            ]),
            Ref::new("ObjectReferenceDelimiterGrammar"),
        ])
        .config(|config| {
            config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
            config.terminators = vec_of_erased![Ref::new("ObjectReferenceTerminatorGrammar")];
            config.disallow_gaps();
        })
        .to_matchable(),
    );

    // The main table expression e.g. within a FROM clause.
    // Enhance to allow for additional clauses allowed in Spark and Delta Lake.
    databricks.replace_grammar(
        "TableExpressionSegment",
        sparksql::dialect()
            .grammar("TableExpressionSegment")
            .match_grammar()
            .unwrap()
            .copy(
                Some(vec_of_erased![Ref::new("IdentifierClauseSegment")]),
                None,
                Some(Ref::new("ValuesClauseSegment").to_matchable()),
                None,
                Vec::new(),
                false,
            ),
    );

    // Override statement segment
    databricks.replace_grammar(
        "StatementSegment",
        raw_sparksql
            .grammar("StatementSegment")
            .match_grammar()
            .unwrap()
            .copy(
                Some(vec_of_erased![
                    Ref::new("AlterCatalogStatementSegment"),
                    Ref::new("CreateCatalogStatementSegment"),
                    Ref::new("DropCatalogStatementSegment"),
                    Ref::new("UseCatalogStatementSegment"),
                    Ref::new("DropVolumeStatementSegment"),
                    Ref::new("SetTimeZoneStatementSegment"),
                    Ref::new("OptimizeTableStatementSegment"),
                    Ref::new("CommentOnStatementSegment"),
                    Ref::new("DeclareOrReplaceVariableStatementSegment"),
                ]),
                None,
                None,
                None,
                Vec::new(),
                false,
            ),
    );

    databricks.expand();
    databricks
}