sqruff_lib_dialects/databricks.rs

use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
use crate::sparksql;
use sqruff_lib_core::helpers::Config;
use sqruff_lib_core::parser::grammar::anyof::one_of;
use sqruff_lib_core::parser::grammar::delimited::Delimited;
use sqruff_lib_core::parser::grammar::sequence::Bracketed;
use sqruff_lib_core::parser::matchable::MatchableTrait;
use sqruff_lib_core::{
    dialects::{Dialect, init::DialectKind},
    helpers::ToMatchable,
    parser::grammar::{Ref, sequence::Sequence},
    vec_of_erased,
};

pub fn dialect() -> Dialect {
    let raw_sparksql = sparksql::raw_dialect();

    let mut databricks = sparksql::raw_dialect();
    databricks.name = DialectKind::Databricks;

    databricks
        .sets_mut("unreserved_keywords")
        .extend(UNRESERVED_KEYWORDS);
    databricks
        .sets_mut("unreserved_keywords")
        .extend(raw_sparksql.sets("reserved_keywords"));
    databricks
        .sets_mut("unreserved_keywords")
        .retain(|x| !RESERVED_KEYWORDS.contains(x));
    databricks.sets_mut("reserved_keywords").clear();
    databricks
        .sets_mut("reserved_keywords")
        .extend(RESERVED_KEYWORDS);

    databricks
        .sets_mut("date_part_function_name")
        .extend(["TIMEDIFF"]);

    databricks.add([
        (
            "PrincipalIdentifierSegment".into(),
            one_of(vec_of_erased![
                Ref::new("NakedIdentifierSegment"),
                Ref::new("BackQuotedIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        (
            "SetOwnerGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET").optional(),
                Ref::keyword("OWNER"),
                Ref::keyword("TO"),
                Ref::new("PrincipalIdentifierSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A reference to a catalog.
        // https://docs.databricks.com/data-governance/unity-catalog/create-catalogs.html
        (
            "CatalogReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        // An `ALTER CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-alter-catalog.html
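        // e.g. `ALTER CATALOG my_catalog SET OWNER TO data_admins` (identifiers illustrative).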
        (
            "AlterCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("ALTER"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("SetOwnerGrammar"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `CREATE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-catalog.html
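        // e.g. `CREATE CATALOG IF NOT EXISTS my_catalog COMMENT 'example catalog'` (illustrative).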
        (
            "CreateCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("CREATE"),
                Ref::keyword("CATALOG"),
                Ref::new("IfNotExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("CommentGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `DROP CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-drop-catalog.html
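        // e.g. `DROP CATALOG IF EXISTS my_catalog CASCADE` (illustrative).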
        (
            "DropCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DROP"),
                Ref::keyword("CATALOG"),
                Ref::new("IfExistsGrammar").optional(),
                Ref::new("CatalogReferenceSegment"),
                Ref::new("DropBehaviorGrammar").optional(),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE CATALOG` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-catalog.html
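        // e.g. `USE CATALOG my_catalog` (illustrative).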
        (
            "UseCatalogStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                Ref::keyword("CATALOG"),
                Ref::new("CatalogReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `USE DATABASE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-usedb.html
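        // e.g. `USE SCHEMA my_schema` or `USE my_schema` (the DATABASE/SCHEMA keyword is optional; illustrative).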
        (
            "UseDatabaseStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("USE"),
                one_of(vec_of_erased![
                    Ref::keyword("DATABASE"),
                    Ref::keyword("SCHEMA"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::new("DatabaseReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        // A `SET TIME ZONE` statement.
        // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html
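        // e.g. `SET TIME ZONE LOCAL` or `SET TIME ZONE '+02:00'` (illustrative).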
        (
            "SetTimeZoneStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("SET"),
                Ref::keyword("TIME"),
                Ref::keyword("ZONE"),
                one_of(vec_of_erased![
                    Ref::keyword("LOCAL"),
                    Ref::new("QuotedLiteralSegment"),
                    Ref::new("IntervalExpressionSegment")
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // An `OPTIMIZE` statement.
        // https://docs.databricks.com/en/sql/language-manual/delta-optimize.html
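        // e.g. `OPTIMIZE my_table WHERE part_col = 'a' ZORDER BY (col_a, col_b)` (identifiers illustrative).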
        (
            "OptimizeTableStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("OPTIMIZE"),
                Ref::new("TableReferenceSegment"),
                Sequence::new(vec_of_erased![
                    Ref::keyword("WHERE"),
                    Ref::new("ExpressionSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("ZORDER"),
                    Ref::keyword("BY"),
                    Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new(
                        "ColumnReferenceSegment"
                    )])]),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // A reference to a database.
            "DatabaseReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a table, CTE, subquery or alias.
            "TableReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // A reference to a schema.
            "SchemaReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
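        // An `IDENTIFIER( ... )` clause wrapping a single identifier,
        // e.g. `IDENTIFIER(my_table)` (illustrative).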
        (
            "IdentifierClauseSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("IDENTIFIER"),
                Bracketed::new(vec_of_erased![Ref::new("SingleIdentifierGrammar")]),
            ])
            .to_matchable()
            .into(),
        ),
        (
            // Drop Volume Statement.
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-drop-volume.html
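            // e.g. `DROP VOLUME IF EXISTS my_catalog.my_schema.my_volume` (illustrative).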
            "DropVolumeStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DROP"),
                Ref::keyword("VOLUME"),
                Ref::new("IfExistsGrammar").optional(),
                Ref::new("VolumeReferenceSegment"),
            ])
            .to_matchable()
            .into(),
        ),
        (
            "VolumeReferenceSegment".into(),
            Ref::new("ObjectReferenceSegment").to_matchable().into(),
        ),
        (
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-describe-volume.html
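            // Adds `VOLUME` to the SparkSQL grammar, e.g. `DESCRIBE VOLUME my_volume` (illustrative).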
            "DescribeObjectGrammar".into(),
            sparksql::dialect()
                .grammar("DescribeObjectGrammar")
                .copy(
                    Some(vec_of_erased![Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUME"),
                        Ref::new("VolumeReferenceSegment"),
                    ])]),
                    Some(0),
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        (
            // A `DECLARE [OR REPLACE] VARIABLE` statement.
            // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-declare-variable.html
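            // e.g. `DECLARE OR REPLACE VARIABLE my_var INT DEFAULT 5` (illustrative).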
            "DeclareOrReplaceVariableStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("DECLARE"),
                Ref::new("OrReplaceGrammar").optional(),
                Ref::keyword("VARIABLE").optional(),
                Ref::new("SingleIdentifierGrammar"),
                Ref::new("DatatypeSegment").optional(),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![
                        Ref::keyword("DEFAULT"),
                        Ref::new("EqualsSegment")
                    ]),
                    Ref::new("ExpressionSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // `COMMENT ON` statement.
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html
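        // e.g. `COMMENT ON TABLE my_table IS 'example comment'` (illustrative).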
        (
            "CommentOnStatementSegment".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("COMMENT"),
                Ref::keyword("ON"),
                one_of(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        Ref::keyword("CATALOG"),
                        Ref::new("CatalogReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("DATABASE"),
                            Ref::keyword("SCHEMA")
                        ]),
                        Ref::new("DatabaseReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("TABLE"),
                        Ref::new("TableReferenceSegment"),
                    ]),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUME"),
                        Ref::new("VolumeReferenceSegment"),
                    ]),
                    // TODO Split out individual items if they have references
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![
                            Ref::keyword("CONNECTION"),
                            Ref::keyword("PROVIDER"),
                            Ref::keyword("RECIPIENT"),
                            Ref::keyword("SHARE"),
                        ]),
                        Ref::new("ObjectReferenceSegment"),
                    ]),
                ]),
                Ref::keyword("IS"),
                one_of(vec_of_erased![
                    Ref::new("QuotedLiteralSegment"),
                    Ref::keyword("NULL"),
                ]),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-schemas.html
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the catalog level
        // - `LIKE` keyword is optional
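        // e.g. `SHOW SCHEMAS FROM my_catalog 'prod_*'` (illustrative).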
        (
            "ShowDatabasesSchemasGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("DATABASES"),
                    Ref::keyword("SCHEMAS"),
                ]),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // Show Functions Statement
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-functions.html
        //
        // Represents the part of the grammar that follows the `SHOW` keyword.
        //
        // Differences between this and the SparkSQL version:
        // - Support for `FROM`|`IN` at the schema level
        // - `LIKE` keyword is optional
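        // e.g. `SHOW USER FUNCTIONS FROM my_schema LIKE 'co*'` (illustrative).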
        (
            "ShowFunctionsGrammar".into(),
            Sequence::new(vec_of_erased![
                one_of(vec_of_erased![
                    Ref::keyword("USER"),
                    Ref::keyword("SYSTEM"),
                    Ref::keyword("ALL"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Ref::keyword("FUNCTIONS"),
                Sequence::new(vec_of_erased![
                    Sequence::new(vec_of_erased![
                        one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                        Ref::new("DatabaseReferenceSegment"),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                    Sequence::new(vec_of_erased![
                        Ref::keyword("LIKE").optional(),
                        one_of(vec_of_erased![
                            // qualified function from a database
                            Sequence::new(vec_of_erased![
                                Ref::new("DatabaseReferenceSegment"),
                                Ref::new("DotSegment"),
                                Ref::new("FunctionNameSegment"),
                            ])
                            .config(|config| {
                                config.disallow_gaps();
                            }),
                            // non-qualified function
                            Ref::new("FunctionNameSegment"),
                            // Regex/like string
                            Ref::new("QuotedLiteralSegment"),
                        ]),
                    ])
                    .config(|config| {
                        config.optional();
                    }),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-tables.html
        // Differences between this and the SparkSQL version:
        // - `LIKE` keyword is optional
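        // e.g. `SHOW TABLES IN my_schema 'raw_*'` (illustrative).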
        (
            "ShowTablesGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("TABLES"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html
        // Only difference between this and the SparkSQL version:
        // - `LIKE` keyword is optional
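        // e.g. `SHOW VIEWS FROM my_schema 'v_*'` (illustrative).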
        (
            "ShowViewsGrammar".into(),
            Sequence::new(vec_of_erased![
                Ref::keyword("VIEWS"),
                Sequence::new(vec_of_erased![
                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                    Ref::new("DatabaseReferenceSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
                Sequence::new(vec_of_erased![
                    Ref::keyword("LIKE").optional(),
                    Ref::new("QuotedLiteralSegment"),
                ])
                .config(|config| {
                    config.optional();
                }),
            ])
            .to_matchable()
            .into(),
        ),
        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-volumes.html
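        // Adds `VOLUMES` to the SparkSQL grammar, e.g. `SHOW VOLUMES IN my_schema LIKE 'data_*'` (illustrative).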
        (
            "ShowObjectGrammar".into(),
            sparksql::raw_dialect()
                .grammar("ShowObjectGrammar")
                .copy(
                    Some(vec_of_erased![Sequence::new(vec_of_erased![
                        Ref::keyword("VOLUMES"),
                        Sequence::new(vec_of_erased![
                            one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
                            Ref::new("DatabaseReferenceSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                        Sequence::new(vec_of_erased![
                            Ref::keyword("LIKE").optional(),
                            Ref::new("QuotedLiteralSegment"),
                        ])
                        .config(|config| {
                            config.optional();
                        }),
                    ])]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-dml-insert-into#insert-using-the-by-name-clause
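        // Allows `BY NAME` in place of a bracketed column list, e.g. `INSERT INTO my_table BY NAME SELECT ...` (illustrative).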
        (
            "InsertBracketedColumnReferenceListGrammar".into(),
            one_of(vec_of_erased![
                Ref::new("BracketedColumnReferenceListGrammar"),
                Sequence::new(vec_of_erased![Ref::keyword("BY"), Ref::keyword("NAME"),]),
            ])
            .to_matchable()
            .into(),
        ),
    ]);

    // A reference to an object.
    databricks.replace_grammar(
        "ObjectReferenceSegment",
        Delimited::new(vec_of_erased![
            one_of(vec_of_erased![
                Ref::new("SingleIdentifierGrammar"),
                Ref::new("IdentifierClauseSegment"),
            ]),
            Ref::new("ObjectReferenceDelimiterGrammar"),
        ])
        .config(|config| {
            config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
            config.terminators = vec_of_erased![Ref::new("ObjectReferenceTerminatorGrammar")];
            config.disallow_gaps();
        })
        .to_matchable(),
    );

    // The main table expression e.g. within a FROM clause.
    // Enhance to allow for additional clauses allowed in Spark and Delta Lake.
    databricks.replace_grammar(
        "TableExpressionSegment",
        sparksql::dialect()
            .grammar("TableExpressionSegment")
            .match_grammar(&databricks)
            .unwrap()
            .copy(
                Some(vec_of_erased![Ref::new("IdentifierClauseSegment")]),
                None,
                Some(Ref::new("ValuesClauseSegment").to_matchable()),
                None,
                Vec::new(),
                false,
            ),
    );

    // Override statement segment
    databricks.replace_grammar(
        "StatementSegment",
        raw_sparksql
            .grammar("StatementSegment")
            .match_grammar(&databricks)
            .unwrap()
            .copy(
                Some(vec_of_erased![
                    Ref::new("AlterCatalogStatementSegment"),
                    Ref::new("CreateCatalogStatementSegment"),
                    Ref::new("DropCatalogStatementSegment"),
                    Ref::new("UseCatalogStatementSegment"),
                    Ref::new("DropVolumeStatementSegment"),
                    Ref::new("SetTimeZoneStatementSegment"),
                    Ref::new("OptimizeTableStatementSegment"),
                    Ref::new("CommentOnStatementSegment"),
                    Ref::new("DeclareOrReplaceVariableStatementSegment"),
                ]),
                None,
                None,
                None,
                Vec::new(),
                false,
            ),
    );

    databricks.expand();
    databricks
}