1use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
2use crate::sparksql;
3use sqruff_lib_core::helpers::Config;
4use sqruff_lib_core::parser::grammar::anyof::one_of;
5use sqruff_lib_core::parser::grammar::delimited::Delimited;
6use sqruff_lib_core::parser::grammar::sequence::Bracketed;
7use sqruff_lib_core::parser::matchable::MatchableTrait;
8use sqruff_lib_core::parser::segments::meta::MetaSegment;
9use sqruff_lib_core::{
10 dialects::{Dialect, init::DialectKind},
11 helpers::ToMatchable,
12 parser::grammar::{Ref, sequence::Sequence},
13};
14
15pub fn dialect() -> Dialect {
16 let raw_sparksql = sparksql::raw_dialect();
17
18 let mut databricks = sparksql::raw_dialect();
19 databricks.name = DialectKind::Databricks;
20
21 databricks
22 .sets_mut("unreserved_keywords")
23 .extend(UNRESERVED_KEYWORDS);
24 databricks
25 .sets_mut("unreserved_keywords")
26 .extend(raw_sparksql.sets("reserved_keywords"));
27 databricks
28 .sets_mut("unreserved_keywords")
29 .retain(|x| !RESERVED_KEYWORDS.contains(x));
30 databricks.sets_mut("reserved_keywords").clear();
31 databricks
32 .sets_mut("reserved_keywords")
33 .extend(RESERVED_KEYWORDS);
34
35 databricks
36 .sets_mut("date_part_function_name")
37 .extend(["TIMEDIFF"]);
38
39 databricks.add([
40 (
41 "PrincipalIdentifierSegment".into(),
42 one_of(vec![
43 Ref::new("NakedIdentifierSegment").to_matchable(),
44 Ref::new("BackQuotedIdentifierSegment").to_matchable(),
45 ])
46 .to_matchable()
47 .into(),
48 ),
49 (
50 "SetOwnerGrammar".into(),
51 Sequence::new(vec![
52 Ref::keyword("SET").optional().to_matchable(),
53 Ref::keyword("OWNER").to_matchable(),
54 Ref::keyword("TO").to_matchable(),
55 Ref::new("PrincipalIdentifierSegment").to_matchable(),
56 ])
57 .to_matchable()
58 .into(),
59 ),
60 (
63 "CatalogReferenceSegment".into(),
64 Ref::new("ObjectReferenceSegment").to_matchable().into(),
65 ),
66 (
69 "AlterCatalogStatementSegment".into(),
70 Sequence::new(vec![
71 Ref::keyword("ALTER").to_matchable(),
72 Ref::keyword("CATALOG").to_matchable(),
73 Ref::new("CatalogReferenceSegment").to_matchable(),
74 Ref::new("SetOwnerGrammar").to_matchable(),
75 ])
76 .to_matchable()
77 .into(),
78 ),
79 (
82 "CreateCatalogStatementSegment".into(),
83 Sequence::new(vec![
84 Ref::keyword("CREATE").to_matchable(),
85 Ref::keyword("CATALOG").to_matchable(),
86 Ref::new("IfNotExistsGrammar").optional().to_matchable(),
87 Ref::new("CatalogReferenceSegment").to_matchable(),
88 Ref::new("CommentGrammar").optional().to_matchable(),
89 ])
90 .to_matchable()
91 .into(),
92 ),
93 (
96 "DropCatalogStatementSegment".into(),
97 Sequence::new(vec![
98 Ref::keyword("DROP").to_matchable(),
99 Ref::keyword("CATALOG").to_matchable(),
100 Ref::new("IfExistsGrammar").optional().to_matchable(),
101 Ref::new("CatalogReferenceSegment").to_matchable(),
102 Ref::new("DropBehaviorGrammar").optional().to_matchable(),
103 ])
104 .to_matchable()
105 .into(),
106 ),
107 (
110 "UseCatalogStatementSegment".into(),
111 Sequence::new(vec![
112 Ref::keyword("USE").to_matchable(),
113 Ref::keyword("CATALOG").to_matchable(),
114 Ref::new("CatalogReferenceSegment").to_matchable(),
115 ])
116 .to_matchable()
117 .into(),
118 ),
119 (
122 "UseDatabaseStatementSegment".into(),
123 Sequence::new(vec![
124 Ref::keyword("USE").to_matchable(),
125 one_of(vec![
126 Ref::keyword("DATABASE").to_matchable(),
127 Ref::keyword("SCHEMA").to_matchable(),
128 ])
129 .config(|config| {
130 config.optional();
131 })
132 .to_matchable(),
133 Ref::new("DatabaseReferenceSegment").to_matchable(),
134 ])
135 .to_matchable()
136 .into(),
137 ),
138 (
141 "SetTimeZoneStatementSegment".into(),
142 Sequence::new(vec![
143 Ref::keyword("SET").to_matchable(),
144 Ref::keyword("TIME").to_matchable(),
145 Ref::keyword("ZONE").to_matchable(),
146 one_of(vec![
147 Ref::keyword("LOCAL").to_matchable(),
148 Ref::new("QuotedLiteralSegment").to_matchable(),
149 Ref::new("IntervalExpressionSegment").to_matchable(),
150 ])
151 .to_matchable(),
152 ])
153 .to_matchable()
154 .into(),
155 ),
156 (
159 "OptimizeTableStatementSegment".into(),
160 Sequence::new(vec![
161 Ref::keyword("OPTIMIZE").to_matchable(),
162 Ref::new("TableReferenceSegment").to_matchable(),
163 Sequence::new(vec![
164 Ref::keyword("WHERE").to_matchable(),
165 Ref::new("ExpressionSegment").to_matchable(),
166 ])
167 .config(|config| {
168 config.optional();
169 })
170 .to_matchable(),
171 Sequence::new(vec![
172 Ref::keyword("ZORDER").to_matchable(),
173 Ref::keyword("BY").to_matchable(),
174 Bracketed::new(vec![
175 Delimited::new(vec![Ref::new("ColumnReferenceSegment").to_matchable()])
176 .to_matchable(),
177 ])
178 .to_matchable(),
179 ])
180 .config(|config| {
181 config.optional();
182 })
183 .to_matchable(),
184 ])
185 .to_matchable()
186 .into(),
187 ),
188 (
189 "DatabaseReferenceSegment".into(),
191 Ref::new("ObjectReferenceSegment").to_matchable().into(),
192 ),
193 (
194 "TableReferenceSegment".into(),
196 Ref::new("ObjectReferenceSegment").to_matchable().into(),
197 ),
198 (
199 "SchemaReferenceSegment".into(),
201 Ref::new("ObjectReferenceSegment").to_matchable().into(),
202 ),
203 (
204 "IdentifierClauseSegment".into(),
205 Sequence::new(vec![
206 Ref::keyword("IDENTIFIER").to_matchable(),
207 Bracketed::new(vec![Ref::new("SingleIdentifierGrammar").to_matchable()])
208 .to_matchable(),
209 ])
210 .to_matchable()
211 .into(),
212 ),
213 (
214 "DropVolumeStatementSegment".into(),
217 Sequence::new(vec![
218 Ref::keyword("DROP").to_matchable(),
219 Ref::keyword("VOLUME").to_matchable(),
220 Ref::new("IfExistsGrammar").optional().to_matchable(),
221 Ref::new("VolumeReferenceSegment").to_matchable(),
222 ])
223 .to_matchable()
224 .into(),
225 ),
226 (
227 "VolumeReferenceSegment".into(),
228 Ref::new("ObjectReferenceSegment").to_matchable().into(),
229 ),
230 (
231 "DescribeObjectGrammar".into(),
233 sparksql::dialect()
234 .grammar("DescribeObjectGrammar")
235 .copy(
236 Some(vec![
237 Sequence::new(vec![
238 Ref::keyword("VOLUME").to_matchable(),
239 Ref::new("VolumeReferenceSegment").to_matchable(),
240 ])
241 .to_matchable(),
242 ]),
243 Some(0),
244 None,
245 None,
246 Vec::new(),
247 false,
248 )
249 .into(),
250 ),
251 (
252 "DeclareOrReplaceVariableStatementSegment".into(),
255 Sequence::new(vec![
256 Ref::keyword("DECLARE").to_matchable(),
257 Ref::new("OrReplaceGrammar").optional().to_matchable(),
258 Ref::keyword("VARIABLE").optional().to_matchable(),
259 Ref::new("SingleIdentifierGrammar").to_matchable(),
260 Ref::new("DatatypeSegment").optional().to_matchable(),
261 Sequence::new(vec![
262 one_of(vec![
263 Ref::keyword("DEFAULT").to_matchable(),
264 Ref::new("EqualsSegment").to_matchable(),
265 ])
266 .to_matchable(),
267 Ref::new("ExpressionSegment").to_matchable(),
268 ])
269 .config(|config| {
270 config.optional();
271 })
272 .to_matchable(),
273 ])
274 .to_matchable()
275 .into(),
276 ),
277 (
280 "CommentOnStatementSegment".into(),
281 Sequence::new(vec![
282 Ref::keyword("COMMENT").to_matchable(),
283 Ref::keyword("ON").to_matchable(),
284 one_of(vec![
285 Sequence::new(vec![
286 Ref::keyword("CATALOG").to_matchable(),
287 Ref::new("CatalogReferenceSegment").to_matchable(),
288 ])
289 .to_matchable(),
290 Sequence::new(vec![
291 one_of(vec![
292 Ref::keyword("DATABASE").to_matchable(),
293 Ref::keyword("SCHEMA").to_matchable(),
294 ])
295 .to_matchable(),
296 Ref::new("DatabaseReferenceSegment").to_matchable(),
297 ])
298 .to_matchable(),
299 Sequence::new(vec![
300 Ref::keyword("TABLE").to_matchable(),
301 Ref::new("TableReferenceSegment").to_matchable(),
302 ])
303 .to_matchable(),
304 Sequence::new(vec![
305 Ref::keyword("VOLUME").to_matchable(),
306 Ref::new("VolumeReferenceSegment").to_matchable(),
307 ])
308 .to_matchable(),
309 Sequence::new(vec![
311 one_of(vec![
312 Ref::keyword("CONNECTION").to_matchable(),
313 Ref::keyword("PROVIDER").to_matchable(),
314 Ref::keyword("RECIPIENT").to_matchable(),
315 Ref::keyword("SHARE").to_matchable(),
316 ])
317 .to_matchable(),
318 Ref::new("ObjectReferenceSegment").to_matchable(),
319 ])
320 .to_matchable(),
321 ])
322 .to_matchable(),
323 Ref::keyword("IS").to_matchable(),
324 one_of(vec![
325 Ref::new("QuotedLiteralSegment").to_matchable(),
326 Ref::keyword("NULL").to_matchable(),
327 ])
328 .to_matchable(),
329 ])
330 .to_matchable()
331 .into(),
332 ),
333 (
338 "ShowDatabasesSchemasGrammar".into(),
339 Sequence::new(vec![
340 one_of(vec![
341 Ref::keyword("DATABASES").to_matchable(),
342 Ref::keyword("SCHEMAS").to_matchable(),
343 ])
344 .to_matchable(),
345 Sequence::new(vec![
346 one_of(vec![
347 Ref::keyword("FROM").to_matchable(),
348 Ref::keyword("IN").to_matchable(),
349 ])
350 .to_matchable(),
351 Ref::new("DatabaseReferenceSegment").to_matchable(),
352 ])
353 .config(|config| {
354 config.optional();
355 })
356 .to_matchable(),
357 Sequence::new(vec![
358 Ref::keyword("LIKE").optional().to_matchable(),
359 Ref::new("QuotedLiteralSegment").to_matchable(),
360 ])
361 .config(|config| {
362 config.optional();
363 })
364 .to_matchable(),
365 ])
366 .to_matchable()
367 .into(),
368 ),
369 (
374 "ShowDatabasesSchemasGrammar".into(),
375 Sequence::new(vec![
376 one_of(vec![
377 Ref::keyword("DATABASES").to_matchable(),
378 Ref::keyword("SCHEMAS").to_matchable(),
379 ])
380 .to_matchable(),
381 Sequence::new(vec![
382 one_of(vec![
383 Ref::keyword("FROM").to_matchable(),
384 Ref::keyword("IN").to_matchable(),
385 ])
386 .to_matchable(),
387 Ref::new("DatabaseReferenceSegment").to_matchable(),
388 ])
389 .config(|config| {
390 config.optional();
391 })
392 .to_matchable(),
393 Sequence::new(vec![
394 Ref::keyword("LIKE").optional().to_matchable(),
395 Ref::new("QuotedLiteralSegment").to_matchable(),
396 ])
397 .config(|config| {
398 config.optional();
399 })
400 .to_matchable(),
401 ])
402 .to_matchable()
403 .into(),
404 ),
405 (
414 "ShowFunctionsGrammar".into(),
415 Sequence::new(vec![
416 one_of(vec![
417 Ref::keyword("USER").to_matchable(),
418 Ref::keyword("SYSTEM").to_matchable(),
419 Ref::keyword("ALL").to_matchable(),
420 ])
421 .config(|config| {
422 config.optional();
423 })
424 .to_matchable(),
425 Ref::keyword("FUNCTIONS").to_matchable(),
426 Sequence::new(vec![
427 Sequence::new(vec![
428 one_of(vec![
429 Ref::keyword("FROM").to_matchable(),
430 Ref::keyword("IN").to_matchable(),
431 ])
432 .to_matchable(),
433 Ref::new("DatabaseReferenceSegment").to_matchable(),
434 ])
435 .config(|config| {
436 config.optional();
437 })
438 .to_matchable(),
439 Sequence::new(vec![
440 Ref::keyword("LIKE").optional().to_matchable(),
441 one_of(vec![
442 Sequence::new(vec![
444 Ref::new("DatabaseReferenceSegment").to_matchable(),
445 Ref::new("DotSegment").to_matchable(),
446 Ref::new("FunctionNameSegment").to_matchable(),
447 ])
448 .config(|config| {
449 config.disallow_gaps();
450 })
451 .to_matchable(),
452 Ref::new("FunctionNameSegment").to_matchable(),
454 Ref::new("QuotedLiteralSegment").to_matchable(),
456 ])
457 .to_matchable(),
458 ])
459 .config(|config| {
460 config.optional();
461 })
462 .to_matchable(),
463 ])
464 .config(|config| {
465 config.optional();
466 })
467 .to_matchable(),
468 ])
469 .to_matchable()
470 .into(),
471 ),
472 (
476 "ShowTablesGrammar".into(),
477 Sequence::new(vec![
478 Ref::keyword("TABLES").to_matchable(),
479 Sequence::new(vec![
480 one_of(vec![
481 Ref::keyword("FROM").to_matchable(),
482 Ref::keyword("IN").to_matchable(),
483 ])
484 .to_matchable(),
485 Ref::new("DatabaseReferenceSegment").to_matchable(),
486 ])
487 .config(|config| {
488 config.optional();
489 })
490 .to_matchable(),
491 Sequence::new(vec![
492 Ref::keyword("LIKE").optional().to_matchable(),
493 Ref::new("QuotedLiteralSegment").to_matchable(),
494 ])
495 .config(|config| {
496 config.optional();
497 })
498 .to_matchable(),
499 ])
500 .to_matchable()
501 .into(),
502 ),
503 (
507 "ShowViewsGrammar".into(),
508 Sequence::new(vec![
509 Ref::keyword("VIEWS").to_matchable(),
510 Sequence::new(vec![
511 one_of(vec![
512 Ref::keyword("FROM").to_matchable(),
513 Ref::keyword("IN").to_matchable(),
514 ])
515 .to_matchable(),
516 Ref::new("DatabaseReferenceSegment").to_matchable(),
517 ])
518 .config(|config| {
519 config.optional();
520 })
521 .to_matchable(),
522 Sequence::new(vec![
523 Ref::keyword("LIKE").optional().to_matchable(),
524 Ref::new("QuotedLiteralSegment").to_matchable(),
525 ])
526 .config(|config| {
527 config.optional();
528 })
529 .to_matchable(),
530 ])
531 .to_matchable()
532 .into(),
533 ),
534 (
536 "ShowObjectGrammar".into(),
537 sparksql::raw_dialect()
538 .grammar("ShowObjectGrammar")
539 .copy(
540 Some(vec![
541 Sequence::new(vec![
542 Ref::keyword("VOLUMES").to_matchable(),
543 Sequence::new(vec![
544 one_of(vec![
545 Ref::keyword("FROM").to_matchable(),
546 Ref::keyword("IN").to_matchable(),
547 ])
548 .to_matchable(),
549 Ref::new("DatabaseReferenceSegment").to_matchable(),
550 ])
551 .config(|config| {
552 config.optional();
553 })
554 .to_matchable(),
555 Sequence::new(vec![
556 Ref::keyword("LIKE").optional().to_matchable(),
557 Ref::new("QuotedLiteralSegment").to_matchable(),
558 ])
559 .config(|config| {
560 config.optional();
561 })
562 .to_matchable(),
563 ])
564 .to_matchable(),
565 ]),
566 None,
567 None,
568 None,
569 Vec::new(),
570 false,
571 )
572 .into(),
573 ),
574 (
576 "InsertBracketedColumnReferenceListGrammar".into(),
577 one_of(vec![
578 Ref::new("BracketedColumnReferenceListGrammar").to_matchable(),
579 Sequence::new(vec![
580 Ref::keyword("BY").to_matchable(),
581 Ref::keyword("NAME").to_matchable(),
582 ])
583 .to_matchable(),
584 ])
585 .to_matchable()
586 .into(),
587 ),
588 ]);
589
590 databricks.replace_grammar(
592 "ObjectReferenceSegment",
593 Delimited::new(vec![
594 one_of(vec![
595 Ref::new("SingleIdentifierGrammar").to_matchable(),
596 Ref::new("IdentifierClauseSegment").to_matchable(),
597 ])
598 .to_matchable(),
599 Ref::new("ObjectReferenceDelimiterGrammar").to_matchable(),
600 ])
601 .config(|config| {
602 config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
603 config.terminators = vec![Ref::new("ObjectReferenceTerminatorGrammar").to_matchable()];
604 config.disallow_gaps();
605 })
606 .to_matchable(),
607 );
608
609 databricks.replace_grammar(
612 "TableExpressionSegment",
613 sparksql::dialect()
614 .grammar("TableExpressionSegment")
615 .match_grammar(&databricks)
616 .unwrap()
617 .copy(
618 Some(vec![Ref::new("IdentifierClauseSegment").to_matchable()]),
619 None,
620 Some(Ref::new("ValuesClauseSegment").to_matchable()),
621 None,
622 Vec::new(),
623 false,
624 ),
625 );
626
627 databricks.replace_grammar(
629 "StatementSegment",
630 raw_sparksql
631 .grammar("StatementSegment")
632 .match_grammar(&databricks)
633 .unwrap()
634 .copy(
635 Some(vec![
636 Ref::new("AlterCatalogStatementSegment").to_matchable(),
637 Ref::new("CreateCatalogStatementSegment").to_matchable(),
638 Ref::new("DropCatalogStatementSegment").to_matchable(),
639 Ref::new("UseCatalogStatementSegment").to_matchable(),
640 Ref::new("DropVolumeStatementSegment").to_matchable(),
641 Ref::new("SetTimeZoneStatementSegment").to_matchable(),
642 Ref::new("OptimizeTableStatementSegment").to_matchable(),
643 Ref::new("CommentOnStatementSegment").to_matchable(),
644 Ref::new("DeclareOrReplaceVariableStatementSegment").to_matchable(),
645 ]),
646 None,
647 None,
648 None,
649 Vec::new(),
650 false,
651 ),
652 );
653
654 databricks.replace_grammar(
657 "GroupByClauseSegment",
658 Sequence::new(vec![
659 Ref::keyword("GROUP").to_matchable(),
660 Ref::keyword("BY").to_matchable(),
661 MetaSegment::indent().to_matchable(),
662 one_of(vec![
663 Ref::keyword("ALL").to_matchable(),
664 Delimited::new(vec![
665 Ref::new("CubeRollupClauseSegment").to_matchable(),
666 Ref::new("GroupingSetsClauseSegment").to_matchable(),
667 Ref::new("ColumnReferenceSegment").to_matchable(),
668 Ref::new("NumericLiteralSegment").optional().to_matchable(),
670 Ref::new("ExpressionSegment").optional().to_matchable(),
672 ])
673 .to_matchable(),
674 Sequence::new(vec![
675 Delimited::new(vec![
676 Ref::new("ColumnReferenceSegment").to_matchable(),
677 Ref::new("NumericLiteralSegment").optional().to_matchable(),
679 Ref::new("ExpressionSegment").optional().to_matchable(),
681 ])
682 .to_matchable(),
683 one_of(vec![
684 Ref::new("WithCubeRollupClauseSegment").to_matchable(),
685 Ref::new("GroupingSetsClauseSegment").to_matchable(),
686 ])
687 .to_matchable(),
688 ])
689 .to_matchable(),
690 ])
691 .to_matchable(),
692 MetaSegment::dedent().to_matchable(),
693 ])
694 .to_matchable(),
695 );
696
697 databricks.expand();
698 databricks
699}