1use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
2use crate::sparksql;
3use sqruff_lib_core::dialects::init::DialectConfig;
4use sqruff_lib_core::helpers::Config;
5use sqruff_lib_core::parser::grammar::anyof::one_of;
6use sqruff_lib_core::parser::grammar::delimited::Delimited;
7use sqruff_lib_core::parser::grammar::sequence::Bracketed;
8use sqruff_lib_core::parser::matchable::MatchableTrait;
9use sqruff_lib_core::parser::segments::meta::MetaSegment;
10use sqruff_lib_core::{
11 dialects::{Dialect, init::DialectKind},
12 helpers::ToMatchable,
13 parser::grammar::{Ref, sequence::Sequence},
14 value::Value,
15};
16
// Declares `DatabricksDialectConfig` through the `dialect_config!` macro with an empty
// field list; `dialect` builds it via `from_value` or falls back to its default.
sqruff_lib_core::dialect_config!(DatabricksDialectConfig {});
18
19pub fn dialect(config: Option<&Value>) -> Dialect {
20 let _dialect_config: DatabricksDialectConfig = config
22 .map(DatabricksDialectConfig::from_value)
23 .unwrap_or_default();
24 let raw_sparksql = sparksql::raw_dialect();
25
26 let mut databricks = sparksql::raw_dialect();
27 databricks.name = DialectKind::Databricks;
28
29 databricks
30 .sets_mut("unreserved_keywords")
31 .extend(UNRESERVED_KEYWORDS);
32 databricks
33 .sets_mut("unreserved_keywords")
34 .extend(raw_sparksql.sets("reserved_keywords"));
35 databricks
36 .sets_mut("unreserved_keywords")
37 .retain(|x| !RESERVED_KEYWORDS.contains(x));
38 databricks.sets_mut("reserved_keywords").clear();
39 databricks
40 .sets_mut("reserved_keywords")
41 .extend(RESERVED_KEYWORDS);
42
43 databricks
44 .sets_mut("date_part_function_name")
45 .extend(["TIMEDIFF"]);
46
47 databricks.add([
48 (
49 "PrincipalIdentifierSegment".into(),
50 one_of(vec![
51 Ref::new("NakedIdentifierSegment").to_matchable(),
52 Ref::new("BackQuotedIdentifierSegment").to_matchable(),
53 ])
54 .to_matchable()
55 .into(),
56 ),
57 (
58 "SetOwnerGrammar".into(),
59 Sequence::new(vec![
60 Ref::keyword("SET").optional().to_matchable(),
61 Ref::keyword("OWNER").to_matchable(),
62 Ref::keyword("TO").to_matchable(),
63 Ref::new("PrincipalIdentifierSegment").to_matchable(),
64 ])
65 .to_matchable()
66 .into(),
67 ),
68 (
71 "CatalogReferenceSegment".into(),
72 Ref::new("ObjectReferenceSegment").to_matchable().into(),
73 ),
74 (
77 "AlterCatalogStatementSegment".into(),
78 Sequence::new(vec![
79 Ref::keyword("ALTER").to_matchable(),
80 Ref::keyword("CATALOG").to_matchable(),
81 Ref::new("CatalogReferenceSegment").to_matchable(),
82 Ref::new("SetOwnerGrammar").to_matchable(),
83 ])
84 .to_matchable()
85 .into(),
86 ),
87 (
90 "CreateCatalogStatementSegment".into(),
91 Sequence::new(vec![
92 Ref::keyword("CREATE").to_matchable(),
93 Ref::keyword("CATALOG").to_matchable(),
94 Ref::new("IfNotExistsGrammar").optional().to_matchable(),
95 Ref::new("CatalogReferenceSegment").to_matchable(),
96 Ref::new("CommentGrammar").optional().to_matchable(),
97 ])
98 .to_matchable()
99 .into(),
100 ),
101 (
104 "DropCatalogStatementSegment".into(),
105 Sequence::new(vec![
106 Ref::keyword("DROP").to_matchable(),
107 Ref::keyword("CATALOG").to_matchable(),
108 Ref::new("IfExistsGrammar").optional().to_matchable(),
109 Ref::new("CatalogReferenceSegment").to_matchable(),
110 Ref::new("DropBehaviorGrammar").optional().to_matchable(),
111 ])
112 .to_matchable()
113 .into(),
114 ),
115 (
118 "UseCatalogStatementSegment".into(),
119 Sequence::new(vec![
120 Ref::keyword("USE").to_matchable(),
121 Ref::keyword("CATALOG").to_matchable(),
122 Ref::new("CatalogReferenceSegment").to_matchable(),
123 ])
124 .to_matchable()
125 .into(),
126 ),
127 (
130 "UseDatabaseStatementSegment".into(),
131 Sequence::new(vec![
132 Ref::keyword("USE").to_matchable(),
133 one_of(vec![
134 Ref::keyword("DATABASE").to_matchable(),
135 Ref::keyword("SCHEMA").to_matchable(),
136 ])
137 .config(|config| {
138 config.optional();
139 })
140 .to_matchable(),
141 Ref::new("DatabaseReferenceSegment").to_matchable(),
142 ])
143 .to_matchable()
144 .into(),
145 ),
146 (
149 "SetTimeZoneStatementSegment".into(),
150 Sequence::new(vec![
151 Ref::keyword("SET").to_matchable(),
152 Ref::keyword("TIME").to_matchable(),
153 Ref::keyword("ZONE").to_matchable(),
154 one_of(vec![
155 Ref::keyword("LOCAL").to_matchable(),
156 Ref::new("QuotedLiteralSegment").to_matchable(),
157 Ref::new("IntervalExpressionSegment").to_matchable(),
158 ])
159 .to_matchable(),
160 ])
161 .to_matchable()
162 .into(),
163 ),
164 (
167 "OptimizeTableStatementSegment".into(),
168 Sequence::new(vec![
169 Ref::keyword("OPTIMIZE").to_matchable(),
170 Ref::new("TableReferenceSegment").to_matchable(),
171 Sequence::new(vec![
172 Ref::keyword("WHERE").to_matchable(),
173 Ref::new("ExpressionSegment").to_matchable(),
174 ])
175 .config(|config| {
176 config.optional();
177 })
178 .to_matchable(),
179 Sequence::new(vec![
180 Ref::keyword("ZORDER").to_matchable(),
181 Ref::keyword("BY").to_matchable(),
182 Bracketed::new(vec![
183 Delimited::new(vec![Ref::new("ColumnReferenceSegment").to_matchable()])
184 .to_matchable(),
185 ])
186 .to_matchable(),
187 ])
188 .config(|config| {
189 config.optional();
190 })
191 .to_matchable(),
192 ])
193 .to_matchable()
194 .into(),
195 ),
196 (
197 "DatabaseReferenceSegment".into(),
199 Ref::new("ObjectReferenceSegment").to_matchable().into(),
200 ),
201 (
202 "TableReferenceSegment".into(),
204 Ref::new("ObjectReferenceSegment").to_matchable().into(),
205 ),
206 (
207 "SchemaReferenceSegment".into(),
209 Ref::new("ObjectReferenceSegment").to_matchable().into(),
210 ),
211 (
212 "IdentifierClauseSegment".into(),
213 Sequence::new(vec![
214 Ref::keyword("IDENTIFIER").to_matchable(),
215 Bracketed::new(vec![Ref::new("SingleIdentifierGrammar").to_matchable()])
216 .to_matchable(),
217 ])
218 .to_matchable()
219 .into(),
220 ),
221 (
222 "DropVolumeStatementSegment".into(),
225 Sequence::new(vec![
226 Ref::keyword("DROP").to_matchable(),
227 Ref::keyword("VOLUME").to_matchable(),
228 Ref::new("IfExistsGrammar").optional().to_matchable(),
229 Ref::new("VolumeReferenceSegment").to_matchable(),
230 ])
231 .to_matchable()
232 .into(),
233 ),
234 (
235 "VolumeReferenceSegment".into(),
236 Ref::new("ObjectReferenceSegment").to_matchable().into(),
237 ),
238 (
239 "DescribeObjectGrammar".into(),
241 sparksql::dialect(None)
242 .grammar("DescribeObjectGrammar")
243 .copy(
244 Some(vec![
245 Sequence::new(vec![
246 Ref::keyword("VOLUME").to_matchable(),
247 Ref::new("VolumeReferenceSegment").to_matchable(),
248 ])
249 .to_matchable(),
250 ]),
251 Some(0),
252 None,
253 None,
254 Vec::new(),
255 false,
256 )
257 .into(),
258 ),
259 (
260 "DeclareOrReplaceVariableStatementSegment".into(),
263 Sequence::new(vec![
264 Ref::keyword("DECLARE").to_matchable(),
265 Ref::new("OrReplaceGrammar").optional().to_matchable(),
266 Ref::keyword("VARIABLE").optional().to_matchable(),
267 Ref::new("SingleIdentifierGrammar").to_matchable(),
268 Ref::new("DatatypeSegment").optional().to_matchable(),
269 Sequence::new(vec![
270 one_of(vec![
271 Ref::keyword("DEFAULT").to_matchable(),
272 Ref::new("EqualsSegment").to_matchable(),
273 ])
274 .to_matchable(),
275 Ref::new("ExpressionSegment").to_matchable(),
276 ])
277 .config(|config| {
278 config.optional();
279 })
280 .to_matchable(),
281 ])
282 .to_matchable()
283 .into(),
284 ),
285 (
288 "CommentOnStatementSegment".into(),
289 Sequence::new(vec![
290 Ref::keyword("COMMENT").to_matchable(),
291 Ref::keyword("ON").to_matchable(),
292 one_of(vec![
293 Sequence::new(vec![
294 Ref::keyword("CATALOG").to_matchable(),
295 Ref::new("CatalogReferenceSegment").to_matchable(),
296 ])
297 .to_matchable(),
298 Sequence::new(vec![
299 one_of(vec![
300 Ref::keyword("DATABASE").to_matchable(),
301 Ref::keyword("SCHEMA").to_matchable(),
302 ])
303 .to_matchable(),
304 Ref::new("DatabaseReferenceSegment").to_matchable(),
305 ])
306 .to_matchable(),
307 Sequence::new(vec![
308 Ref::keyword("TABLE").to_matchable(),
309 Ref::new("TableReferenceSegment").to_matchable(),
310 ])
311 .to_matchable(),
312 Sequence::new(vec![
313 Ref::keyword("VOLUME").to_matchable(),
314 Ref::new("VolumeReferenceSegment").to_matchable(),
315 ])
316 .to_matchable(),
317 Sequence::new(vec![
319 one_of(vec![
320 Ref::keyword("CONNECTION").to_matchable(),
321 Ref::keyword("PROVIDER").to_matchable(),
322 Ref::keyword("RECIPIENT").to_matchable(),
323 Ref::keyword("SHARE").to_matchable(),
324 ])
325 .to_matchable(),
326 Ref::new("ObjectReferenceSegment").to_matchable(),
327 ])
328 .to_matchable(),
329 ])
330 .to_matchable(),
331 Ref::keyword("IS").to_matchable(),
332 one_of(vec![
333 Ref::new("QuotedLiteralSegment").to_matchable(),
334 Ref::keyword("NULL").to_matchable(),
335 ])
336 .to_matchable(),
337 ])
338 .to_matchable()
339 .into(),
340 ),
341 (
346 "ShowDatabasesSchemasGrammar".into(),
347 Sequence::new(vec![
348 one_of(vec![
349 Ref::keyword("DATABASES").to_matchable(),
350 Ref::keyword("SCHEMAS").to_matchable(),
351 ])
352 .to_matchable(),
353 Sequence::new(vec![
354 one_of(vec![
355 Ref::keyword("FROM").to_matchable(),
356 Ref::keyword("IN").to_matchable(),
357 ])
358 .to_matchable(),
359 Ref::new("DatabaseReferenceSegment").to_matchable(),
360 ])
361 .config(|config| {
362 config.optional();
363 })
364 .to_matchable(),
365 Sequence::new(vec![
366 Ref::keyword("LIKE").optional().to_matchable(),
367 Ref::new("QuotedLiteralSegment").to_matchable(),
368 ])
369 .config(|config| {
370 config.optional();
371 })
372 .to_matchable(),
373 ])
374 .to_matchable()
375 .into(),
376 ),
377 (
382 "ShowDatabasesSchemasGrammar".into(),
383 Sequence::new(vec![
384 one_of(vec![
385 Ref::keyword("DATABASES").to_matchable(),
386 Ref::keyword("SCHEMAS").to_matchable(),
387 ])
388 .to_matchable(),
389 Sequence::new(vec![
390 one_of(vec![
391 Ref::keyword("FROM").to_matchable(),
392 Ref::keyword("IN").to_matchable(),
393 ])
394 .to_matchable(),
395 Ref::new("DatabaseReferenceSegment").to_matchable(),
396 ])
397 .config(|config| {
398 config.optional();
399 })
400 .to_matchable(),
401 Sequence::new(vec![
402 Ref::keyword("LIKE").optional().to_matchable(),
403 Ref::new("QuotedLiteralSegment").to_matchable(),
404 ])
405 .config(|config| {
406 config.optional();
407 })
408 .to_matchable(),
409 ])
410 .to_matchable()
411 .into(),
412 ),
413 (
422 "ShowFunctionsGrammar".into(),
423 Sequence::new(vec![
424 one_of(vec![
425 Ref::keyword("USER").to_matchable(),
426 Ref::keyword("SYSTEM").to_matchable(),
427 Ref::keyword("ALL").to_matchable(),
428 ])
429 .config(|config| {
430 config.optional();
431 })
432 .to_matchable(),
433 Ref::keyword("FUNCTIONS").to_matchable(),
434 Sequence::new(vec![
435 Sequence::new(vec![
436 one_of(vec![
437 Ref::keyword("FROM").to_matchable(),
438 Ref::keyword("IN").to_matchable(),
439 ])
440 .to_matchable(),
441 Ref::new("DatabaseReferenceSegment").to_matchable(),
442 ])
443 .config(|config| {
444 config.optional();
445 })
446 .to_matchable(),
447 Sequence::new(vec![
448 Ref::keyword("LIKE").optional().to_matchable(),
449 one_of(vec![
450 Sequence::new(vec![
452 Ref::new("DatabaseReferenceSegment").to_matchable(),
453 Ref::new("DotSegment").to_matchable(),
454 Ref::new("FunctionNameSegment").to_matchable(),
455 ])
456 .config(|config| {
457 config.disallow_gaps();
458 })
459 .to_matchable(),
460 Ref::new("FunctionNameSegment").to_matchable(),
462 Ref::new("QuotedLiteralSegment").to_matchable(),
464 ])
465 .to_matchable(),
466 ])
467 .config(|config| {
468 config.optional();
469 })
470 .to_matchable(),
471 ])
472 .config(|config| {
473 config.optional();
474 })
475 .to_matchable(),
476 ])
477 .to_matchable()
478 .into(),
479 ),
480 (
484 "ShowTablesGrammar".into(),
485 Sequence::new(vec![
486 Ref::keyword("TABLES").to_matchable(),
487 Sequence::new(vec![
488 one_of(vec![
489 Ref::keyword("FROM").to_matchable(),
490 Ref::keyword("IN").to_matchable(),
491 ])
492 .to_matchable(),
493 Ref::new("DatabaseReferenceSegment").to_matchable(),
494 ])
495 .config(|config| {
496 config.optional();
497 })
498 .to_matchable(),
499 Sequence::new(vec![
500 Ref::keyword("LIKE").optional().to_matchable(),
501 Ref::new("QuotedLiteralSegment").to_matchable(),
502 ])
503 .config(|config| {
504 config.optional();
505 })
506 .to_matchable(),
507 ])
508 .to_matchable()
509 .into(),
510 ),
511 (
515 "ShowViewsGrammar".into(),
516 Sequence::new(vec![
517 Ref::keyword("VIEWS").to_matchable(),
518 Sequence::new(vec![
519 one_of(vec![
520 Ref::keyword("FROM").to_matchable(),
521 Ref::keyword("IN").to_matchable(),
522 ])
523 .to_matchable(),
524 Ref::new("DatabaseReferenceSegment").to_matchable(),
525 ])
526 .config(|config| {
527 config.optional();
528 })
529 .to_matchable(),
530 Sequence::new(vec![
531 Ref::keyword("LIKE").optional().to_matchable(),
532 Ref::new("QuotedLiteralSegment").to_matchable(),
533 ])
534 .config(|config| {
535 config.optional();
536 })
537 .to_matchable(),
538 ])
539 .to_matchable()
540 .into(),
541 ),
542 (
544 "ShowObjectGrammar".into(),
545 sparksql::raw_dialect()
546 .grammar("ShowObjectGrammar")
547 .copy(
548 Some(vec![
549 Sequence::new(vec![
550 Ref::keyword("VOLUMES").to_matchable(),
551 Sequence::new(vec![
552 one_of(vec![
553 Ref::keyword("FROM").to_matchable(),
554 Ref::keyword("IN").to_matchable(),
555 ])
556 .to_matchable(),
557 Ref::new("DatabaseReferenceSegment").to_matchable(),
558 ])
559 .config(|config| {
560 config.optional();
561 })
562 .to_matchable(),
563 Sequence::new(vec![
564 Ref::keyword("LIKE").optional().to_matchable(),
565 Ref::new("QuotedLiteralSegment").to_matchable(),
566 ])
567 .config(|config| {
568 config.optional();
569 })
570 .to_matchable(),
571 ])
572 .to_matchable(),
573 ]),
574 None,
575 None,
576 None,
577 Vec::new(),
578 false,
579 )
580 .into(),
581 ),
582 (
584 "InsertBracketedColumnReferenceListGrammar".into(),
585 one_of(vec![
586 Ref::new("BracketedColumnReferenceListGrammar").to_matchable(),
587 Sequence::new(vec![
588 Ref::keyword("BY").to_matchable(),
589 Ref::keyword("NAME").to_matchable(),
590 ])
591 .to_matchable(),
592 ])
593 .to_matchable()
594 .into(),
595 ),
596 ]);
597
598 databricks.replace_grammar(
600 "ObjectReferenceSegment",
601 Delimited::new(vec![
602 one_of(vec![
603 Ref::new("SingleIdentifierGrammar").to_matchable(),
604 Ref::new("IdentifierClauseSegment").to_matchable(),
605 ])
606 .to_matchable(),
607 Ref::new("ObjectReferenceDelimiterGrammar").to_matchable(),
608 ])
609 .config(|config| {
610 config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
611 config.terminators = vec![Ref::new("ObjectReferenceTerminatorGrammar").to_matchable()];
612 config.disallow_gaps();
613 })
614 .to_matchable(),
615 );
616
617 databricks.replace_grammar(
620 "TableExpressionSegment",
621 sparksql::dialect(None)
622 .grammar("TableExpressionSegment")
623 .match_grammar(&databricks)
624 .unwrap()
625 .copy(
626 Some(vec![Ref::new("IdentifierClauseSegment").to_matchable()]),
627 None,
628 Some(Ref::new("ValuesClauseSegment").to_matchable()),
629 None,
630 Vec::new(),
631 false,
632 ),
633 );
634
635 databricks.replace_grammar(
637 "StatementSegment",
638 raw_sparksql
639 .grammar("StatementSegment")
640 .match_grammar(&databricks)
641 .unwrap()
642 .copy(
643 Some(vec![
644 Ref::new("AlterCatalogStatementSegment").to_matchable(),
645 Ref::new("CreateCatalogStatementSegment").to_matchable(),
646 Ref::new("DropCatalogStatementSegment").to_matchable(),
647 Ref::new("UseCatalogStatementSegment").to_matchable(),
648 Ref::new("DropVolumeStatementSegment").to_matchable(),
649 Ref::new("SetTimeZoneStatementSegment").to_matchable(),
650 Ref::new("OptimizeTableStatementSegment").to_matchable(),
651 Ref::new("CommentOnStatementSegment").to_matchable(),
652 Ref::new("DeclareOrReplaceVariableStatementSegment").to_matchable(),
653 ]),
654 None,
655 None,
656 None,
657 Vec::new(),
658 false,
659 ),
660 );
661
662 databricks.replace_grammar(
665 "GroupByClauseSegment",
666 Sequence::new(vec![
667 Ref::keyword("GROUP").to_matchable(),
668 Ref::keyword("BY").to_matchable(),
669 MetaSegment::indent().to_matchable(),
670 one_of(vec![
671 Ref::keyword("ALL").to_matchable(),
672 Delimited::new(vec![
673 Ref::new("CubeRollupClauseSegment").to_matchable(),
674 Ref::new("GroupingSetsClauseSegment").to_matchable(),
675 Ref::new("ColumnReferenceSegment").to_matchable(),
676 Ref::new("NumericLiteralSegment").optional().to_matchable(),
678 Ref::new("ExpressionSegment").optional().to_matchable(),
680 ])
681 .to_matchable(),
682 Sequence::new(vec![
683 Delimited::new(vec![
684 Ref::new("ColumnReferenceSegment").to_matchable(),
685 Ref::new("NumericLiteralSegment").optional().to_matchable(),
687 Ref::new("ExpressionSegment").optional().to_matchable(),
689 ])
690 .to_matchable(),
691 one_of(vec![
692 Ref::new("WithCubeRollupClauseSegment").to_matchable(),
693 Ref::new("GroupingSetsClauseSegment").to_matchable(),
694 ])
695 .to_matchable(),
696 ])
697 .to_matchable(),
698 ])
699 .to_matchable(),
700 MetaSegment::dedent().to_matchable(),
701 ])
702 .to_matchable(),
703 );
704
705 databricks.expand();
706 databricks
707}