1use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
2use crate::sparksql;
3use sqruff_lib_core::dialects::init::{DialectConfig, NullDialectConfig};
4use sqruff_lib_core::helpers::Config;
5use sqruff_lib_core::parser::grammar::anyof::one_of;
6use sqruff_lib_core::parser::grammar::delimited::Delimited;
7use sqruff_lib_core::parser::grammar::sequence::Bracketed;
8use sqruff_lib_core::parser::matchable::MatchableTrait;
9use sqruff_lib_core::parser::segments::meta::MetaSegment;
10use sqruff_lib_core::{
11 dialects::{Dialect, init::DialectKind},
12 helpers::ToMatchable,
13 parser::grammar::{Ref, sequence::Sequence},
14 value::Value,
15};
16
/// Configuration type used by the Databricks dialect.
///
/// Currently a plain alias for [`NullDialectConfig`]; [`dialect`] parses it
/// from the supplied value but does not read anything from it.
pub type DatabricksDialectConfig = NullDialectConfig;
19
20pub fn dialect(config: Option<&Value>) -> Dialect {
21 let _dialect_config: DatabricksDialectConfig = config
23 .map(DatabricksDialectConfig::from_value)
24 .unwrap_or_default();
25 let raw_sparksql = sparksql::raw_dialect();
26
27 let mut databricks = sparksql::raw_dialect();
28 databricks.name = DialectKind::Databricks;
29
30 databricks
31 .sets_mut("unreserved_keywords")
32 .extend(UNRESERVED_KEYWORDS);
33 databricks
34 .sets_mut("unreserved_keywords")
35 .extend(raw_sparksql.sets("reserved_keywords"));
36 databricks
37 .sets_mut("unreserved_keywords")
38 .retain(|x| !RESERVED_KEYWORDS.contains(x));
39 databricks.sets_mut("reserved_keywords").clear();
40 databricks
41 .sets_mut("reserved_keywords")
42 .extend(RESERVED_KEYWORDS);
43
44 databricks
45 .sets_mut("date_part_function_name")
46 .extend(["TIMEDIFF"]);
47
48 databricks.add([
49 (
50 "PrincipalIdentifierSegment".into(),
51 one_of(vec![
52 Ref::new("NakedIdentifierSegment").to_matchable(),
53 Ref::new("BackQuotedIdentifierSegment").to_matchable(),
54 ])
55 .to_matchable()
56 .into(),
57 ),
58 (
59 "SetOwnerGrammar".into(),
60 Sequence::new(vec![
61 Ref::keyword("SET").optional().to_matchable(),
62 Ref::keyword("OWNER").to_matchable(),
63 Ref::keyword("TO").to_matchable(),
64 Ref::new("PrincipalIdentifierSegment").to_matchable(),
65 ])
66 .to_matchable()
67 .into(),
68 ),
69 (
72 "CatalogReferenceSegment".into(),
73 Ref::new("ObjectReferenceSegment").to_matchable().into(),
74 ),
75 (
78 "AlterCatalogStatementSegment".into(),
79 Sequence::new(vec![
80 Ref::keyword("ALTER").to_matchable(),
81 Ref::keyword("CATALOG").to_matchable(),
82 Ref::new("CatalogReferenceSegment").to_matchable(),
83 Ref::new("SetOwnerGrammar").to_matchable(),
84 ])
85 .to_matchable()
86 .into(),
87 ),
88 (
91 "CreateCatalogStatementSegment".into(),
92 Sequence::new(vec![
93 Ref::keyword("CREATE").to_matchable(),
94 Ref::keyword("CATALOG").to_matchable(),
95 Ref::new("IfNotExistsGrammar").optional().to_matchable(),
96 Ref::new("CatalogReferenceSegment").to_matchable(),
97 Ref::new("CommentGrammar").optional().to_matchable(),
98 ])
99 .to_matchable()
100 .into(),
101 ),
102 (
105 "DropCatalogStatementSegment".into(),
106 Sequence::new(vec![
107 Ref::keyword("DROP").to_matchable(),
108 Ref::keyword("CATALOG").to_matchable(),
109 Ref::new("IfExistsGrammar").optional().to_matchable(),
110 Ref::new("CatalogReferenceSegment").to_matchable(),
111 Ref::new("DropBehaviorGrammar").optional().to_matchable(),
112 ])
113 .to_matchable()
114 .into(),
115 ),
116 (
119 "UseCatalogStatementSegment".into(),
120 Sequence::new(vec![
121 Ref::keyword("USE").to_matchable(),
122 Ref::keyword("CATALOG").to_matchable(),
123 Ref::new("CatalogReferenceSegment").to_matchable(),
124 ])
125 .to_matchable()
126 .into(),
127 ),
128 (
131 "UseDatabaseStatementSegment".into(),
132 Sequence::new(vec![
133 Ref::keyword("USE").to_matchable(),
134 one_of(vec![
135 Ref::keyword("DATABASE").to_matchable(),
136 Ref::keyword("SCHEMA").to_matchable(),
137 ])
138 .config(|config| {
139 config.optional();
140 })
141 .to_matchable(),
142 Ref::new("DatabaseReferenceSegment").to_matchable(),
143 ])
144 .to_matchable()
145 .into(),
146 ),
147 (
150 "SetTimeZoneStatementSegment".into(),
151 Sequence::new(vec![
152 Ref::keyword("SET").to_matchable(),
153 Ref::keyword("TIME").to_matchable(),
154 Ref::keyword("ZONE").to_matchable(),
155 one_of(vec![
156 Ref::keyword("LOCAL").to_matchable(),
157 Ref::new("QuotedLiteralSegment").to_matchable(),
158 Ref::new("IntervalExpressionSegment").to_matchable(),
159 ])
160 .to_matchable(),
161 ])
162 .to_matchable()
163 .into(),
164 ),
165 (
168 "OptimizeTableStatementSegment".into(),
169 Sequence::new(vec![
170 Ref::keyword("OPTIMIZE").to_matchable(),
171 Ref::new("TableReferenceSegment").to_matchable(),
172 Sequence::new(vec![
173 Ref::keyword("WHERE").to_matchable(),
174 Ref::new("ExpressionSegment").to_matchable(),
175 ])
176 .config(|config| {
177 config.optional();
178 })
179 .to_matchable(),
180 Sequence::new(vec![
181 Ref::keyword("ZORDER").to_matchable(),
182 Ref::keyword("BY").to_matchable(),
183 Bracketed::new(vec![
184 Delimited::new(vec![Ref::new("ColumnReferenceSegment").to_matchable()])
185 .to_matchable(),
186 ])
187 .to_matchable(),
188 ])
189 .config(|config| {
190 config.optional();
191 })
192 .to_matchable(),
193 ])
194 .to_matchable()
195 .into(),
196 ),
197 (
198 "DatabaseReferenceSegment".into(),
200 Ref::new("ObjectReferenceSegment").to_matchable().into(),
201 ),
202 (
203 "TableReferenceSegment".into(),
205 Ref::new("ObjectReferenceSegment").to_matchable().into(),
206 ),
207 (
208 "SchemaReferenceSegment".into(),
210 Ref::new("ObjectReferenceSegment").to_matchable().into(),
211 ),
212 (
213 "IdentifierClauseSegment".into(),
214 Sequence::new(vec![
215 Ref::keyword("IDENTIFIER").to_matchable(),
216 Bracketed::new(vec![Ref::new("SingleIdentifierGrammar").to_matchable()])
217 .to_matchable(),
218 ])
219 .to_matchable()
220 .into(),
221 ),
222 (
223 "DropVolumeStatementSegment".into(),
226 Sequence::new(vec![
227 Ref::keyword("DROP").to_matchable(),
228 Ref::keyword("VOLUME").to_matchable(),
229 Ref::new("IfExistsGrammar").optional().to_matchable(),
230 Ref::new("VolumeReferenceSegment").to_matchable(),
231 ])
232 .to_matchable()
233 .into(),
234 ),
235 (
236 "VolumeReferenceSegment".into(),
237 Ref::new("ObjectReferenceSegment").to_matchable().into(),
238 ),
239 (
240 "DescribeObjectGrammar".into(),
242 sparksql::dialect(None)
243 .grammar("DescribeObjectGrammar")
244 .copy(
245 Some(vec![
246 Sequence::new(vec![
247 Ref::keyword("VOLUME").to_matchable(),
248 Ref::new("VolumeReferenceSegment").to_matchable(),
249 ])
250 .to_matchable(),
251 ]),
252 Some(0),
253 None,
254 None,
255 Vec::new(),
256 false,
257 )
258 .into(),
259 ),
260 (
261 "DeclareOrReplaceVariableStatementSegment".into(),
264 Sequence::new(vec![
265 Ref::keyword("DECLARE").to_matchable(),
266 Ref::new("OrReplaceGrammar").optional().to_matchable(),
267 Ref::keyword("VARIABLE").optional().to_matchable(),
268 Ref::new("SingleIdentifierGrammar").to_matchable(),
269 Ref::new("DatatypeSegment").optional().to_matchable(),
270 Sequence::new(vec![
271 one_of(vec![
272 Ref::keyword("DEFAULT").to_matchable(),
273 Ref::new("EqualsSegment").to_matchable(),
274 ])
275 .to_matchable(),
276 Ref::new("ExpressionSegment").to_matchable(),
277 ])
278 .config(|config| {
279 config.optional();
280 })
281 .to_matchable(),
282 ])
283 .to_matchable()
284 .into(),
285 ),
286 (
289 "CommentOnStatementSegment".into(),
290 Sequence::new(vec![
291 Ref::keyword("COMMENT").to_matchable(),
292 Ref::keyword("ON").to_matchable(),
293 one_of(vec![
294 Sequence::new(vec![
295 Ref::keyword("CATALOG").to_matchable(),
296 Ref::new("CatalogReferenceSegment").to_matchable(),
297 ])
298 .to_matchable(),
299 Sequence::new(vec![
300 one_of(vec![
301 Ref::keyword("DATABASE").to_matchable(),
302 Ref::keyword("SCHEMA").to_matchable(),
303 ])
304 .to_matchable(),
305 Ref::new("DatabaseReferenceSegment").to_matchable(),
306 ])
307 .to_matchable(),
308 Sequence::new(vec![
309 Ref::keyword("TABLE").to_matchable(),
310 Ref::new("TableReferenceSegment").to_matchable(),
311 ])
312 .to_matchable(),
313 Sequence::new(vec![
314 Ref::keyword("VOLUME").to_matchable(),
315 Ref::new("VolumeReferenceSegment").to_matchable(),
316 ])
317 .to_matchable(),
318 Sequence::new(vec![
320 one_of(vec![
321 Ref::keyword("CONNECTION").to_matchable(),
322 Ref::keyword("PROVIDER").to_matchable(),
323 Ref::keyword("RECIPIENT").to_matchable(),
324 Ref::keyword("SHARE").to_matchable(),
325 ])
326 .to_matchable(),
327 Ref::new("ObjectReferenceSegment").to_matchable(),
328 ])
329 .to_matchable(),
330 ])
331 .to_matchable(),
332 Ref::keyword("IS").to_matchable(),
333 one_of(vec![
334 Ref::new("QuotedLiteralSegment").to_matchable(),
335 Ref::keyword("NULL").to_matchable(),
336 ])
337 .to_matchable(),
338 ])
339 .to_matchable()
340 .into(),
341 ),
342 (
347 "ShowDatabasesSchemasGrammar".into(),
348 Sequence::new(vec![
349 one_of(vec![
350 Ref::keyword("DATABASES").to_matchable(),
351 Ref::keyword("SCHEMAS").to_matchable(),
352 ])
353 .to_matchable(),
354 Sequence::new(vec![
355 one_of(vec![
356 Ref::keyword("FROM").to_matchable(),
357 Ref::keyword("IN").to_matchable(),
358 ])
359 .to_matchable(),
360 Ref::new("DatabaseReferenceSegment").to_matchable(),
361 ])
362 .config(|config| {
363 config.optional();
364 })
365 .to_matchable(),
366 Sequence::new(vec![
367 Ref::keyword("LIKE").optional().to_matchable(),
368 Ref::new("QuotedLiteralSegment").to_matchable(),
369 ])
370 .config(|config| {
371 config.optional();
372 })
373 .to_matchable(),
374 ])
375 .to_matchable()
376 .into(),
377 ),
378 (
383 "ShowDatabasesSchemasGrammar".into(),
384 Sequence::new(vec![
385 one_of(vec![
386 Ref::keyword("DATABASES").to_matchable(),
387 Ref::keyword("SCHEMAS").to_matchable(),
388 ])
389 .to_matchable(),
390 Sequence::new(vec![
391 one_of(vec![
392 Ref::keyword("FROM").to_matchable(),
393 Ref::keyword("IN").to_matchable(),
394 ])
395 .to_matchable(),
396 Ref::new("DatabaseReferenceSegment").to_matchable(),
397 ])
398 .config(|config| {
399 config.optional();
400 })
401 .to_matchable(),
402 Sequence::new(vec![
403 Ref::keyword("LIKE").optional().to_matchable(),
404 Ref::new("QuotedLiteralSegment").to_matchable(),
405 ])
406 .config(|config| {
407 config.optional();
408 })
409 .to_matchable(),
410 ])
411 .to_matchable()
412 .into(),
413 ),
414 (
423 "ShowFunctionsGrammar".into(),
424 Sequence::new(vec![
425 one_of(vec![
426 Ref::keyword("USER").to_matchable(),
427 Ref::keyword("SYSTEM").to_matchable(),
428 Ref::keyword("ALL").to_matchable(),
429 ])
430 .config(|config| {
431 config.optional();
432 })
433 .to_matchable(),
434 Ref::keyword("FUNCTIONS").to_matchable(),
435 Sequence::new(vec![
436 Sequence::new(vec![
437 one_of(vec![
438 Ref::keyword("FROM").to_matchable(),
439 Ref::keyword("IN").to_matchable(),
440 ])
441 .to_matchable(),
442 Ref::new("DatabaseReferenceSegment").to_matchable(),
443 ])
444 .config(|config| {
445 config.optional();
446 })
447 .to_matchable(),
448 Sequence::new(vec![
449 Ref::keyword("LIKE").optional().to_matchable(),
450 one_of(vec![
451 Sequence::new(vec![
453 Ref::new("DatabaseReferenceSegment").to_matchable(),
454 Ref::new("DotSegment").to_matchable(),
455 Ref::new("FunctionNameSegment").to_matchable(),
456 ])
457 .config(|config| {
458 config.disallow_gaps();
459 })
460 .to_matchable(),
461 Ref::new("FunctionNameSegment").to_matchable(),
463 Ref::new("QuotedLiteralSegment").to_matchable(),
465 ])
466 .to_matchable(),
467 ])
468 .config(|config| {
469 config.optional();
470 })
471 .to_matchable(),
472 ])
473 .config(|config| {
474 config.optional();
475 })
476 .to_matchable(),
477 ])
478 .to_matchable()
479 .into(),
480 ),
481 (
485 "ShowTablesGrammar".into(),
486 Sequence::new(vec![
487 Ref::keyword("TABLES").to_matchable(),
488 Sequence::new(vec![
489 one_of(vec![
490 Ref::keyword("FROM").to_matchable(),
491 Ref::keyword("IN").to_matchable(),
492 ])
493 .to_matchable(),
494 Ref::new("DatabaseReferenceSegment").to_matchable(),
495 ])
496 .config(|config| {
497 config.optional();
498 })
499 .to_matchable(),
500 Sequence::new(vec![
501 Ref::keyword("LIKE").optional().to_matchable(),
502 Ref::new("QuotedLiteralSegment").to_matchable(),
503 ])
504 .config(|config| {
505 config.optional();
506 })
507 .to_matchable(),
508 ])
509 .to_matchable()
510 .into(),
511 ),
512 (
516 "ShowViewsGrammar".into(),
517 Sequence::new(vec![
518 Ref::keyword("VIEWS").to_matchable(),
519 Sequence::new(vec![
520 one_of(vec![
521 Ref::keyword("FROM").to_matchable(),
522 Ref::keyword("IN").to_matchable(),
523 ])
524 .to_matchable(),
525 Ref::new("DatabaseReferenceSegment").to_matchable(),
526 ])
527 .config(|config| {
528 config.optional();
529 })
530 .to_matchable(),
531 Sequence::new(vec![
532 Ref::keyword("LIKE").optional().to_matchable(),
533 Ref::new("QuotedLiteralSegment").to_matchable(),
534 ])
535 .config(|config| {
536 config.optional();
537 })
538 .to_matchable(),
539 ])
540 .to_matchable()
541 .into(),
542 ),
543 (
545 "ShowObjectGrammar".into(),
546 sparksql::raw_dialect()
547 .grammar("ShowObjectGrammar")
548 .copy(
549 Some(vec![
550 Sequence::new(vec![
551 Ref::keyword("VOLUMES").to_matchable(),
552 Sequence::new(vec![
553 one_of(vec![
554 Ref::keyword("FROM").to_matchable(),
555 Ref::keyword("IN").to_matchable(),
556 ])
557 .to_matchable(),
558 Ref::new("DatabaseReferenceSegment").to_matchable(),
559 ])
560 .config(|config| {
561 config.optional();
562 })
563 .to_matchable(),
564 Sequence::new(vec![
565 Ref::keyword("LIKE").optional().to_matchable(),
566 Ref::new("QuotedLiteralSegment").to_matchable(),
567 ])
568 .config(|config| {
569 config.optional();
570 })
571 .to_matchable(),
572 ])
573 .to_matchable(),
574 ]),
575 None,
576 None,
577 None,
578 Vec::new(),
579 false,
580 )
581 .into(),
582 ),
583 (
585 "InsertBracketedColumnReferenceListGrammar".into(),
586 one_of(vec![
587 Ref::new("BracketedColumnReferenceListGrammar").to_matchable(),
588 Sequence::new(vec![
589 Ref::keyword("BY").to_matchable(),
590 Ref::keyword("NAME").to_matchable(),
591 ])
592 .to_matchable(),
593 ])
594 .to_matchable()
595 .into(),
596 ),
597 ]);
598
599 databricks.replace_grammar(
601 "ObjectReferenceSegment",
602 Delimited::new(vec![
603 one_of(vec![
604 Ref::new("SingleIdentifierGrammar").to_matchable(),
605 Ref::new("IdentifierClauseSegment").to_matchable(),
606 ])
607 .to_matchable(),
608 Ref::new("ObjectReferenceDelimiterGrammar").to_matchable(),
609 ])
610 .config(|config| {
611 config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
612 config.terminators = vec![Ref::new("ObjectReferenceTerminatorGrammar").to_matchable()];
613 config.disallow_gaps();
614 })
615 .to_matchable(),
616 );
617
618 databricks.replace_grammar(
621 "TableExpressionSegment",
622 sparksql::dialect(None)
623 .grammar("TableExpressionSegment")
624 .match_grammar(&databricks)
625 .unwrap()
626 .copy(
627 Some(vec![Ref::new("IdentifierClauseSegment").to_matchable()]),
628 None,
629 Some(Ref::new("ValuesClauseSegment").to_matchable()),
630 None,
631 Vec::new(),
632 false,
633 ),
634 );
635
636 databricks.replace_grammar(
638 "StatementSegment",
639 raw_sparksql
640 .grammar("StatementSegment")
641 .match_grammar(&databricks)
642 .unwrap()
643 .copy(
644 Some(vec![
645 Ref::new("AlterCatalogStatementSegment").to_matchable(),
646 Ref::new("CreateCatalogStatementSegment").to_matchable(),
647 Ref::new("DropCatalogStatementSegment").to_matchable(),
648 Ref::new("UseCatalogStatementSegment").to_matchable(),
649 Ref::new("DropVolumeStatementSegment").to_matchable(),
650 Ref::new("SetTimeZoneStatementSegment").to_matchable(),
651 Ref::new("OptimizeTableStatementSegment").to_matchable(),
652 Ref::new("CommentOnStatementSegment").to_matchable(),
653 Ref::new("DeclareOrReplaceVariableStatementSegment").to_matchable(),
654 ]),
655 None,
656 None,
657 None,
658 Vec::new(),
659 false,
660 ),
661 );
662
663 databricks.replace_grammar(
666 "GroupByClauseSegment",
667 Sequence::new(vec![
668 Ref::keyword("GROUP").to_matchable(),
669 Ref::keyword("BY").to_matchable(),
670 MetaSegment::indent().to_matchable(),
671 one_of(vec![
672 Ref::keyword("ALL").to_matchable(),
673 Delimited::new(vec![
674 Ref::new("CubeRollupClauseSegment").to_matchable(),
675 Ref::new("GroupingSetsClauseSegment").to_matchable(),
676 Ref::new("ColumnReferenceSegment").to_matchable(),
677 Ref::new("NumericLiteralSegment").optional().to_matchable(),
679 Ref::new("ExpressionSegment").optional().to_matchable(),
681 ])
682 .to_matchable(),
683 Sequence::new(vec![
684 Delimited::new(vec![
685 Ref::new("ColumnReferenceSegment").to_matchable(),
686 Ref::new("NumericLiteralSegment").optional().to_matchable(),
688 Ref::new("ExpressionSegment").optional().to_matchable(),
690 ])
691 .to_matchable(),
692 one_of(vec![
693 Ref::new("WithCubeRollupClauseSegment").to_matchable(),
694 Ref::new("GroupingSetsClauseSegment").to_matchable(),
695 ])
696 .to_matchable(),
697 ])
698 .to_matchable(),
699 ])
700 .to_matchable(),
701 MetaSegment::dedent().to_matchable(),
702 ])
703 .to_matchable(),
704 );
705
706 databricks.expand();
707 databricks
708}