1use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS};
2use crate::sparksql;
3use sqruff_lib_core::helpers::Config;
4use sqruff_lib_core::parser::grammar::anyof::one_of;
5use sqruff_lib_core::parser::grammar::delimited::Delimited;
6use sqruff_lib_core::parser::grammar::sequence::Bracketed;
7use sqruff_lib_core::parser::matchable::MatchableTrait;
8use sqruff_lib_core::parser::segments::meta::MetaSegment;
9use sqruff_lib_core::{
10 dialects::{Dialect, init::DialectKind},
11 helpers::ToMatchable,
12 parser::grammar::{Ref, sequence::Sequence},
13 vec_of_erased,
14};
15
16pub fn dialect() -> Dialect {
17 let raw_sparksql = sparksql::raw_dialect();
18
19 let mut databricks = sparksql::raw_dialect();
20 databricks.name = DialectKind::Databricks;
21
22 databricks
23 .sets_mut("unreserved_keywords")
24 .extend(UNRESERVED_KEYWORDS);
25 databricks
26 .sets_mut("unreserved_keywords")
27 .extend(raw_sparksql.sets("reserved_keywords"));
28 databricks
29 .sets_mut("unreserved_keywords")
30 .retain(|x| !RESERVED_KEYWORDS.contains(x));
31 databricks.sets_mut("reserved_keywords").clear();
32 databricks
33 .sets_mut("reserved_keywords")
34 .extend(RESERVED_KEYWORDS);
35
36 databricks
37 .sets_mut("date_part_function_name")
38 .extend(["TIMEDIFF"]);
39
40 databricks.add([
41 (
42 "PrincipalIdentifierSegment".into(),
43 one_of(vec_of_erased![
44 Ref::new("NakedIdentifierSegment"),
45 Ref::new("BackQuotedIdentifierSegment"),
46 ])
47 .to_matchable()
48 .into(),
49 ),
50 (
51 "SetOwnerGrammar".into(),
52 Sequence::new(vec_of_erased![
53 Ref::keyword("SET").optional(),
54 Ref::keyword("OWNER"),
55 Ref::keyword("TO"),
56 Ref::new("PrincipalIdentifierSegment"),
57 ])
58 .to_matchable()
59 .into(),
60 ),
61 (
64 "CatalogReferenceSegment".into(),
65 Ref::new("ObjectReferenceSegment").to_matchable().into(),
66 ),
67 (
70 "AlterCatalogStatementSegment".into(),
71 Sequence::new(vec_of_erased![
72 Ref::keyword("ALTER"),
73 Ref::keyword("CATALOG"),
74 Ref::new("CatalogReferenceSegment"),
75 Ref::new("SetOwnerGrammar"),
76 ])
77 .to_matchable()
78 .into(),
79 ),
80 (
83 "CreateCatalogStatementSegment".into(),
84 Sequence::new(vec_of_erased![
85 Ref::keyword("CREATE"),
86 Ref::keyword("CATALOG"),
87 Ref::new("IfNotExistsGrammar").optional(),
88 Ref::new("CatalogReferenceSegment"),
89 Ref::new("CommentGrammar").optional(),
90 ])
91 .to_matchable()
92 .into(),
93 ),
94 (
97 "DropCatalogStatementSegment".into(),
98 Sequence::new(vec_of_erased![
99 Ref::keyword("DROP"),
100 Ref::keyword("CATALOG"),
101 Ref::new("IfExistsGrammar").optional(),
102 Ref::new("CatalogReferenceSegment"),
103 Ref::new("DropBehaviorGrammar").optional(),
104 ])
105 .to_matchable()
106 .into(),
107 ),
108 (
111 "UseCatalogStatementSegment".into(),
112 Sequence::new(vec_of_erased![
113 Ref::keyword("USE"),
114 Ref::keyword("CATALOG"),
115 Ref::new("CatalogReferenceSegment"),
116 ])
117 .to_matchable()
118 .into(),
119 ),
120 (
123 "UseDatabaseStatementSegment".into(),
124 Sequence::new(vec_of_erased![
125 Ref::keyword("USE"),
126 one_of(vec_of_erased![
127 Ref::keyword("DATABASE"),
128 Ref::keyword("SCHEMA"),
129 ])
130 .config(|config| {
131 config.optional();
132 }),
133 Ref::new("DatabaseReferenceSegment"),
134 ])
135 .to_matchable()
136 .into(),
137 ),
138 (
141 "SetTimeZoneStatementSegment".into(),
142 Sequence::new(vec_of_erased![
143 Ref::keyword("SET"),
144 Ref::keyword("TIME"),
145 Ref::keyword("ZONE"),
146 one_of(vec_of_erased![
147 Ref::keyword("LOCAL"),
148 Ref::new("QuotedLiteralSegment"),
149 Ref::new("IntervalExpressionSegment")
150 ]),
151 ])
152 .to_matchable()
153 .into(),
154 ),
155 (
158 "OptimizeTableStatementSegment".into(),
159 Sequence::new(vec_of_erased![
160 Ref::keyword("OPTIMIZE"),
161 Ref::new("TableReferenceSegment"),
162 Sequence::new(vec_of_erased![
163 Ref::keyword("WHERE"),
164 Ref::new("ExpressionSegment"),
165 ])
166 .config(|config| {
167 config.optional();
168 }),
169 Sequence::new(vec_of_erased![
170 Ref::keyword("ZORDER"),
171 Ref::keyword("BY"),
172 Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new(
173 "ColumnReferenceSegment"
174 )])]),
175 ])
176 .config(|config| {
177 config.optional();
178 }),
179 ])
180 .to_matchable()
181 .into(),
182 ),
183 (
184 "DatabaseReferenceSegment".into(),
186 Ref::new("ObjectReferenceSegment").to_matchable().into(),
187 ),
188 (
189 "TableReferenceSegment".into(),
191 Ref::new("ObjectReferenceSegment").to_matchable().into(),
192 ),
193 (
194 "SchemaReferenceSegment".into(),
196 Ref::new("ObjectReferenceSegment").to_matchable().into(),
197 ),
198 (
199 "IdentifierClauseSegment".into(),
200 Sequence::new(vec_of_erased![
201 Ref::keyword("IDENTIFIER"),
202 Bracketed::new(vec_of_erased![Ref::new("SingleIdentifierGrammar")]),
203 ])
204 .to_matchable()
205 .into(),
206 ),
207 (
208 "DropVolumeStatementSegment".into(),
211 Sequence::new(vec_of_erased![
212 Ref::keyword("DROP"),
213 Ref::keyword("VOLUME"),
214 Ref::new("IfExistsGrammar").optional(),
215 Ref::new("VolumeReferenceSegment"),
216 ])
217 .to_matchable()
218 .into(),
219 ),
220 (
221 "VolumeReferenceSegment".into(),
222 Ref::new("ObjectReferenceSegment").to_matchable().into(),
223 ),
224 (
225 "DescribeObjectGrammar".into(),
227 sparksql::dialect()
228 .grammar("DescribeObjectGrammar")
229 .copy(
230 Some(vec_of_erased![Sequence::new(vec_of_erased![
231 Ref::keyword("VOLUME"),
232 Ref::new("VolumeReferenceSegment"),
233 ])]),
234 Some(0),
235 None,
236 None,
237 Vec::new(),
238 false,
239 )
240 .into(),
241 ),
242 (
243 "DeclareOrReplaceVariableStatementSegment".into(),
246 Sequence::new(vec_of_erased![
247 Ref::keyword("DECLARE"),
248 Ref::new("OrReplaceGrammar").optional(),
249 Ref::keyword("VARIABLE").optional(),
250 Ref::new("SingleIdentifierGrammar"),
251 Ref::new("DatatypeSegment").optional(),
252 Sequence::new(vec_of_erased![
253 one_of(vec_of_erased![
254 Ref::keyword("DEFAULT"),
255 Ref::new("EqualsSegment")
256 ]),
257 Ref::new("ExpressionSegment"),
258 ])
259 .config(|config| {
260 config.optional();
261 }),
262 ])
263 .to_matchable()
264 .into(),
265 ),
266 (
269 "CommentOnStatementSegment".into(),
270 Sequence::new(vec_of_erased![
271 Ref::keyword("COMMENT"),
272 Ref::keyword("ON"),
273 one_of(vec_of_erased![
274 Sequence::new(vec_of_erased![
275 Ref::keyword("CATALOG"),
276 Ref::new("CatalogReferenceSegment"),
277 ]),
278 Sequence::new(vec_of_erased![
279 one_of(vec_of_erased![
280 Ref::keyword("DATABASE"),
281 Ref::keyword("SCHEMA")
282 ]),
283 Ref::new("DatabaseReferenceSegment"),
284 ]),
285 Sequence::new(vec_of_erased![
286 Ref::keyword("TABLE"),
287 Ref::new("TableReferenceSegment"),
288 ]),
289 Sequence::new(vec_of_erased![
290 Ref::keyword("VOLUME"),
291 Ref::new("VolumeReferenceSegment"),
292 ]),
293 Sequence::new(vec_of_erased![
295 one_of(vec_of_erased![
296 Ref::keyword("CONNECTION"),
297 Ref::keyword("PROVIDER"),
298 Ref::keyword("RECIPIENT"),
299 Ref::keyword("SHARE"),
300 ]),
301 Ref::new("ObjectReferenceSegment"),
302 ]),
303 ]),
304 Ref::keyword("IS"),
305 one_of(vec_of_erased![
306 Ref::new("QuotedLiteralSegment"),
307 Ref::keyword("NULL"),
308 ]),
309 ])
310 .to_matchable()
311 .into(),
312 ),
313 (
318 "ShowDatabasesSchemasGrammar".into(),
319 Sequence::new(vec_of_erased![
320 one_of(vec_of_erased![
321 Ref::keyword("DATABASES"),
322 Ref::keyword("SCHEMAS"),
323 ]),
324 Sequence::new(vec_of_erased![
325 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]),
326 Ref::new("DatabaseReferenceSegment"),
327 ])
328 .config(|config| {
329 config.optional();
330 }),
331 Sequence::new(vec_of_erased![
332 Ref::keyword("LIKE").optional(),
333 Ref::new("QuotedLiteralSegment"),
334 ])
335 .config(|config| {
336 config.optional();
337 }),
338 ])
339 .to_matchable()
340 .into(),
341 ),
342 (
347 "ShowDatabasesSchemasGrammar".into(),
348 Sequence::new(vec_of_erased![
349 one_of(vec_of_erased![
350 Ref::keyword("DATABASES"),
351 Ref::keyword("SCHEMAS"),
352 ]),
353 Sequence::new(vec_of_erased![
354 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]),
355 Ref::new("DatabaseReferenceSegment"),
356 ])
357 .config(|config| {
358 config.optional();
359 }),
360 Sequence::new(vec_of_erased![
361 Ref::keyword("LIKE").optional(),
362 Ref::new("QuotedLiteralSegment"),
363 ])
364 .config(|config| {
365 config.optional();
366 }),
367 ])
368 .to_matchable()
369 .into(),
370 ),
371 (
380 "ShowFunctionsGrammar".into(),
381 Sequence::new(vec_of_erased![
382 one_of(vec_of_erased![
383 Ref::keyword("USER"),
384 Ref::keyword("SYSTEM"),
385 Ref::keyword("ALL"),
386 ])
387 .config(|config| {
388 config.optional();
389 }),
390 Ref::keyword("FUNCTIONS"),
391 Sequence::new(vec_of_erased![
392 Sequence::new(vec_of_erased![
393 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
394 Ref::new("DatabaseReferenceSegment"),
395 ])
396 .config(|config| {
397 config.optional();
398 }),
399 Sequence::new(vec_of_erased![
400 Ref::keyword("LIKE").optional(),
401 one_of(vec_of_erased![
402 Sequence::new(vec_of_erased![
404 Ref::new("DatabaseReferenceSegment"),
405 Ref::new("DotSegment"),
406 Ref::new("FunctionNameSegment"),
407 ])
408 .config(|config| {
409 config.disallow_gaps();
410 }),
411 Ref::new("FunctionNameSegment"),
413 Ref::new("QuotedLiteralSegment"),
415 ]),
416 ])
417 .config(|config| {
418 config.optional();
419 }),
420 ])
421 .config(|config| {
422 config.optional();
423 }),
424 ])
425 .to_matchable()
426 .into(),
427 ),
428 (
432 "ShowTablesGrammar".into(),
433 Sequence::new(vec_of_erased![
434 Ref::keyword("TABLES"),
435 Sequence::new(vec_of_erased![
436 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
437 Ref::new("DatabaseReferenceSegment"),
438 ])
439 .config(|config| {
440 config.optional();
441 }),
442 Sequence::new(vec_of_erased![
443 Ref::keyword("LIKE").optional(),
444 Ref::new("QuotedLiteralSegment"),
445 ])
446 .config(|config| {
447 config.optional();
448 }),
449 ])
450 .to_matchable()
451 .into(),
452 ),
453 (
457 "ShowViewsGrammar".into(),
458 Sequence::new(vec_of_erased![
459 Ref::keyword("VIEWS"),
460 Sequence::new(vec_of_erased![
461 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
462 Ref::new("DatabaseReferenceSegment"),
463 ])
464 .config(|config| {
465 config.optional();
466 }),
467 Sequence::new(vec_of_erased![
468 Ref::keyword("LIKE").optional(),
469 Ref::new("QuotedLiteralSegment"),
470 ])
471 .config(|config| {
472 config.optional();
473 }),
474 ])
475 .to_matchable()
476 .into(),
477 ),
478 (
480 "ShowObjectGrammar".into(),
481 sparksql::raw_dialect()
482 .grammar("ShowObjectGrammar")
483 .copy(
484 Some(vec_of_erased![Sequence::new(vec_of_erased![
485 Ref::keyword("VOLUMES"),
486 Sequence::new(vec_of_erased![
487 one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
488 Ref::new("DatabaseReferenceSegment"),
489 ])
490 .config(|config| {
491 config.optional();
492 }),
493 Sequence::new(vec_of_erased![
494 Ref::keyword("LIKE").optional(),
495 Ref::new("QuotedLiteralSegment"),
496 ])
497 .config(|config| {
498 config.optional();
499 }),
500 ])]),
501 None,
502 None,
503 None,
504 Vec::new(),
505 false,
506 )
507 .into(),
508 ),
509 (
511 "InsertBracketedColumnReferenceListGrammar".into(),
512 one_of(vec_of_erased![
513 Ref::new("BracketedColumnReferenceListGrammar"),
514 Sequence::new(vec_of_erased![Ref::keyword("BY"), Ref::keyword("NAME"),]),
515 ])
516 .to_matchable()
517 .into(),
518 ),
519 ]);
520
521 databricks.replace_grammar(
523 "ObjectReferenceSegment",
524 Delimited::new(vec_of_erased![
525 one_of(vec_of_erased![
526 Ref::new("SingleIdentifierGrammar"),
527 Ref::new("IdentifierClauseSegment"),
528 ]),
529 Ref::new("ObjectReferenceDelimiterGrammar"),
530 ])
531 .config(|config| {
532 config.delimiter(Ref::new("ObjectReferenceDelimiterGrammar"));
533 config.terminators = vec_of_erased![Ref::new("ObjectReferenceTerminatorGrammar")];
534 config.disallow_gaps();
535 })
536 .to_matchable(),
537 );
538
539 databricks.replace_grammar(
542 "TableExpressionSegment",
543 sparksql::dialect()
544 .grammar("TableExpressionSegment")
545 .match_grammar(&databricks)
546 .unwrap()
547 .copy(
548 Some(vec_of_erased![Ref::new("IdentifierClauseSegment")]),
549 None,
550 Some(Ref::new("ValuesClauseSegment").to_matchable()),
551 None,
552 Vec::new(),
553 false,
554 ),
555 );
556
557 databricks.replace_grammar(
559 "StatementSegment",
560 raw_sparksql
561 .grammar("StatementSegment")
562 .match_grammar(&databricks)
563 .unwrap()
564 .copy(
565 Some(vec_of_erased![
566 Ref::new("AlterCatalogStatementSegment"),
567 Ref::new("CreateCatalogStatementSegment"),
568 Ref::new("DropCatalogStatementSegment"),
569 Ref::new("UseCatalogStatementSegment"),
570 Ref::new("DropVolumeStatementSegment"),
571 Ref::new("SetTimeZoneStatementSegment"),
572 Ref::new("OptimizeTableStatementSegment"),
573 Ref::new("CommentOnStatementSegment"),
574 Ref::new("DeclareOrReplaceVariableStatementSegment"),
575 ]),
576 None,
577 None,
578 None,
579 Vec::new(),
580 false,
581 ),
582 );
583
584 databricks.replace_grammar(
587 "GroupByClauseSegment",
588 Sequence::new(vec_of_erased![
589 Ref::keyword("GROUP"),
590 Ref::keyword("BY"),
591 MetaSegment::indent(),
592 one_of(vec_of_erased![
593 Ref::keyword("ALL"),
594 Delimited::new(vec_of_erased![
595 Ref::new("CubeRollupClauseSegment"),
596 Ref::new("GroupingSetsClauseSegment"),
597 Ref::new("ColumnReferenceSegment"),
598 Ref::new("NumericLiteralSegment").optional(),
600 Ref::new("ExpressionSegment").optional(),
602 ]),
603 Sequence::new(vec_of_erased![
604 Delimited::new(vec_of_erased![
605 Ref::new("ColumnReferenceSegment"),
606 Ref::new("NumericLiteralSegment").optional(),
608 Ref::new("ExpressionSegment").optional(),
610 ]),
611 one_of(vec_of_erased![
612 Ref::new("WithCubeRollupClauseSegment"),
613 Ref::new("GroupingSetsClauseSegment"),
614 ]),
615 ]),
616 ]),
617 MetaSegment::dedent(),
618 ])
619 .to_matchable(),
620 );
621
622 databricks.expand();
623 databricks
624}