1use itertools::Itertools;
5use sqruff_lib_core::dialects::Dialect;
6use sqruff_lib_core::dialects::init::DialectConfig;
7use sqruff_lib_core::dialects::init::DialectKind;
8use sqruff_lib_core::dialects::syntax::SyntaxKind;
9use sqruff_lib_core::helpers::{Config, ToMatchable};
10use sqruff_lib_core::parser::grammar::anyof::{AnyNumberOf, one_of, optionally_bracketed};
11use sqruff_lib_core::parser::grammar::delimited::Delimited;
12use sqruff_lib_core::parser::grammar::sequence::{Bracketed, Sequence};
13use sqruff_lib_core::parser::grammar::{Nothing, Ref};
14use sqruff_lib_core::parser::lexer::Matcher;
15use sqruff_lib_core::parser::matchable::MatchableTrait;
16use sqruff_lib_core::parser::node_matcher::NodeMatcher;
17use sqruff_lib_core::parser::parsers::{RegexParser, StringParser, TypedParser};
18use sqruff_lib_core::parser::segments::generator::SegmentGenerator;
19use sqruff_lib_core::parser::segments::meta::MetaSegment;
20use sqruff_lib_core::value::Value;
21
// Invokes the `dialect_config!` macro for `AthenaDialectConfig` with an empty
// body. Presumably this generates the `AthenaDialectConfig` type (with
// `from_value` and `Default`) used by `dialect` below — confirm against the
// macro definition in `sqruff_lib_core`.
sqruff_lib_core::dialect_config!(AthenaDialectConfig {});
23
/// Builds the Athena dialect on top of the ANSI dialect.
///
/// The function starts from `super::ansi::raw_dialect()`, renames it to
/// `DialectKind::Athena`, extends the reserved/unreserved keyword sets with
/// the Athena keyword lists, registers a lexer matcher for `->`, and then adds
/// or replaces a series of grammar entries. The final value is produced by
/// applying `expand()` to the dialect.
///
/// `config` is converted into an `AthenaDialectConfig` (falling back to the
/// default when it is `None`); the resulting value is bound to
/// `_dialect_config` and is not read anywhere else in this function.
pub fn dialect(config: Option<&Value>) -> Dialect {
    // Parsed only for its side effects / validation, if any; the value itself
    // is unused (hence the leading underscore).
    let _dialect_config: AthenaDialectConfig = config
        .map(AthenaDialectConfig::from_value)
        .unwrap_or_default();

    // `ansi_dialect` is only used below as a source of existing grammars to
    // `copy` from; `dialect` is the value that is mutated and returned.
    let ansi_dialect = super::ansi::dialect(None);
    let mut dialect = super::ansi::raw_dialect();
    dialect.name = DialectKind::Athena;

    // Extend the keyword sets with the Athena-specific keyword lists.
    dialect
        .sets_mut("unreserved_keywords")
        .extend(super::athena_keywords::ATHENA_UNRESERVED_KEYWORDS);
    dialect
        .sets_mut("reserved_keywords")
        .extend(super::athena_keywords::ATHENA_RESERVED_KEYWORDS);

    // Register a string matcher for `->`, positioned relative to the existing
    // `like_operator` matcher.
    // NOTE(review): presumably inserted *before* `like_operator` — confirm
    // against `insert_lexer_matchers`.
    dialect.insert_lexer_matchers(
        vec![Matcher::string("right_arrow", "->", SyntaxKind::RightArrow)],
        "like_operator",
    );

    // Angle brackets as a bracket pair, referenced further down through
    // `bracket_pairs_set = "angle_bracket_pairs"` for ARRAY/MAP/STRUCT schemas.
    dialect
        .bracket_sets_mut("angle_bracket_pairs")
        .extend(vec![(
            "angle",
            "StartAngleBracketSegment",
            "EndAngleBracketSegment",
            false,
        )]);

    // `NonWithSelectableGrammar`: set expressions, (optionally bracketed)
    // SELECT statements, `NonSetSelectableGrammar`, plus the UPDATE / INSERT /
    // DELETE / MERGE statement segments.
    dialect.add([(
        "NonWithSelectableGrammar".into(),
        one_of(vec![
            Ref::new("SetExpressionSegment").to_matchable(),
            optionally_bracketed(vec![Ref::new("SelectStatementSegment").to_matchable()])
                .to_matchable(),
            Ref::new("NonSetSelectableGrammar").to_matchable(),
            Ref::new("UpdateStatementSegment").to_matchable(),
            Ref::new("InsertStatementSegment").to_matchable(),
            Ref::new("DeleteStatementSegment").to_matchable(),
            Ref::new("MergeStatementSegment").to_matchable(),
        ])
        .to_matchable()
        .into(),
    )]);

    // Symbols, file-format parsers and the small reusable grammars that the
    // statement definitions below refer to by name.
    dialect.add([
        (
            "StartAngleBracketSegment".into(),
            StringParser::new("<", SyntaxKind::StartAngleBracket)
                .to_matchable()
                .into(),
        ),
        (
            "EndAngleBracketSegment".into(),
            StringParser::new(">", SyntaxKind::EndAngleBracket)
                .to_matchable()
                .into(),
        ),
        (
            "RightArrowOperator".into(),
            StringParser::new("->", SyntaxKind::BinaryOperator)
                .to_matchable()
                .into(),
        ),
        // File formats, all typed as `SyntaxKind::FileFormat`.
        // NOTE(review): the `ORC`, `PARQUET`, `AVRO` and `ION` entries match
        // the text `ORCFILE`, `PARQUETFILE`, `AVROFILE` and `IONFILE`, not the
        // entry name — confirm this is intended, since `FileFormatGrammar`
        // refers to them via `Ref::keyword("ORC")` etc.
        (
            "JSONFILE".into(),
            StringParser::new("JSONFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "RCFILE".into(),
            StringParser::new("RCFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "ORC".into(),
            StringParser::new("ORCFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "PARQUET".into(),
            StringParser::new("PARQUETFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "AVRO".into(),
            StringParser::new("AVROFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "ION".into(),
            StringParser::new("IONFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "SEQUENCEFILE".into(),
            StringParser::new("SEQUENCEFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        (
            "TEXTFILE".into(),
            StringParser::new("TEXTFILE", SyntaxKind::FileFormat)
                .to_matchable()
                .into(),
        ),
        // A single `'key' = 'value'` property (both sides quoted literals).
        (
            "PropertyGrammar".into(),
            Sequence::new(vec![
                Ref::new("QuotedLiteralSegment").to_matchable(),
                Ref::new("EqualsSegment").to_matchable(),
                Ref::new("QuotedLiteralSegment").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `LOCATION '<quoted literal>'`.
        (
            "LocationGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("LOCATION").to_matchable(),
                Ref::new("QuotedLiteralSegment").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Bracketed, delimited list of `PropertyGrammar` entries.
        (
            "BracketedPropertyListGrammar".into(),
            Bracketed::new(vec![
                Delimited::new(vec![Ref::new("PropertyGrammar").to_matchable()]).to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // One `<property keyword> = <literal>` entry for CREATE TABLE AS.
        (
            "CTASPropertyGrammar".into(),
            Sequence::new(vec![
                one_of(vec![
                    Ref::keyword("FORMAT").to_matchable(),
                    Ref::keyword("PARTITIONED_BY").to_matchable(),
                    Ref::keyword("BUCKETED_BY").to_matchable(),
                    Ref::keyword("BUCKET_COUNT").to_matchable(),
                    Ref::keyword("WRITE_COMPRESSION").to_matchable(),
                    Ref::keyword("ORC_COMPRESSION").to_matchable(),
                    Ref::keyword("PARQUET_COMPRESSION").to_matchable(),
                    Ref::keyword("COMPRESSION_LEVEL").to_matchable(),
                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
                    Ref::keyword("IS_EXTERNAL").to_matchable(),
                    Ref::keyword("TABLE_TYPE").to_matchable(),
                    Ref::keyword("EXTERNAL_LOCATION").to_matchable(),
                ])
                .to_matchable(),
                Ref::new("EqualsSegment").to_matchable(),
                Ref::new("LiteralGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Same shape as `CTASPropertyGrammar`, but with `LOCATION` instead of
        // `EXTERNAL_LOCATION` and with the additional PARTITIONING / VACUUM_* /
        // OPTIMIZE_REWRITE_* keywords.
        (
            "CTASIcebergPropertyGrammar".into(),
            Sequence::new(vec![
                one_of(vec![
                    Ref::keyword("FORMAT").to_matchable(),
                    Ref::keyword("PARTITIONED_BY").to_matchable(),
                    Ref::keyword("BUCKETED_BY").to_matchable(),
                    Ref::keyword("BUCKET_COUNT").to_matchable(),
                    Ref::keyword("WRITE_COMPRESSION").to_matchable(),
                    Ref::keyword("ORC_COMPRESSION").to_matchable(),
                    Ref::keyword("PARQUET_COMPRESSION").to_matchable(),
                    Ref::keyword("COMPRESSION_LEVEL").to_matchable(),
                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
                    Ref::keyword("IS_EXTERNAL").to_matchable(),
                    Ref::keyword("TABLE_TYPE").to_matchable(),
                    Ref::keyword("LOCATION").to_matchable(),
                    Ref::keyword("PARTITIONING").to_matchable(),
                    Ref::keyword("VACUUM_MAX_SNAPSHOT_AGE_SECONDS").to_matchable(),
                    Ref::keyword("VACUUM_MIN_SNAPSHOTS_TO_KEEP").to_matchable(),
                    Ref::keyword("OPTIMIZE_REWRITE_MIN_DATA_FILE_SIZE_BYTES").to_matchable(),
                    Ref::keyword("OPTIMIZE_REWRITE_MAX_DATA_FILE_SIZE_BYTES").to_matchable(),
                    Ref::keyword("OPTIMIZE_REWRITE_DATA_FILE_THRESHOLD").to_matchable(),
                    Ref::keyword("OPTIMIZE_REWRITE_DELETE_FILE_THRESHOLD").to_matchable(),
                ])
                .to_matchable(),
                Ref::new("EqualsSegment").to_matchable(),
                Ref::new("LiteralGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Bracketed list made up entirely of regular CTAS properties or
        // entirely of Iceberg CTAS properties (the two lists are alternatives).
        (
            "BracketedCTASPropertyGrammar".into(),
            Bracketed::new(vec![
                one_of(vec![
                    Delimited::new(vec![Ref::new("CTASPropertyGrammar").to_matchable()])
                        .to_matchable(),
                    Delimited::new(vec![Ref::new("CTASIcebergPropertyGrammar").to_matchable()])
                        .to_matchable(),
                ])
                .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // One `<property keyword> = <literal>` entry for UNLOAD.
        (
            "UnloadPropertyGrammar".into(),
            Sequence::new(vec![
                one_of(vec![
                    Ref::keyword("FORMAT").to_matchable(),
                    Ref::keyword("PARTITIONED_BY").to_matchable(),
                    Ref::keyword("COMPRESSION").to_matchable(),
                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
                ])
                .to_matchable(),
                Ref::new("EqualsSegment").to_matchable(),
                Ref::new("LiteralGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        (
            "BracketedUnloadPropertyGrammar".into(),
            Bracketed::new(vec![
                Delimited::new(vec![Ref::new("UnloadPropertyGrammar").to_matchable()])
                    .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `TBLPROPERTIES ( ... )`.
        (
            "TablePropertiesGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("TBLPROPERTIES").to_matchable(),
                Ref::new("BracketedPropertyListGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `WITH SERDEPROPERTIES ( ... )`.
        (
            "SerdePropertiesGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("WITH").to_matchable(),
                Ref::keyword("SERDEPROPERTIES").to_matchable(),
                Ref::new("BracketedPropertyListGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `TERMINATED BY '<quoted literal>'`.
        (
            "TerminatedByGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("TERMINATED").to_matchable(),
                Ref::keyword("BY").to_matchable(),
                Ref::new("QuotedLiteralSegment").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // One of the named file formats, or an explicit
        // `INPUTFORMAT '...' OUTPUTFORMAT '...'` pair.
        (
            "FileFormatGrammar".into(),
            one_of(vec![
                Ref::keyword("SEQUENCEFILE").to_matchable(),
                Ref::keyword("TEXTFILE").to_matchable(),
                Ref::keyword("RCFILE").to_matchable(),
                Ref::keyword("ORC").to_matchable(),
                Ref::keyword("PARQUET").to_matchable(),
                Ref::keyword("AVRO").to_matchable(),
                Ref::keyword("JSONFILE").to_matchable(),
                Ref::keyword("ION").to_matchable(),
                Sequence::new(vec![
                    Ref::keyword("INPUTFORMAT").to_matchable(),
                    Ref::new("QuotedLiteralSegment").to_matchable(),
                    Ref::keyword("OUTPUTFORMAT").to_matchable(),
                    Ref::new("QuotedLiteralSegment").to_matchable(),
                ])
                .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `STORED AS <file format>`.
        (
            "StoredAsGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("STORED").to_matchable(),
                Ref::keyword("AS").to_matchable(),
                Ref::new("FileFormatGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `STORED BY '<quoted literal>'` with optional serde properties.
        (
            "StoredByGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("STORED").to_matchable(),
                Ref::keyword("BY").to_matchable(),
                Ref::new("QuotedLiteralSegment").to_matchable(),
                Ref::new("SerdePropertiesGrammar").optional().to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Either an optional ROW FORMAT clause followed by an optional
        // STORED AS, or a STORED BY clause.
        (
            "StorageFormatGrammar".into(),
            one_of(vec![
                Sequence::new(vec![
                    Ref::new("RowFormatClauseSegment").optional().to_matchable(),
                    Ref::new("StoredAsGrammar").optional().to_matchable(),
                ])
                .to_matchable(),
                Ref::new("StoredByGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `COMMENT '<quoted literal>'`.
        (
            "CommentGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("COMMENT").to_matchable(),
                Ref::new("QuotedLiteralSegment").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `PARTITION ( col [= literal], ... )`; the `= literal` part is
        // optional for each column.
        (
            "PartitionSpecGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("PARTITION").to_matchable(),
                Bracketed::new(vec![
                    Delimited::new(vec![
                        Sequence::new(vec![
                            Ref::new("ColumnReferenceSegment").to_matchable(),
                            Sequence::new(vec![
                                Ref::new("EqualsSegment").to_matchable(),
                                Ref::new("LiteralGrammar").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Back-quoted token re-typed as a quoted identifier.
        (
            "BackQuotedIdentifierSegment".into(),
            TypedParser::new(SyntaxKind::BackQuote, SyntaxKind::QuotedIdentifier)
                .to_matchable()
                .into(),
        ),
    ]);

    // Overrides of grammars that also exist in the ANSI dialect. Several are
    // built by `copy`-ing the ANSI grammar.
    // NOTE(review): presumably the first `copy` argument lists elements to
    // insert — confirm against the `copy` signature.
    dialect.add([
        (
            "LiteralGrammar".into(),
            ansi_dialect
                .grammar("LiteralGrammar")
                .copy(
                    Some(vec![Ref::new("ParameterSegment").to_matchable()]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // Any number of array accessors followed by any number of
        // `<delimiter> <object reference>` pairs; both parts are optional.
        (
            "AccessorGrammar".into(),
            Sequence::new(vec![
                AnyNumberOf::new(vec![Ref::new("ArrayAccessorSegment").to_matchable()])
                    .config(|config| {
                        config.optional();
                    })
                    .to_matchable(),
                AnyNumberOf::new(vec![
                    Sequence::new(vec![
                        Ref::new("ObjectReferenceDelimiterGrammar").to_matchable(),
                        Ref::new("ObjectReferenceSegment").to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .config(|config| {
                    config.optional();
                })
                .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // Single-, double- and back-quoted tokens are all accepted as quoted
        // literals.
        (
            "QuotedLiteralSegment".into(),
            one_of(vec![
                TypedParser::new(SyntaxKind::SingleQuote, SyntaxKind::QuotedLiteral).to_matchable(),
                TypedParser::new(SyntaxKind::DoubleQuote, SyntaxKind::QuotedLiteral).to_matchable(),
                TypedParser::new(SyntaxKind::BackQuote, SyntaxKind::QuotedLiteral).to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // NOTE(review): `Nothing` presumably means no trim parameters are
        // matched in this dialect — confirm against `Nothing`'s semantics.
        (
            "TrimParametersGrammar".into(),
            Nothing::new().to_matchable().into(),
        ),
        // Naked identifiers: the regex below, with an anti-template built from
        // the dialect's reserved keywords joined by `|` and anchored with
        // `^(...)$`. The generator runs against the dialect it is handed, so
        // it reads the keyword set at generation time.
        (
            "NakedIdentifierSegment".into(),
            SegmentGenerator::new(|dialect| {
                let reserved_keywords = dialect.sets("reserved_keywords");
                let pattern = reserved_keywords.iter().join("|");
                let anti_template = format!("^({pattern})$");

                RegexParser::new("[A-Z0-9_]*[A-Z_][A-Z0-9_]*", SyntaxKind::NakedIdentifier)
                    .anti_template(&anti_template)
                    .to_matchable()
            })
            .into(),
        ),
        // ANSI `SingleIdentifierGrammar` copied with
        // `BackQuotedIdentifierSegment` passed as the first `copy` argument.
        (
            "SingleIdentifierGrammar".into(),
            ansi_dialect
                .grammar("SingleIdentifierGrammar")
                .copy(
                    Some(vec![Ref::new("BackQuotedIdentifierSegment").to_matchable()]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // Binary operators, including the Athena-specific `->` operator.
        (
            "BinaryOperatorGrammar".into(),
            one_of(vec![
                Ref::new("ArithmeticBinaryOperatorGrammar").to_matchable(),
                Ref::new("StringBinaryOperatorGrammar").to_matchable(),
                Ref::new("BooleanBinaryOperatorGrammar").to_matchable(),
                Ref::new("ComparisonOperatorGrammar").to_matchable(),
                Ref::new("RightArrowOperator").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // ANSI `PostFunctionGrammar` copied with an optional `WITH ORDINALITY`
        // sequence and `WithinGroupClauseSegment` passed as the first `copy`
        // argument.
        (
            "PostFunctionGrammar".into(),
            ansi_dialect
                .grammar("PostFunctionGrammar")
                .copy(
                    Some(vec![
                        Sequence::new(vec![
                            Ref::keyword("WITH").to_matchable(),
                            Ref::keyword("ORDINALITY").to_matchable(),
                        ])
                        .config(|config| config.optional())
                        .to_matchable(),
                        Ref::new("WithinGroupClauseSegment").to_matchable(),
                    ]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
        // ANSI `FunctionContentsGrammar` copied with
        // `ListaggOverflowClauseSegment` passed as the first `copy` argument.
        (
            "FunctionContentsGrammar".into(),
            ansi_dialect
                .grammar("FunctionContentsGrammar")
                .copy(
                    Some(vec![
                        Ref::new("ListaggOverflowClauseSegment").to_matchable(),
                    ]),
                    None,
                    None,
                    None,
                    Vec::new(),
                    false,
                )
                .into(),
        ),
    ]);

    dialect.add([
        // `WITHIN GROUP ( <order by clause> )` with an optional filter clause.
        (
            "WithinGroupClauseSegment".into(),
            Sequence::new(vec![
                Ref::keyword("WITHIN").to_matchable(),
                Ref::keyword("GROUP").to_matchable(),
                Bracketed::new(vec![Ref::new("OrderByClauseSegment").to_matchable()])
                    .to_matchable(),
                Ref::new("FilterClauseGrammar").optional().to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `ON OVERFLOW ERROR` or
        // `ON OVERFLOW TRUNCATE ['<literal>'] [WITH | WITHOUT] [COUNT]`.
        (
            "ListaggOverflowClauseSegment".into(),
            Sequence::new(vec![
                Ref::keyword("ON").to_matchable(),
                Ref::keyword("OVERFLOW").to_matchable(),
                one_of(vec![
                    Ref::keyword("ERROR").to_matchable(),
                    Sequence::new(vec![
                        Ref::keyword("TRUNCATE").to_matchable(),
                        Ref::new("QuotedLiteralSegment").optional().to_matchable(),
                        one_of(vec![
                            Ref::keyword("WITH").to_matchable(),
                            Ref::keyword("WITHOUT").to_matchable(),
                        ])
                        .config(|config| {
                            config.optional();
                        })
                        .to_matchable(),
                        Ref::keyword("COUNT").optional().to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `VALUES` followed by a delimited list of expressions.
        (
            "ValuesClauseSegment".into(),
            NodeMatcher::new(SyntaxKind::ValuesClause, |_| {
                Sequence::new(vec![
                    Ref::keyword("VALUES").to_matchable(),
                    Delimited::new(vec![Ref::new("ExpressionSegment").to_matchable()])
                        .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
    ]);

    // `ARRAY` with an optional `<...>` element type schema.
    dialect.replace_grammar(
        "ArrayTypeSegment",
        Sequence::new(vec![
            Ref::keyword("ARRAY").to_matchable(),
            Ref::new("ArrayTypeSchemaSegment").optional().to_matchable(),
        ])
        .to_matchable(),
    );

    // The array schema is a single datatype wrapped in angle brackets.
    dialect.replace_grammar(
        "ArrayTypeSchemaSegment",
        Bracketed::new(vec![Ref::new("DatatypeSegment").to_matchable()])
            .config(|config| {
                config.bracket_pairs_set = "angle_bracket_pairs";
                config.bracket_type = "angle";
            })
            .to_matchable(),
    );

    // `STRUCT` with an optional `<...>` field schema.
    dialect.replace_grammar(
        "StructTypeSegment",
        Sequence::new(vec![
            Ref::keyword("STRUCT").to_matchable(),
            Ref::new("StructTypeSchemaSegment")
                .optional()
                .to_matchable(),
        ])
        .to_matchable(),
    );

    dialect.add([
        // `MAP` with an optional `<...>` key/value schema.
        (
            "MapTypeSegment".into(),
            NodeMatcher::new(SyntaxKind::MapType, |_| {
                Sequence::new(vec![
                    Ref::keyword("MAP").to_matchable(),
                    Ref::new("MapTypeSchemaSegment").optional().to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `< primitive type , datatype >` in angle brackets: the first
        // element must be a primitive type, the second can be any datatype.
        (
            "MapTypeSchemaSegment".into(),
            NodeMatcher::new(SyntaxKind::MapTypeSchema, |_| {
                Bracketed::new(vec![
                    Sequence::new(vec![
                        Ref::new("PrimitiveTypeSegment").to_matchable(),
                        Ref::new("CommaSegment").to_matchable(),
                        Ref::new("DatatypeSegment").to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .config(|config| {
                    config.bracket_pairs_set = "angle_bracket_pairs";
                    config.bracket_type = "angle";
                })
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
    ]);

    // Statement list: a copy of the ANSI statement segment. The first list
    // names the Athena-specific statements and the second names ANSI
    // statements.
    // NOTE(review): presumably the first list is inserted and the second is
    // removed — confirm against the `copy` signature.
    dialect.replace_grammar(
        "StatementSegment",
        super::ansi::statement_segment().copy(
            Some(vec![
                Ref::new("MsckRepairTableStatementSegment").to_matchable(),
                Ref::new("UnloadStatementSegment").to_matchable(),
                Ref::new("PrepareStatementSegment").to_matchable(),
                Ref::new("ExecuteStatementSegment").to_matchable(),
                Ref::new("ShowStatementSegment").to_matchable(),
            ]),
            None,
            None,
            Some(vec![
                Ref::new("TransactionStatementSegment").to_matchable(),
                Ref::new("CreateSchemaStatementSegment").to_matchable(),
                Ref::new("SetSchemaStatementSegment").to_matchable(),
                Ref::new("CreateModelStatementSegment").to_matchable(),
                Ref::new("DropModelStatementSegment").to_matchable(),
            ]),
            Vec::new(),
            false,
        ),
    );

    dialect.add([
        // Angle-bracketed, delimited list of `name : datatype [COMMENT ...]`
        // struct fields.
        (
            "StructTypeSchemaSegment".into(),
            NodeMatcher::new(SyntaxKind::StructTypeSchema, |_| {
                Bracketed::new(vec![
                    Delimited::new(vec![
                        Sequence::new(vec![
                            Ref::new("NakedIdentifierSegment").to_matchable(),
                            Ref::new("ColonSegment").to_matchable(),
                            Ref::new("DatatypeSegment").to_matchable(),
                            Ref::new("CommentGrammar").optional().to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .config(|config| {
                    config.bracket_pairs_set = "angle_bracket_pairs";
                    config.bracket_type = "angle";
                })
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // Primitive type keywords; DECIMAL / CHAR / VARCHAR may be followed
        // by optional bracketed arguments.
        (
            "PrimitiveTypeSegment".into(),
            NodeMatcher::new(SyntaxKind::PrimitiveType, |_| {
                one_of(vec![
                    Ref::keyword("BOOLEAN").to_matchable(),
                    Ref::keyword("TINYINT").to_matchable(),
                    Ref::keyword("SMALLINT").to_matchable(),
                    Ref::keyword("INTEGER").to_matchable(),
                    Ref::keyword("INT").to_matchable(),
                    Ref::keyword("BIGINT").to_matchable(),
                    Ref::keyword("DOUBLE").to_matchable(),
                    Ref::keyword("FLOAT").to_matchable(),
                    Ref::keyword("REAL").to_matchable(),
                    Sequence::new(vec![
                        one_of(vec![
                            Ref::keyword("DECIMAL").to_matchable(),
                            Ref::keyword("CHAR").to_matchable(),
                            Ref::keyword("VARCHAR").to_matchable(),
                        ])
                        .to_matchable(),
                        Ref::new("BracketedArguments").optional().to_matchable(),
                    ])
                    .to_matchable(),
                    Ref::keyword("STRING").to_matchable(),
                    Ref::keyword("BINARY").to_matchable(),
                    Ref::keyword("DATE").to_matchable(),
                    Ref::keyword("TIMESTAMP").to_matchable(),
                    Ref::keyword("VARBINARY").to_matchable(),
                    Ref::keyword("JSON").to_matchable(),
                    Ref::keyword("TIME").to_matchable(),
                    Ref::keyword("IPADDRESS").to_matchable(),
                    Ref::keyword("HYPERLOGLOG").to_matchable(),
                    Ref::keyword("P4HYPERLOGLOG").to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // Any datatype: primitive, STRUCT, ARRAY, MAP, a bracketed `ROW(...)`
        // form, or `TimeWithTZGrammar`.
        (
            "DatatypeSegment".into(),
            NodeMatcher::new(SyntaxKind::DataType, |_| {
                one_of(vec![
                    Ref::new("PrimitiveTypeSegment").to_matchable(),
                    Ref::new("StructTypeSegment").to_matchable(),
                    Ref::new("ArrayTypeSegment").to_matchable(),
                    Ref::new("MapTypeSegment").to_matchable(),
                    Sequence::new(vec![
                        Ref::keyword("ROW").to_matchable(),
                        Bracketed::new(vec![
                            Delimited::new(vec![
                                // Each delimited item is any number of
                                // `name datatype` pairs and/or literals.
                                AnyNumberOf::new(vec![
                                    Sequence::new(vec![
                                        Ref::new("NakedIdentifierSegment").to_matchable(),
                                        Ref::new("DatatypeSegment").to_matchable(),
                                    ])
                                    .to_matchable(),
                                    Ref::new("LiteralGrammar").to_matchable(),
                                ])
                                .to_matchable(),
                            ])
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                    Ref::new("TimeWithTZGrammar").to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
    ]);

    // `GROUP BY` followed by a delimited list of CUBE/ROLLUP clauses,
    // GROUPING SETS clauses, column references, numeric literals or general
    // expressions, wrapped in indent/dedent meta segments.
    dialect.replace_grammar(
        "GroupByClauseSegment",
        Sequence::new(vec![
            Ref::keyword("GROUP").to_matchable(),
            Ref::keyword("BY").to_matchable(),
            MetaSegment::indent().to_matchable(),
            Delimited::new(vec![
                one_of(vec![
                    Ref::new("CubeRollupClauseSegment").to_matchable(),
                    Ref::new("GroupingSetsClauseSegment").to_matchable(),
                    Ref::new("ColumnReferenceSegment").to_matchable(),
                    Ref::new("NumericLiteralSegment").to_matchable(),
                    Ref::new("ExpressionSegment").to_matchable(),
                ])
                .to_matchable(),
            ])
            .to_matchable(),
            MetaSegment::dedent().to_matchable(),
        ])
        .to_matchable(),
    );

    // Statement-level segments.
    dialect.add([
        // `CREATE [EXTERNAL] TABLE [IF NOT EXISTS] <table>` followed by either
        // a column/storage definition or a `[WITH (...)] AS <select>` form.
        (
            "CreateTableStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::CreateTableStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("CREATE").to_matchable(),
                    Ref::keyword("EXTERNAL").optional().to_matchable(),
                    Ref::keyword("TABLE").to_matchable(),
                    Ref::new("IfNotExistsGrammar").optional().to_matchable(),
                    Ref::new("TableReferenceSegment").to_matchable(),
                    one_of(vec![
                        // Definition form: optional bracketed column list plus
                        // optional comment / storage / partitioning clauses.
                        Sequence::new(vec![
                            Bracketed::new(vec![
                                Delimited::new(vec![
                                    one_of(vec![
                                        Ref::new("TableConstraintSegment")
                                            .optional()
                                            .to_matchable(),
                                        Sequence::new(vec![
                                            Ref::new("ColumnDefinitionSegment").to_matchable(),
                                            Ref::new("CommentGrammar").optional().to_matchable(),
                                        ])
                                        .to_matchable(),
                                    ])
                                    .to_matchable(),
                                ])
                                .to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Ref::new("CommentGrammar").optional().to_matchable(),
                            // `StoredAsGrammar` is listed both here and again
                            // after the PARTITIONED BY / CLUSTERED BY clauses,
                            // so it is accepted in either position.
                            Ref::new("StoredAsGrammar").optional().to_matchable(),
                            // Optional `PARTITIONED BY (...)`: each entry is a
                            // column definition, a single identifier or a
                            // function, optionally followed by a comment.
                            Sequence::new(vec![
                                Ref::keyword("PARTITIONED").to_matchable(),
                                Ref::keyword("BY").to_matchable(),
                                Bracketed::new(vec![
                                    Delimited::new(vec![
                                        Sequence::new(vec![
                                            one_of(vec![
                                                Ref::new("ColumnDefinitionSegment").to_matchable(),
                                                Ref::new("SingleIdentifierGrammar").to_matchable(),
                                                Ref::new("FunctionSegment").to_matchable(),
                                            ])
                                            .to_matchable(),
                                            Ref::new("CommentGrammar").optional().to_matchable(),
                                        ])
                                        .to_matchable(),
                                    ])
                                    .to_matchable(),
                                ])
                                .to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            // Optional `CLUSTERED BY (<columns>) INTO <n> BUCKETS`.
                            Sequence::new(vec![
                                Ref::keyword("CLUSTERED").to_matchable(),
                                Ref::keyword("BY").to_matchable(),
                                Ref::new("BracketedColumnReferenceListGrammar").to_matchable(),
                                Ref::keyword("INTO").to_matchable(),
                                Ref::new("NumericLiteralSegment").to_matchable(),
                                Ref::keyword("BUCKETS").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Ref::new("StoredAsGrammar").optional().to_matchable(),
                            Ref::new("StorageFormatGrammar").optional().to_matchable(),
                            Ref::new("LocationGrammar").optional().to_matchable(),
                            Ref::new("TablePropertiesGrammar").optional().to_matchable(),
                            Ref::new("CommentGrammar").optional().to_matchable(),
                        ])
                        .to_matchable(),
                        // CTAS form: `[WITH (<ctas properties>)] AS <select>
                        // [WITH NO DATA]`.
                        Sequence::new(vec![
                            Sequence::new(vec![
                                Ref::keyword("WITH").to_matchable(),
                                Ref::new("BracketedCTASPropertyGrammar").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Ref::keyword("AS").to_matchable(),
                            optionally_bracketed(vec![
                                Ref::new("SelectableGrammar").to_matchable(),
                            ])
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("WITH").to_matchable(),
                                Ref::keyword("NO").to_matchable(),
                                Ref::keyword("DATA").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `MSCK REPAIR TABLE <table>`.
        (
            "MsckRepairTableStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::MsckRepairTableStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("MSCK").to_matchable(),
                    Ref::keyword("REPAIR").to_matchable(),
                    Ref::keyword("TABLE").to_matchable(),
                    Ref::new("TableReferenceSegment").to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `ROW FORMAT DELIMITED ...` (each sub-clause optional, in the fixed
        // order listed below) or `ROW FORMAT SERDE '...'` with optional serde
        // properties.
        (
            "RowFormatClauseSegment".into(),
            NodeMatcher::new(SyntaxKind::RowFormatClause, |_| {
                Sequence::new(vec![
                    Ref::keyword("ROW").to_matchable(),
                    Ref::keyword("FORMAT").to_matchable(),
                    one_of(vec![
                        Sequence::new(vec![
                            Ref::keyword("DELIMITED").to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("FIELDS").to_matchable(),
                                Ref::new("TerminatedByGrammar").to_matchable(),
                                Sequence::new(vec![
                                    Ref::keyword("ESCAPED").to_matchable(),
                                    Ref::keyword("BY").to_matchable(),
                                    Ref::new("QuotedLiteralSegment").to_matchable(),
                                ])
                                .config(|config| {
                                    config.optional();
                                })
                                .to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("COLLECTION").to_matchable(),
                                Ref::keyword("ITEMS").to_matchable(),
                                Ref::new("TerminatedByGrammar").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("MAP").to_matchable(),
                                Ref::keyword("KEYS").to_matchable(),
                                Ref::new("TerminatedByGrammar").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("LINES").to_matchable(),
                                Ref::new("TerminatedByGrammar").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("NULL").to_matchable(),
                                Ref::keyword("DEFINED").to_matchable(),
                                Ref::keyword("AS").to_matchable(),
                                Ref::new("QuotedLiteralSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                        Sequence::new(vec![
                            Ref::keyword("SERDE").to_matchable(),
                            Ref::new("QuotedLiteralSegment").to_matchable(),
                            Ref::new("SerdePropertiesGrammar").optional().to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `INSERT INTO <table>` followed by a selectable, `DEFAULT VALUES`,
        // or an optional column list plus a VALUES clause / selectable.
        (
            "InsertStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::InsertStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("INSERT").to_matchable(),
                    Ref::keyword("INTO").to_matchable(),
                    Ref::new("TableReferenceSegment").to_matchable(),
                    one_of(vec![
                        optionally_bracketed(vec![Ref::new("SelectableGrammar").to_matchable()])
                            .to_matchable(),
                        Sequence::new(vec![
                            Ref::keyword("DEFAULT").to_matchable(),
                            Ref::keyword("VALUES").to_matchable(),
                        ])
                        .to_matchable(),
                        Sequence::new(vec![
                            Ref::new("BracketedColumnReferenceListGrammar")
                                .optional()
                                .to_matchable(),
                            one_of(vec![
                                Ref::new("ValuesClauseSegment").to_matchable(),
                                optionally_bracketed(vec![
                                    Ref::new("SelectableGrammar").to_matchable(),
                                ])
                                .to_matchable(),
                            ])
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `UNLOAD ( <select> ) TO '<literal>' [WITH ( <unload properties> )]`.
        (
            "UnloadStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::UnloadStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("UNLOAD").to_matchable(),
                    Bracketed::new(vec![Ref::new("SelectableGrammar").to_matchable()])
                        .to_matchable(),
                    Ref::keyword("TO").to_matchable(),
                    Ref::new("QuotedLiteralSegment").to_matchable(),
                    Sequence::new(vec![
                        Ref::keyword("WITH").to_matchable(),
                        Ref::new("BracketedUnloadPropertyGrammar").to_matchable(),
                    ])
                    .config(|config| {
                        config.optional();
                    })
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `PREPARE <name> FROM <select | unload | insert>`; the statement
        // name is matched with `TableReferenceSegment`.
        (
            "PrepareStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::PrepareStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("PREPARE").to_matchable(),
                    Ref::new("TableReferenceSegment").to_matchable(),
                    Ref::keyword("FROM").to_matchable(),
                    optionally_bracketed(vec![
                        one_of(vec![
                            Ref::new("SelectableGrammar").to_matchable(),
                            Ref::new("UnloadStatementSegment").to_matchable(),
                            Ref::new("InsertStatementSegment").to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `EXECUTE <name> [USING <literal>, ...]`; the statement name is
        // matched with `TableReferenceSegment`.
        (
            "ExecuteStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::ExecuteStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("EXECUTE").to_matchable(),
                    Ref::new("TableReferenceSegment").to_matchable(),
                    one_of(vec![
                        Sequence::new(vec![
                            Ref::keyword("USING").to_matchable(),
                            Delimited::new(vec![Ref::new("LiteralGrammar").to_matchable()])
                                .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .config(|config| {
                        config.optional();
                    })
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
        // `[INTERVAL] <quoted | numeric | (expression)> <unit> [TO <unit>]`;
        // note that the INTERVAL keyword itself is optional here.
        (
            "IntervalExpressionSegment".into(),
            NodeMatcher::new(SyntaxKind::IntervalExpression, |_| {
                Sequence::new(vec![
                    Ref::keyword("INTERVAL").optional().to_matchable(),
                    one_of(vec![
                        Sequence::new(vec![
                            one_of(vec![
                                Ref::new("QuotedLiteralSegment").to_matchable(),
                                Ref::new("NumericLiteralSegment").to_matchable(),
                                Bracketed::new(vec![Ref::new("ExpressionSegment").to_matchable()])
                                    .to_matchable(),
                            ])
                            .to_matchable(),
                            Ref::new("DatetimeUnitSegment").to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("TO").to_matchable(),
                                Ref::new("DatetimeUnitSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
    ]);

    dialect.add([
        // `DROP COLUMN <identifier>`.
        (
            "AlterTableDropColumnGrammar".into(),
            Sequence::new(vec![
                Ref::keyword("DROP").to_matchable(),
                Ref::keyword("COLUMN").to_matchable(),
                Ref::new("SingleIdentifierGrammar").to_matchable(),
            ])
            .to_matchable()
            .into(),
        ),
        // `SHOW ...` in its COLUMNS / CREATE TABLE|VIEW / DATABASES|SCHEMAS /
        // PARTITIONS / TABLES / TBLPROPERTIES / VIEWS variants.
        (
            "ShowStatementSegment".into(),
            NodeMatcher::new(SyntaxKind::ShowStatement, |_| {
                Sequence::new(vec![
                    Ref::keyword("SHOW").to_matchable(),
                    one_of(vec![
                        // SHOW COLUMNS {FROM | IN} followed by either
                        // `<database> <table>` or `<table> [{FROM | IN} <database>]`.
                        Sequence::new(vec![
                            Ref::keyword("COLUMNS").to_matchable(),
                            one_of(vec![
                                Ref::keyword("FROM").to_matchable(),
                                Ref::keyword("IN").to_matchable(),
                            ])
                            .to_matchable(),
                            one_of(vec![
                                Sequence::new(vec![
                                    Ref::new("DatabaseReferenceSegment").to_matchable(),
                                    Ref::new("TableReferenceSegment").to_matchable(),
                                ])
                                .to_matchable(),
                                Sequence::new(vec![
                                    Ref::new("TableReferenceSegment").to_matchable(),
                                    Sequence::new(vec![
                                        one_of(vec![
                                            Ref::keyword("FROM").to_matchable(),
                                            Ref::keyword("IN").to_matchable(),
                                        ])
                                        .to_matchable(),
                                        Ref::new("DatabaseReferenceSegment").to_matchable(),
                                    ])
                                    .config(|config| {
                                        config.optional();
                                    })
                                    .to_matchable(),
                                ])
                                .to_matchable(),
                            ])
                            .to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW CREATE {TABLE | VIEW} <table>
                        Sequence::new(vec![
                            Ref::keyword("CREATE").to_matchable(),
                            one_of(vec![
                                Ref::keyword("TABLE").to_matchable(),
                                Ref::keyword("VIEW").to_matchable(),
                            ])
                            .to_matchable(),
                            Ref::new("TableReferenceSegment").to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW {DATABASES | SCHEMAS} [LIKE '<literal>']
                        Sequence::new(vec![
                            one_of(vec![
                                Ref::keyword("DATABASES").to_matchable(),
                                Ref::keyword("SCHEMAS").to_matchable(),
                            ])
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("LIKE").to_matchable(),
                                Ref::new("QuotedLiteralSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW PARTITIONS <table>
                        Sequence::new(vec![
                            Ref::keyword("PARTITIONS").to_matchable(),
                            Ref::new("TableReferenceSegment").to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW TABLES [IN <database>] ['<literal>']
                        Sequence::new(vec![
                            Ref::keyword("TABLES").to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("IN").to_matchable(),
                                Ref::new("DatabaseReferenceSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Ref::new("QuotedLiteralSegment").optional().to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW TBLPROPERTIES <table> [('<literal>')]
                        Sequence::new(vec![
                            Ref::keyword("TBLPROPERTIES").to_matchable(),
                            Ref::new("TableReferenceSegment").to_matchable(),
                            Bracketed::new(vec![Ref::new("QuotedLiteralSegment").to_matchable()])
                                .config(|config| {
                                    config.optional();
                                })
                                .to_matchable(),
                        ])
                        .to_matchable(),
                        // SHOW VIEWS [IN <database>] [LIKE '<literal>']
                        Sequence::new(vec![
                            Ref::keyword("VIEWS").to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("IN").to_matchable(),
                                Ref::new("DatabaseReferenceSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                            Sequence::new(vec![
                                Ref::keyword("LIKE").to_matchable(),
                                Ref::new("QuotedLiteralSegment").to_matchable(),
                            ])
                            .config(|config| {
                                config.optional();
                            })
                            .to_matchable(),
                        ])
                        .to_matchable(),
                    ])
                    .to_matchable(),
                ])
                .to_matchable()
            })
            .to_matchable()
            .into(),
        ),
    ]);

    // Apply `expand()` through `config` and return the resulting dialect.
    dialect.config(|this| this.expand())
}