Skip to main content

sqruff_lib_dialects/
athena.rs

1// The AWS Athena dialect.
2// https://docs.aws.amazon.com/athena/latest/ug/what-is.html
3
4use itertools::Itertools;
5use sqruff_lib_core::dialects::Dialect;
6use sqruff_lib_core::dialects::init::DialectConfig;
7use sqruff_lib_core::dialects::init::DialectKind;
8use sqruff_lib_core::dialects::syntax::SyntaxKind;
9use sqruff_lib_core::helpers::{Config, ToMatchable};
10use sqruff_lib_core::parser::grammar::anyof::{AnyNumberOf, one_of, optionally_bracketed};
11use sqruff_lib_core::parser::grammar::delimited::Delimited;
12use sqruff_lib_core::parser::grammar::sequence::{Bracketed, Sequence};
13use sqruff_lib_core::parser::grammar::{Nothing, Ref};
14use sqruff_lib_core::parser::lexer::Matcher;
15use sqruff_lib_core::parser::matchable::MatchableTrait;
16use sqruff_lib_core::parser::node_matcher::NodeMatcher;
17use sqruff_lib_core::parser::parsers::{RegexParser, StringParser, TypedParser};
18use sqruff_lib_core::parser::segments::generator::SegmentGenerator;
19use sqruff_lib_core::parser::segments::meta::MetaSegment;
20use sqruff_lib_core::value::Value;
21
// Invokes `dialect_config!` for `AthenaDialectConfig` with an empty field list.
// NOTE(review): the macro body is not visible here; presumably it generates the
// config type that `dialect` below builds via `from_value` / `unwrap_or_default` — confirm.
22sqruff_lib_core::dialect_config!(AthenaDialectConfig {});
23
24pub fn dialect(config: Option<&Value>) -> Dialect {
25    // Parse and validate dialect configuration, falling back to defaults on failure or when no config is supplied
26    let _dialect_config: AthenaDialectConfig = config
27        .map(AthenaDialectConfig::from_value)
28        .unwrap_or_default();
29
30    let ansi_dialect = super::ansi::dialect(None);
31    let mut dialect = super::ansi::raw_dialect();
32    dialect.name = DialectKind::Athena;
33
34    dialect
35        .sets_mut("unreserved_keywords")
36        .extend(super::athena_keywords::ATHENA_UNRESERVED_KEYWORDS);
37    dialect
38        .sets_mut("reserved_keywords")
39        .extend(super::athena_keywords::ATHENA_RESERVED_KEYWORDS);
40
41    dialect.insert_lexer_matchers(
42        // Array Operations: https://prestodb.io/docs/0.217/functions/array.html
43        vec![Matcher::string("right_arrow", "->", SyntaxKind::RightArrow)],
44        "like_operator",
45    );
46
47    dialect
48        .bracket_sets_mut("angle_bracket_pairs")
49        .extend(vec![(
50            "angle",
51            "StartAngleBracketSegment",
52            "EndAngleBracketSegment",
53            false,
54        )]);
55
56    // Athena supports CTEs with DML statements (INSERT, UPDATE, DELETE, MERGE)
57    // We add these to NonWithSelectableGrammar so WithCompoundStatementSegment can use them
58    dialect.add([(
59        "NonWithSelectableGrammar".into(),
60        one_of(vec![
61            Ref::new("SetExpressionSegment").to_matchable(),
62            optionally_bracketed(vec![Ref::new("SelectStatementSegment").to_matchable()])
63                .to_matchable(),
64            Ref::new("NonSetSelectableGrammar").to_matchable(),
65            Ref::new("UpdateStatementSegment").to_matchable(),
66            Ref::new("InsertStatementSegment").to_matchable(),
67            Ref::new("DeleteStatementSegment").to_matchable(),
68            Ref::new("MergeStatementSegment").to_matchable(),
69        ])
70        .to_matchable()
71        .into(),
72    )]);
73
74    dialect.add([
75        (
76            "StartAngleBracketSegment".into(),
77            StringParser::new("<", SyntaxKind::StartAngleBracket)
78                .to_matchable()
79                .into(),
80        ),
81        (
82            "EndAngleBracketSegment".into(),
83            StringParser::new(">", SyntaxKind::EndAngleBracket)
84                .to_matchable()
85                .into(),
86        ),
87        (
88            "RightArrowOperator".into(),
89            StringParser::new("->", SyntaxKind::BinaryOperator)
90                .to_matchable()
91                .into(),
92        ),
93        (
94            "JSONFILE".into(),
95            StringParser::new("JSONFILE", SyntaxKind::FileFormat)
96                .to_matchable()
97                .into(),
98        ),
99        (
100            "RCFILE".into(),
101            StringParser::new("RCFILE", SyntaxKind::FileFormat)
102                .to_matchable()
103                .into(),
104        ),
105        (
106            "ORC".into(),
107            StringParser::new("ORCFILE", SyntaxKind::FileFormat)
108                .to_matchable()
109                .into(),
110        ),
111        (
112            "PARQUET".into(),
113            StringParser::new("PARQUETFILE", SyntaxKind::FileFormat)
114                .to_matchable()
115                .into(),
116        ),
117        (
118            "AVRO".into(),
119            StringParser::new("AVROFILE", SyntaxKind::FileFormat)
120                .to_matchable()
121                .into(),
122        ),
123        (
124            "ION".into(),
125            StringParser::new("IONFILE", SyntaxKind::FileFormat)
126                .to_matchable()
127                .into(),
128        ),
129        (
130            "SEQUENCEFILE".into(),
131            StringParser::new("SEQUENCEFILE", SyntaxKind::FileFormat)
132                .to_matchable()
133                .into(),
134        ),
135        (
136            "TEXTFILE".into(),
137            StringParser::new("TEXTFILE", SyntaxKind::FileFormat)
138                .to_matchable()
139                .into(),
140        ),
141        (
142            "PropertyGrammar".into(),
143            Sequence::new(vec![
144                Ref::new("QuotedLiteralSegment").to_matchable(),
145                Ref::new("EqualsSegment").to_matchable(),
146                Ref::new("QuotedLiteralSegment").to_matchable(),
147            ])
148            .to_matchable()
149            .into(),
150        ),
151        (
152            "LocationGrammar".into(),
153            Sequence::new(vec![
154                Ref::keyword("LOCATION").to_matchable(),
155                Ref::new("QuotedLiteralSegment").to_matchable(),
156            ])
157            .to_matchable()
158            .into(),
159        ),
160        (
161            "BracketedPropertyListGrammar".into(),
162            Bracketed::new(vec![
163                Delimited::new(vec![Ref::new("PropertyGrammar").to_matchable()]).to_matchable(),
164            ])
165            .to_matchable()
166            .into(),
167        ),
168        (
169            "CTASPropertyGrammar".into(),
170            Sequence::new(vec![
171                one_of(vec![
172                    Ref::keyword("FORMAT").to_matchable(),
173                    Ref::keyword("PARTITIONED_BY").to_matchable(),
174                    Ref::keyword("BUCKETED_BY").to_matchable(),
175                    Ref::keyword("BUCKET_COUNT").to_matchable(),
176                    Ref::keyword("WRITE_COMPRESSION").to_matchable(),
177                    Ref::keyword("ORC_COMPRESSION").to_matchable(),
178                    Ref::keyword("PARQUET_COMPRESSION").to_matchable(),
179                    Ref::keyword("COMPRESSION_LEVEL").to_matchable(),
180                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
181                    Ref::keyword("IS_EXTERNAL").to_matchable(),
182                    Ref::keyword("TABLE_TYPE").to_matchable(),
183                    Ref::keyword("EXTERNAL_LOCATION").to_matchable(),
184                ])
185                .to_matchable(),
186                Ref::new("EqualsSegment").to_matchable(),
187                Ref::new("LiteralGrammar").to_matchable(),
188            ])
189            .to_matchable()
190            .into(),
191        ),
192        (
193            "CTASIcebergPropertyGrammar".into(),
194            Sequence::new(vec![
195                one_of(vec![
196                    Ref::keyword("FORMAT").to_matchable(),
197                    Ref::keyword("PARTITIONED_BY").to_matchable(),
198                    Ref::keyword("BUCKETED_BY").to_matchable(),
199                    Ref::keyword("BUCKET_COUNT").to_matchable(),
200                    Ref::keyword("WRITE_COMPRESSION").to_matchable(),
201                    Ref::keyword("ORC_COMPRESSION").to_matchable(),
202                    Ref::keyword("PARQUET_COMPRESSION").to_matchable(),
203                    Ref::keyword("COMPRESSION_LEVEL").to_matchable(),
204                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
205                    Ref::keyword("IS_EXTERNAL").to_matchable(),
206                    Ref::keyword("TABLE_TYPE").to_matchable(),
207                    // Iceberg-specific properties
208                    Ref::keyword("LOCATION").to_matchable(),
209                    Ref::keyword("PARTITIONING").to_matchable(),
210                    Ref::keyword("VACUUM_MAX_SNAPSHOT_AGE_SECONDS").to_matchable(),
211                    Ref::keyword("VACUUM_MIN_SNAPSHOTS_TO_KEEP").to_matchable(),
212                    Ref::keyword("OPTIMIZE_REWRITE_MIN_DATA_FILE_SIZE_BYTES").to_matchable(),
213                    Ref::keyword("OPTIMIZE_REWRITE_MAX_DATA_FILE_SIZE_BYTES").to_matchable(),
214                    Ref::keyword("OPTIMIZE_REWRITE_DATA_FILE_THRESHOLD").to_matchable(),
215                    Ref::keyword("OPTIMIZE_REWRITE_DELETE_FILE_THRESHOLD").to_matchable(),
216                ])
217                .to_matchable(),
218                Ref::new("EqualsSegment").to_matchable(),
219                Ref::new("LiteralGrammar").to_matchable(),
220            ])
221            .to_matchable()
222            .into(),
223        ),
224        (
225            "BracketedCTASPropertyGrammar".into(),
226            Bracketed::new(vec![
227                one_of(vec![
228                    Delimited::new(vec![Ref::new("CTASPropertyGrammar").to_matchable()])
229                        .to_matchable(),
230                    Delimited::new(vec![Ref::new("CTASIcebergPropertyGrammar").to_matchable()])
231                        .to_matchable(),
232                ])
233                .to_matchable(),
234            ])
235            .to_matchable()
236            .into(),
237        ),
238        (
239            "UnloadPropertyGrammar".into(),
240            Sequence::new(vec![
241                one_of(vec![
242                    Ref::keyword("FORMAT").to_matchable(),
243                    Ref::keyword("PARTITIONED_BY").to_matchable(),
244                    Ref::keyword("COMPRESSION").to_matchable(),
245                    Ref::keyword("FIELD_DELIMITER").to_matchable(),
246                ])
247                .to_matchable(),
248                Ref::new("EqualsSegment").to_matchable(),
249                Ref::new("LiteralGrammar").to_matchable(),
250            ])
251            .to_matchable()
252            .into(),
253        ),
254        (
255            "BracketedUnloadPropertyGrammar".into(),
256            Bracketed::new(vec![
257                Delimited::new(vec![Ref::new("UnloadPropertyGrammar").to_matchable()])
258                    .to_matchable(),
259            ])
260            .to_matchable()
261            .into(),
262        ),
263        (
264            "TablePropertiesGrammar".into(),
265            Sequence::new(vec![
266                Ref::keyword("TBLPROPERTIES").to_matchable(),
267                Ref::new("BracketedPropertyListGrammar").to_matchable(),
268            ])
269            .to_matchable()
270            .into(),
271        ),
272        (
273            "SerdePropertiesGrammar".into(),
274            Sequence::new(vec![
275                Ref::keyword("WITH").to_matchable(),
276                Ref::keyword("SERDEPROPERTIES").to_matchable(),
277                Ref::new("BracketedPropertyListGrammar").to_matchable(),
278            ])
279            .to_matchable()
280            .into(),
281        ),
282        (
283            "TerminatedByGrammar".into(),
284            Sequence::new(vec![
285                Ref::keyword("TERMINATED").to_matchable(),
286                Ref::keyword("BY").to_matchable(),
287                Ref::new("QuotedLiteralSegment").to_matchable(),
288            ])
289            .to_matchable()
290            .into(),
291        ),
292        (
293            "FileFormatGrammar".into(),
294            one_of(vec![
295                Ref::keyword("SEQUENCEFILE").to_matchable(),
296                Ref::keyword("TEXTFILE").to_matchable(),
297                Ref::keyword("RCFILE").to_matchable(),
298                Ref::keyword("ORC").to_matchable(),
299                Ref::keyword("PARQUET").to_matchable(),
300                Ref::keyword("AVRO").to_matchable(),
301                Ref::keyword("JSONFILE").to_matchable(),
302                Ref::keyword("ION").to_matchable(),
303                Sequence::new(vec![
304                    Ref::keyword("INPUTFORMAT").to_matchable(),
305                    Ref::new("QuotedLiteralSegment").to_matchable(),
306                    Ref::keyword("OUTPUTFORMAT").to_matchable(),
307                    Ref::new("QuotedLiteralSegment").to_matchable(),
308                ])
309                .to_matchable(),
310            ])
311            .to_matchable()
312            .into(),
313        ),
314        (
315            "StoredAsGrammar".into(),
316            Sequence::new(vec![
317                Ref::keyword("STORED").to_matchable(),
318                Ref::keyword("AS").to_matchable(),
319                Ref::new("FileFormatGrammar").to_matchable(),
320            ])
321            .to_matchable()
322            .into(),
323        ),
324        (
325            "StoredByGrammar".into(),
326            Sequence::new(vec![
327                Ref::keyword("STORED").to_matchable(),
328                Ref::keyword("BY").to_matchable(),
329                Ref::new("QuotedLiteralSegment").to_matchable(),
330                Ref::new("SerdePropertiesGrammar").optional().to_matchable(),
331            ])
332            .to_matchable()
333            .into(),
334        ),
335        (
336            "StorageFormatGrammar".into(),
337            one_of(vec![
338                Sequence::new(vec![
339                    Ref::new("RowFormatClauseSegment").optional().to_matchable(),
340                    Ref::new("StoredAsGrammar").optional().to_matchable(),
341                ])
342                .to_matchable(),
343                Ref::new("StoredByGrammar").to_matchable(),
344            ])
345            .to_matchable()
346            .into(),
347        ),
348        (
349            "CommentGrammar".into(),
350            Sequence::new(vec![
351                Ref::keyword("COMMENT").to_matchable(),
352                Ref::new("QuotedLiteralSegment").to_matchable(),
353            ])
354            .to_matchable()
355            .into(),
356        ),
357        (
358            "PartitionSpecGrammar".into(),
359            Sequence::new(vec![
360                Ref::keyword("PARTITION").to_matchable(),
361                Bracketed::new(vec![
362                    Delimited::new(vec![
363                        Sequence::new(vec![
364                            Ref::new("ColumnReferenceSegment").to_matchable(),
365                            Sequence::new(vec![
366                                Ref::new("EqualsSegment").to_matchable(),
367                                Ref::new("LiteralGrammar").to_matchable(),
368                            ])
369                            .config(|config| {
370                                config.optional();
371                            })
372                            .to_matchable(),
373                        ])
374                        .to_matchable(),
375                    ])
376                    .to_matchable(),
377                ])
378                .to_matchable(),
379            ])
380            .to_matchable()
381            .into(),
382        ),
383        (
384            "BackQuotedIdentifierSegment".into(),
385            TypedParser::new(SyntaxKind::BackQuote, SyntaxKind::QuotedIdentifier)
386                .to_matchable()
387                .into(),
388        ),
389    ]);
390
391    dialect.add([
392        (
393            "LiteralGrammar".into(),
394            ansi_dialect
395                .grammar("LiteralGrammar")
396                .copy(
397                    Some(vec![Ref::new("ParameterSegment").to_matchable()]),
398                    None,
399                    None,
400                    None,
401                    Vec::new(),
402                    false,
403                )
404                .into(),
405        ),
406        (
407            "AccessorGrammar".into(),
408            Sequence::new(vec![
409                AnyNumberOf::new(vec![Ref::new("ArrayAccessorSegment").to_matchable()])
410                    .config(|config| {
411                        config.optional();
412                    })
413                    .to_matchable(),
414                AnyNumberOf::new(vec![
415                    Sequence::new(vec![
416                        Ref::new("ObjectReferenceDelimiterGrammar").to_matchable(),
417                        Ref::new("ObjectReferenceSegment").to_matchable(),
418                    ])
419                    .to_matchable(),
420                ])
421                .config(|config| {
422                    config.optional();
423                })
424                .to_matchable(),
425            ])
426            .to_matchable()
427            .into(),
428        ),
429        (
430            "QuotedLiteralSegment".into(),
431            one_of(vec![
432                TypedParser::new(SyntaxKind::SingleQuote, SyntaxKind::QuotedLiteral).to_matchable(),
433                TypedParser::new(SyntaxKind::DoubleQuote, SyntaxKind::QuotedLiteral).to_matchable(),
434                TypedParser::new(SyntaxKind::BackQuote, SyntaxKind::QuotedLiteral).to_matchable(),
435            ])
436            .to_matchable()
437            .into(),
438        ),
439        (
440            "TrimParametersGrammar".into(),
441            Nothing::new().to_matchable().into(),
442        ),
443        (
444            "NakedIdentifierSegment".into(),
445            SegmentGenerator::new(|dialect| {
446                let reserved_keywords = dialect.sets("reserved_keywords");
447                let pattern = reserved_keywords.iter().join("|");
448                let anti_template = format!("^({pattern})$");
449
450                RegexParser::new("[A-Z0-9_]*[A-Z_][A-Z0-9_]*", SyntaxKind::NakedIdentifier)
451                    .anti_template(&anti_template)
452                    .to_matchable()
453            })
454            .into(),
455        ),
456        (
457            "SingleIdentifierGrammar".into(),
458            ansi_dialect
459                .grammar("SingleIdentifierGrammar")
460                .copy(
461                    Some(vec![Ref::new("BackQuotedIdentifierSegment").to_matchable()]),
462                    None,
463                    None,
464                    None,
465                    Vec::new(),
466                    false,
467                )
468                .into(),
469        ),
470        (
471            "BinaryOperatorGrammar".into(),
472            one_of(vec![
473                Ref::new("ArithmeticBinaryOperatorGrammar").to_matchable(),
474                Ref::new("StringBinaryOperatorGrammar").to_matchable(),
475                Ref::new("BooleanBinaryOperatorGrammar").to_matchable(),
476                Ref::new("ComparisonOperatorGrammar").to_matchable(),
477                Ref::new("RightArrowOperator").to_matchable(),
478            ])
479            .to_matchable()
480            .into(),
481        ),
482        (
483            "PostFunctionGrammar".into(),
484            ansi_dialect
485                .grammar("PostFunctionGrammar")
486                .copy(
487                    Some(vec![
488                        Sequence::new(vec![
489                            Ref::keyword("WITH").to_matchable(),
490                            Ref::keyword("ORDINALITY").to_matchable(),
491                        ])
492                        .config(|config| config.optional())
493                        .to_matchable(),
494                        Ref::new("WithinGroupClauseSegment").to_matchable(),
495                    ]),
496                    None,
497                    None,
498                    None,
499                    Vec::new(),
500                    false,
501                )
502                .into(),
503        ),
504        (
505            "FunctionContentsGrammar".into(),
506            ansi_dialect
507                .grammar("FunctionContentsGrammar")
508                .copy(
509                    Some(vec![
510                        Ref::new("ListaggOverflowClauseSegment").to_matchable(),
511                    ]),
512                    None,
513                    None,
514                    None,
515                    Vec::new(),
516                    false,
517                )
518                .into(),
519        ),
520    ]);
521
522    // Add support for WITHIN GROUP and LISTAGG overflow clauses, and define ValuesClauseSegment
523    dialect.add([
524        (
525            "WithinGroupClauseSegment".into(),
526            Sequence::new(vec![
527                Ref::keyword("WITHIN").to_matchable(),
528                Ref::keyword("GROUP").to_matchable(),
529                Bracketed::new(vec![Ref::new("OrderByClauseSegment").to_matchable()])
530                    .to_matchable(),
531                Ref::new("FilterClauseGrammar").optional().to_matchable(),
532            ])
533            .to_matchable()
534            .into(),
535        ),
536        (
537            "ListaggOverflowClauseSegment".into(),
538            Sequence::new(vec![
539                Ref::keyword("ON").to_matchable(),
540                Ref::keyword("OVERFLOW").to_matchable(),
541                one_of(vec![
542                    Ref::keyword("ERROR").to_matchable(),
543                    Sequence::new(vec![
544                        Ref::keyword("TRUNCATE").to_matchable(),
545                        Ref::new("QuotedLiteralSegment").optional().to_matchable(),
546                        one_of(vec![
547                            Ref::keyword("WITH").to_matchable(),
548                            Ref::keyword("WITHOUT").to_matchable(),
549                        ])
550                        .config(|config| {
551                            config.optional();
552                        })
553                        .to_matchable(),
554                        Ref::keyword("COUNT").optional().to_matchable(),
555                    ])
556                    .to_matchable(),
557                ])
558                .to_matchable(),
559            ])
560            .to_matchable()
561            .into(),
562        ),
563        (
564            "ValuesClauseSegment".into(),
565            NodeMatcher::new(SyntaxKind::ValuesClause, |_| {
566                Sequence::new(vec![
567                    Ref::keyword("VALUES").to_matchable(),
568                    Delimited::new(vec![Ref::new("ExpressionSegment").to_matchable()])
569                        .to_matchable(),
570                ])
571                .to_matchable()
572            })
573            .to_matchable()
574            .into(),
575        ),
576    ]);
577
578    dialect.replace_grammar(
579        "ArrayTypeSegment",
580        Sequence::new(vec![
581            Ref::keyword("ARRAY").to_matchable(),
582            Ref::new("ArrayTypeSchemaSegment").optional().to_matchable(),
583        ])
584        .to_matchable(),
585    );
586
587    dialect.replace_grammar(
588        "ArrayTypeSchemaSegment",
589        Bracketed::new(vec![Ref::new("DatatypeSegment").to_matchable()])
590            .config(|config| {
591                config.bracket_pairs_set = "angle_bracket_pairs";
592                config.bracket_type = "angle";
593            })
594            .to_matchable(),
595    );
596
597    dialect.replace_grammar(
598        "StructTypeSegment",
599        Sequence::new(vec![
600            Ref::keyword("STRUCT").to_matchable(),
601            Ref::new("StructTypeSchemaSegment")
602                .optional()
603                .to_matchable(),
604        ])
605        .to_matchable(),
606    );
607
608    dialect.add([
609        (
610            "MapTypeSegment".into(),
611            NodeMatcher::new(SyntaxKind::MapType, |_| {
612                Sequence::new(vec![
613                    Ref::keyword("MAP").to_matchable(),
614                    Ref::new("MapTypeSchemaSegment").optional().to_matchable(),
615                ])
616                .to_matchable()
617            })
618            .to_matchable()
619            .into(),
620        ),
621        (
622            "MapTypeSchemaSegment".into(),
623            NodeMatcher::new(SyntaxKind::MapTypeSchema, |_| {
624                Bracketed::new(vec![
625                    Sequence::new(vec![
626                        Ref::new("PrimitiveTypeSegment").to_matchable(),
627                        Ref::new("CommaSegment").to_matchable(),
628                        Ref::new("DatatypeSegment").to_matchable(),
629                    ])
630                    .to_matchable(),
631                ])
632                .config(|config| {
633                    config.bracket_pairs_set = "angle_bracket_pairs";
634                    config.bracket_type = "angle";
635                })
636                .to_matchable()
637            })
638            .to_matchable()
639            .into(),
640        ),
641    ]);
642
643    dialect.replace_grammar(
644        "StatementSegment",
645        super::ansi::statement_segment().copy(
646            Some(vec![
647                Ref::new("MsckRepairTableStatementSegment").to_matchable(),
648                Ref::new("UnloadStatementSegment").to_matchable(),
649                Ref::new("PrepareStatementSegment").to_matchable(),
650                Ref::new("ExecuteStatementSegment").to_matchable(),
651                Ref::new("ShowStatementSegment").to_matchable(),
652            ]),
653            None,
654            None,
655            Some(vec![
656                Ref::new("TransactionStatementSegment").to_matchable(),
657                Ref::new("CreateSchemaStatementSegment").to_matchable(),
658                Ref::new("SetSchemaStatementSegment").to_matchable(),
659                Ref::new("CreateModelStatementSegment").to_matchable(),
660                Ref::new("DropModelStatementSegment").to_matchable(),
661            ]),
662            Vec::new(),
663            false,
664        ),
665    );
666
667    dialect.add([
668        (
669            "StructTypeSchemaSegment".into(),
670            NodeMatcher::new(SyntaxKind::StructTypeSchema, |_| {
671                Bracketed::new(vec![
672                    Delimited::new(vec![
673                        Sequence::new(vec![
674                            Ref::new("NakedIdentifierSegment").to_matchable(),
675                            Ref::new("ColonSegment").to_matchable(),
676                            Ref::new("DatatypeSegment").to_matchable(),
677                            Ref::new("CommentGrammar").optional().to_matchable(),
678                        ])
679                        .to_matchable(),
680                    ])
681                    .to_matchable(),
682                ])
683                .config(|config| {
684                    config.bracket_pairs_set = "angle_bracket_pairs";
685                    config.bracket_type = "angle";
686                })
687                .to_matchable()
688            })
689            .to_matchable()
690            .into(),
691        ),
692        (
693            "PrimitiveTypeSegment".into(),
694            NodeMatcher::new(SyntaxKind::PrimitiveType, |_| {
695                one_of(vec![
696                    Ref::keyword("BOOLEAN").to_matchable(),
697                    Ref::keyword("TINYINT").to_matchable(),
698                    Ref::keyword("SMALLINT").to_matchable(),
699                    Ref::keyword("INTEGER").to_matchable(),
700                    Ref::keyword("INT").to_matchable(),
701                    Ref::keyword("BIGINT").to_matchable(),
702                    Ref::keyword("DOUBLE").to_matchable(),
703                    Ref::keyword("FLOAT").to_matchable(),
704                    Ref::keyword("REAL").to_matchable(),
705                    Sequence::new(vec![
706                        one_of(vec![
707                            Ref::keyword("DECIMAL").to_matchable(),
708                            Ref::keyword("CHAR").to_matchable(),
709                            Ref::keyword("VARCHAR").to_matchable(),
710                        ])
711                        .to_matchable(),
712                        Ref::new("BracketedArguments").optional().to_matchable(),
713                    ])
714                    .to_matchable(),
715                    Ref::keyword("STRING").to_matchable(),
716                    Ref::keyword("BINARY").to_matchable(),
717                    Ref::keyword("DATE").to_matchable(),
718                    Ref::keyword("TIMESTAMP").to_matchable(),
719                    Ref::keyword("VARBINARY").to_matchable(),
720                    Ref::keyword("JSON").to_matchable(),
721                    Ref::keyword("TIME").to_matchable(),
722                    Ref::keyword("IPADDRESS").to_matchable(),
723                    Ref::keyword("HYPERLOGLOG").to_matchable(),
724                    Ref::keyword("P4HYPERLOGLOG").to_matchable(),
725                ])
726                .to_matchable()
727            })
728            .to_matchable()
729            .into(),
730        ),
731        (
732            "DatatypeSegment".into(),
733            NodeMatcher::new(SyntaxKind::DataType, |_| {
734                one_of(vec![
735                    Ref::new("PrimitiveTypeSegment").to_matchable(),
736                    Ref::new("StructTypeSegment").to_matchable(),
737                    Ref::new("ArrayTypeSegment").to_matchable(),
738                    Ref::new("MapTypeSegment").to_matchable(),
739                    Sequence::new(vec![
740                        Ref::keyword("ROW").to_matchable(),
741                        Bracketed::new(vec![
742                            Delimited::new(vec![
743                                AnyNumberOf::new(vec![
744                                    Sequence::new(vec![
745                                        Ref::new("NakedIdentifierSegment").to_matchable(),
746                                        Ref::new("DatatypeSegment").to_matchable(),
747                                    ])
748                                    .to_matchable(),
749                                    Ref::new("LiteralGrammar").to_matchable(),
750                                ])
751                                .to_matchable(),
752                            ])
753                            .to_matchable(),
754                        ])
755                        .to_matchable(),
756                    ])
757                    .to_matchable(),
758                    Ref::new("TimeWithTZGrammar").to_matchable(),
759                ])
760                .to_matchable()
761            })
762            .to_matchable()
763            .into(),
764        ),
765    ]);
766
767    dialect.replace_grammar(
           // Replaces whatever grammar is currently registered under
           // "GroupByClauseSegment" (the dialect object starts from the ANSI raw
           // dialect, so this is presumably the ANSI definition — confirm).
           //
           // Shape: GROUP BY <item> [<sep> <item> ...], with indent/dedent meta
           // segments wrapped around the item list. The separator is whatever
           // `Delimited::new` uses by default (presumably a comma — confirm).
768        "GroupByClauseSegment",
769        Sequence::new(vec![
770            Ref::keyword("GROUP").to_matchable(),
771            Ref::keyword("BY").to_matchable(),
772            MetaSegment::indent().to_matchable(),
773            Delimited::new(vec![
                   // Each item is one of the alternatives below. The general
                   // ExpressionSegment is listed last, after the more specific
                   // forms. NOTE(review): the match result may depend on this
                   // listing order — do not reorder without checking `one_of`.
774                one_of(vec![
775                    Ref::new("CubeRollupClauseSegment").to_matchable(),
776                    Ref::new("GroupingSetsClauseSegment").to_matchable(),
777                    Ref::new("ColumnReferenceSegment").to_matchable(),
778                    Ref::new("NumericLiteralSegment").to_matchable(),
779                    Ref::new("ExpressionSegment").to_matchable(),
780                ])
781                .to_matchable(),
782            ])
783            .to_matchable(),
784            MetaSegment::dedent().to_matchable(),
785        ])
786        .to_matchable(),
787    );
788
789    dialect.add([
790        (
               // CREATE TABLE statement, registered as a node of kind
               // SyntaxKind::CreateTableStatement.
               //
               // Shape: CREATE [EXTERNAL] TABLE [IfNotExistsGrammar] <TableReferenceSegment>
               // followed by exactly one of two bodies:
               //   1. a DDL-style body (optional bracketed column/constraint list
               //      plus optional storage-related clauses), or
               //   2. a CTAS body ([WITH <props>] AS <select> [WITH NO DATA]).
791            "CreateTableStatementSegment".into(),
792            NodeMatcher::new(SyntaxKind::CreateTableStatement, |_| {
793                Sequence::new(vec![
794                    Ref::keyword("CREATE").to_matchable(),
795                    Ref::keyword("EXTERNAL").optional().to_matchable(),
796                    Ref::keyword("TABLE").to_matchable(),
797                    Ref::new("IfNotExistsGrammar").optional().to_matchable(),
798                    Ref::new("TableReferenceSegment").to_matchable(),
799                    one_of(vec![
                           // Body 1 (DDL form). Every element of this sequence is
                           // marked optional, including the bracketed column list.
                           // NOTE(review): CommentGrammar and StoredAsGrammar each
                           // appear twice below (once before PARTITIONED BY and once
                           // after CLUSTERED BY) — presumably to accept either clause
                           // position; confirm before deduplicating.
800                        Sequence::new(vec![
801                            Bracketed::new(vec![
802                                Delimited::new(vec![
                                       // A list entry is either a table constraint or a
                                       // column definition with an optional comment.
803                                    one_of(vec![
804                                        Ref::new("TableConstraintSegment")
805                                            .optional()
806                                            .to_matchable(),
807                                        Sequence::new(vec![
808                                            Ref::new("ColumnDefinitionSegment").to_matchable(),
809                                            Ref::new("CommentGrammar").optional().to_matchable(),
810                                        ])
811                                        .to_matchable(),
812                                    ])
813                                    .to_matchable(),
814                                ])
815                                .to_matchable(),
816                            ])
817                            .config(|config| {
818                                config.optional();
819                            })
820                            .to_matchable(),
821                            Ref::new("CommentGrammar").optional().to_matchable(),
822                            Ref::new("StoredAsGrammar").optional().to_matchable(),
                               // Optional PARTITIONED BY ( <entry> [, ...] ), where each
                               // entry may carry its own optional comment.
823                            Sequence::new(vec![
824                                Ref::keyword("PARTITIONED").to_matchable(),
825                                Ref::keyword("BY").to_matchable(),
826                                Bracketed::new(vec![
827                                    Delimited::new(vec![
828                                        Sequence::new(vec![
829                                            one_of(vec![
830                                                // External tables expect types...
831                                                Ref::new("ColumnDefinitionSegment").to_matchable(),
832                                                // Iceberg tables don't expect types.
833                                                Ref::new("SingleIdentifierGrammar").to_matchable(),
834                                                // Iceberg tables also allow partition transforms
835                                                Ref::new("FunctionSegment").to_matchable(),
836                                            ])
837                                            .to_matchable(),
838                                            Ref::new("CommentGrammar").optional().to_matchable(),
839                                        ])
840                                        .to_matchable(),
841                                    ])
842                                    .to_matchable(),
843                                ])
844                                .to_matchable(),
845                            ])
846                            .config(|config| {
847                                config.optional();
848                            })
849                            .to_matchable(),
                               // Optional CLUSTERED BY <column list> INTO <number> BUCKETS.
850                            Sequence::new(vec![
851                                Ref::keyword("CLUSTERED").to_matchable(),
852                                Ref::keyword("BY").to_matchable(),
853                                Ref::new("BracketedColumnReferenceListGrammar").to_matchable(),
854                                Ref::keyword("INTO").to_matchable(),
855                                Ref::new("NumericLiteralSegment").to_matchable(),
856                                Ref::keyword("BUCKETS").to_matchable(),
857                            ])
858                            .config(|config| {
859                                config.optional();
860                            })
861                            .to_matchable(),
862                            Ref::new("StoredAsGrammar").optional().to_matchable(),
863                            Ref::new("StorageFormatGrammar").optional().to_matchable(),
864                            Ref::new("LocationGrammar").optional().to_matchable(),
865                            Ref::new("TablePropertiesGrammar").optional().to_matchable(),
866                            Ref::new("CommentGrammar").optional().to_matchable(),
867                        ])
868                        .to_matchable(),
                           // Body 2 (CTAS form): optional WITH <BracketedCTASPropertyGrammar>,
                           // a mandatory AS, a SelectableGrammar that may or may not be
                           // wrapped in brackets, then an optional WITH NO DATA.
869                        Sequence::new(vec![
870                            Sequence::new(vec![
871                                Ref::keyword("WITH").to_matchable(),
872                                Ref::new("BracketedCTASPropertyGrammar").to_matchable(),
873                            ])
874                            .config(|config| {
875                                config.optional();
876                            })
877                            .to_matchable(),
878                            Ref::keyword("AS").to_matchable(),
879                            optionally_bracketed(vec![
880                                Ref::new("SelectableGrammar").to_matchable(),
881                            ])
882                            .to_matchable(),
883                            Sequence::new(vec![
884                                Ref::keyword("WITH").to_matchable(),
885                                Ref::keyword("NO").to_matchable(),
886                                Ref::keyword("DATA").to_matchable(),
887                            ])
888                            .config(|config| {
889                                config.optional();
890                            })
891                            .to_matchable(),
892                        ])
893                        .to_matchable(),
894                    ])
895                    .to_matchable(),
896                ])
897                .to_matchable()
898            })
899            .to_matchable()
900            .into(),
901        ),
902        (
               // MSCK REPAIR TABLE <TableReferenceSegment>, registered as a node of
               // kind SyntaxKind::MsckRepairTableStatement. All three keywords and
               // the table reference are mandatory; nothing may follow them here.
903            "MsckRepairTableStatementSegment".into(),
904            NodeMatcher::new(SyntaxKind::MsckRepairTableStatement, |_| {
905                Sequence::new(vec![
906                    Ref::keyword("MSCK").to_matchable(),
907                    Ref::keyword("REPAIR").to_matchable(),
908                    Ref::keyword("TABLE").to_matchable(),
909                    Ref::new("TableReferenceSegment").to_matchable(),
910                ])
911                .to_matchable()
912            })
913            .to_matchable()
914            .into(),
915        ),
916        (
               // ROW FORMAT clause, registered as a node of kind
               // SyntaxKind::RowFormatClause.
               //
               // Shape: ROW FORMAT followed by one of
               //   - DELIMITED plus a fixed-order run of optional sub-clauses, or
               //   - SERDE '<quoted literal>' [SerdePropertiesGrammar].
917            "RowFormatClauseSegment".into(),
918            NodeMatcher::new(SyntaxKind::RowFormatClause, |_| {
919                Sequence::new(vec![
920                    Ref::keyword("ROW").to_matchable(),
921                    Ref::keyword("FORMAT").to_matchable(),
922                    one_of(vec![
                           // DELIMITED form. Each sub-clause below is individually
                           // optional; they are listed in the order
                           // FIELDS, COLLECTION ITEMS, MAP KEYS, LINES, NULL DEFINED AS.
923                        Sequence::new(vec![
924                            Ref::keyword("DELIMITED").to_matchable(),
                               // FIELDS <TerminatedByGrammar> [ESCAPED BY '<quoted literal>'];
                               // ESCAPED BY is only reachable inside the FIELDS sub-clause.
925                            Sequence::new(vec![
926                                Ref::keyword("FIELDS").to_matchable(),
927                                Ref::new("TerminatedByGrammar").to_matchable(),
928                                Sequence::new(vec![
929                                    Ref::keyword("ESCAPED").to_matchable(),
930                                    Ref::keyword("BY").to_matchable(),
931                                    Ref::new("QuotedLiteralSegment").to_matchable(),
932                                ])
933                                .config(|config| {
934                                    config.optional();
935                                })
936                                .to_matchable(),
937                            ])
938                            .config(|config| {
939                                config.optional();
940                            })
941                            .to_matchable(),
942                            Sequence::new(vec![
943                                Ref::keyword("COLLECTION").to_matchable(),
944                                Ref::keyword("ITEMS").to_matchable(),
945                                Ref::new("TerminatedByGrammar").to_matchable(),
946                            ])
947                            .config(|config| {
948                                config.optional();
949                            })
950                            .to_matchable(),
951                            Sequence::new(vec![
952                                Ref::keyword("MAP").to_matchable(),
953                                Ref::keyword("KEYS").to_matchable(),
954                                Ref::new("TerminatedByGrammar").to_matchable(),
955                            ])
956                            .config(|config| {
957                                config.optional();
958                            })
959                            .to_matchable(),
960                            Sequence::new(vec![
961                                Ref::keyword("LINES").to_matchable(),
962                                Ref::new("TerminatedByGrammar").to_matchable(),
963                            ])
964                            .config(|config| {
965                                config.optional();
966                            })
967                            .to_matchable(),
968                            Sequence::new(vec![
969                                Ref::keyword("NULL").to_matchable(),
970                                Ref::keyword("DEFINED").to_matchable(),
971                                Ref::keyword("AS").to_matchable(),
972                                Ref::new("QuotedLiteralSegment").to_matchable(),
973                            ])
974                            .config(|config| {
975                                config.optional();
976                            })
977                            .to_matchable(),
978                        ])
979                        .to_matchable(),
                           // SERDE form: the quoted literal is mandatory, the
                           // SerdePropertiesGrammar that may follow it is optional.
980                        Sequence::new(vec![
981                            Ref::keyword("SERDE").to_matchable(),
982                            Ref::new("QuotedLiteralSegment").to_matchable(),
983                            Ref::new("SerdePropertiesGrammar").optional().to_matchable(),
984                        ])
985                        .to_matchable(),
986                    ])
987                    .to_matchable(),
988                ])
989                .to_matchable()
990            })
991            .to_matchable()
992            .into(),
993        ),
994        (
               // INSERT statement, registered as a node of kind
               // SyntaxKind::InsertStatement.
               //
               // Shape: INSERT INTO <TableReferenceSegment> followed by one of
               //   - a SelectableGrammar, optionally wrapped in brackets,
               //   - DEFAULT VALUES, or
               //   - an optional bracketed column list followed by either a
               //     ValuesClauseSegment or an optionally bracketed SelectableGrammar.
               // NOTE(review): the first and third alternatives overlap when no
               // column list is given; which one wins depends on `one_of` — confirm
               // before reordering.
995            "InsertStatementSegment".into(),
996            NodeMatcher::new(SyntaxKind::InsertStatement, |_| {
997                Sequence::new(vec![
998                    Ref::keyword("INSERT").to_matchable(),
999                    Ref::keyword("INTO").to_matchable(),
1000                    Ref::new("TableReferenceSegment").to_matchable(),
1001                    one_of(vec![
1002                        optionally_bracketed(vec![Ref::new("SelectableGrammar").to_matchable()])
1003                            .to_matchable(),
1004                        Sequence::new(vec![
1005                            Ref::keyword("DEFAULT").to_matchable(),
1006                            Ref::keyword("VALUES").to_matchable(),
1007                        ])
1008                        .to_matchable(),
1009                        Sequence::new(vec![
1010                            Ref::new("BracketedColumnReferenceListGrammar")
1011                                .optional()
1012                                .to_matchable(),
1013                            one_of(vec![
1014                                Ref::new("ValuesClauseSegment").to_matchable(),
1015                                optionally_bracketed(vec![
1016                                    Ref::new("SelectableGrammar").to_matchable(),
1017                                ])
1018                                .to_matchable(),
1019                            ])
1020                            .to_matchable(),
1021                        ])
1022                        .to_matchable(),
1023                    ])
1024                    .to_matchable(),
1025                ])
1026                .to_matchable()
1027            })
1028            .to_matchable()
1029            .into(),
1030        ),
1031        (
                // UNLOAD statement, registered as a node of kind
                // SyntaxKind::UnloadStatement.
                //
                // Shape: UNLOAD ( <SelectableGrammar> ) TO '<quoted literal>'
                //        [WITH <BracketedUnloadPropertyGrammar>]
                // Unlike the CTAS and INSERT grammars in this file, the brackets
                // around the query are mandatory here (plain `Bracketed`, not
                // `optionally_bracketed`).
1032            "UnloadStatementSegment".into(),
1033            NodeMatcher::new(SyntaxKind::UnloadStatement, |_| {
1034                Sequence::new(vec![
1035                    Ref::keyword("UNLOAD").to_matchable(),
1036                    Bracketed::new(vec![Ref::new("SelectableGrammar").to_matchable()])
1037                        .to_matchable(),
1038                    Ref::keyword("TO").to_matchable(),
1039                    Ref::new("QuotedLiteralSegment").to_matchable(),
1040                    Sequence::new(vec![
1041                        Ref::keyword("WITH").to_matchable(),
1042                        Ref::new("BracketedUnloadPropertyGrammar").to_matchable(),
1043                    ])
1044                    .config(|config| {
1045                        config.optional();
1046                    })
1047                    .to_matchable(),
1048                ])
1049                .to_matchable()
1050            })
1051            .to_matchable()
1052            .into(),
1053        ),
1054        (
                // PREPARE statement, registered as a node of kind
                // SyntaxKind::PrepareStatement.
                //
                // Shape: PREPARE <name> FROM <statement>, where <statement> is a
                // SelectableGrammar, an UnloadStatementSegment or an
                // InsertStatementSegment, optionally wrapped in brackets.
                // The statement name is parsed with TableReferenceSegment rather
                // than a dedicated identifier grammar.
1055            "PrepareStatementSegment".into(),
1056            NodeMatcher::new(SyntaxKind::PrepareStatement, |_| {
1057                Sequence::new(vec![
1058                    Ref::keyword("PREPARE").to_matchable(),
1059                    Ref::new("TableReferenceSegment").to_matchable(),
1060                    Ref::keyword("FROM").to_matchable(),
1061                    optionally_bracketed(vec![
1062                        one_of(vec![
1063                            Ref::new("SelectableGrammar").to_matchable(),
1064                            Ref::new("UnloadStatementSegment").to_matchable(),
1065                            Ref::new("InsertStatementSegment").to_matchable(),
1066                        ])
1067                        .to_matchable(),
1068                    ])
1069                    .to_matchable(),
1070                ])
1071                .to_matchable()
1072            })
1073            .to_matchable()
1074            .into(),
1075        ),
1076        (
                // EXECUTE statement, registered as a node of kind
                // SyntaxKind::ExecuteStatement.
                //
                // Shape: EXECUTE <name> [USING <literal> [<sep> <literal> ...]]
                // The statement name is parsed with TableReferenceSegment, matching
                // how PrepareStatementSegment parses it.
1077            "ExecuteStatementSegment".into(),
1078            NodeMatcher::new(SyntaxKind::ExecuteStatement, |_| {
1079                Sequence::new(vec![
1080                    Ref::keyword("EXECUTE").to_matchable(),
1081                    Ref::new("TableReferenceSegment").to_matchable(),
                        // `one_of` with a single alternative; the whole group is
                        // optional, so a bare `EXECUTE <name>` is accepted too.
1082                    one_of(vec![
1083                        Sequence::new(vec![
1084                            Ref::keyword("USING").to_matchable(),
1085                            Delimited::new(vec![Ref::new("LiteralGrammar").to_matchable()])
1086                                .to_matchable(),
1087                        ])
1088                        .to_matchable(),
1089                    ])
1090                    .config(|config| {
1091                        config.optional();
1092                    })
1093                    .to_matchable(),
1094                ])
1095                .to_matchable()
1096            })
1097            .to_matchable()
1098            .into(),
1099        ),
1100        (
                // Interval expression, registered as a node of kind
                // SyntaxKind::IntervalExpression.
                //
                // Shape: [INTERVAL] <value> <DatetimeUnitSegment> [TO <DatetimeUnitSegment>]
                // where <value> is a quoted literal, a numeric literal, or a
                // bracketed ExpressionSegment.
1101            "IntervalExpressionSegment".into(),
1102            NodeMatcher::new(SyntaxKind::IntervalExpression, |_| {
1103                Sequence::new(vec![
                        // The INTERVAL keyword itself is optional in this grammar, so
                        // `<value> <unit>` alone can match as an interval expression.
1104                    Ref::keyword("INTERVAL").optional().to_matchable(),
                        // Single-alternative `one_of`; unlike the INTERVAL keyword, this
                        // group is mandatory.
1105                    one_of(vec![
1106                        Sequence::new(vec![
1107                            one_of(vec![
1108                                Ref::new("QuotedLiteralSegment").to_matchable(),
1109                                Ref::new("NumericLiteralSegment").to_matchable(),
1110                                Bracketed::new(vec![Ref::new("ExpressionSegment").to_matchable()])
1111                                    .to_matchable(),
1112                            ])
1113                            .to_matchable(),
1114                            Ref::new("DatetimeUnitSegment").to_matchable(),
1115                            Sequence::new(vec![
1116                                Ref::keyword("TO").to_matchable(),
1117                                Ref::new("DatetimeUnitSegment").to_matchable(),
1118                            ])
1119                            .config(|config| {
1120                                config.optional();
1121                            })
1122                            .to_matchable(),
1123                        ])
1124                        .to_matchable(),
1125                    ])
1126                    .to_matchable(),
1127                ])
1128                .to_matchable()
1129            })
1130            .to_matchable()
1131            .into(),
1132        ),
1133    ]);
1134
1135    dialect.add([
1136        (
                // DROP COLUMN <SingleIdentifierGrammar>.
                // Registered as a plain grammar: unlike the statement segments in
                // this file it is not wrapped in a NodeMatcher, so no SyntaxKind is
                // attached to it here.
1137            "AlterTableDropColumnGrammar".into(),
1138            Sequence::new(vec![
1139                Ref::keyword("DROP").to_matchable(),
1140                Ref::keyword("COLUMN").to_matchable(),
1141                Ref::new("SingleIdentifierGrammar").to_matchable(),
1142            ])
1143            .to_matchable()
1144            .into(),
1145        ),
1146        (
                // SHOW statement, registered as a node of kind
                // SyntaxKind::ShowStatement.
                //
                // Shape: SHOW followed by exactly one of the variants below:
                //   COLUMNS, CREATE TABLE|VIEW, DATABASES|SCHEMAS, PARTITIONS,
                //   TABLES, TBLPROPERTIES, VIEWS.
1147            "ShowStatementSegment".into(),
1148            NodeMatcher::new(SyntaxKind::ShowStatement, |_| {
1149                Sequence::new(vec![
1150                    Ref::keyword("SHOW").to_matchable(),
1151                    one_of(vec![
                            // SHOW COLUMNS {FROM | IN} <target>, where <target> is either
                            //   - a DatabaseReferenceSegment directly followed by a
                            //     TableReferenceSegment, or
                            //   - a TableReferenceSegment with an optional
                            //     {FROM | IN} <DatabaseReferenceSegment> suffix.
1152                        Sequence::new(vec![
1153                            Ref::keyword("COLUMNS").to_matchable(),
1154                            one_of(vec![
1155                                Ref::keyword("FROM").to_matchable(),
1156                                Ref::keyword("IN").to_matchable(),
1157                            ])
1158                            .to_matchable(),
1159                            one_of(vec![
1160                                Sequence::new(vec![
1161                                    Ref::new("DatabaseReferenceSegment").to_matchable(),
1162                                    Ref::new("TableReferenceSegment").to_matchable(),
1163                                ])
1164                                .to_matchable(),
1165                                Sequence::new(vec![
1166                                    Ref::new("TableReferenceSegment").to_matchable(),
1167                                    Sequence::new(vec![
1168                                        one_of(vec![
1169                                            Ref::keyword("FROM").to_matchable(),
1170                                            Ref::keyword("IN").to_matchable(),
1171                                        ])
1172                                        .to_matchable(),
1173                                        Ref::new("DatabaseReferenceSegment").to_matchable(),
1174                                    ])
1175                                    .config(|config| {
1176                                        config.optional();
1177                                    })
1178                                    .to_matchable(),
1179                                ])
1180                                .to_matchable(),
1181                            ])
1182                            .to_matchable(),
1183                        ])
1184                        .to_matchable(),
                            // SHOW CREATE {TABLE | VIEW} <TableReferenceSegment>
1185                        Sequence::new(vec![
1186                            Ref::keyword("CREATE").to_matchable(),
1187                            one_of(vec![
1188                                Ref::keyword("TABLE").to_matchable(),
1189                                Ref::keyword("VIEW").to_matchable(),
1190                            ])
1191                            .to_matchable(),
1192                            Ref::new("TableReferenceSegment").to_matchable(),
1193                        ])
1194                        .to_matchable(),
                            // SHOW {DATABASES | SCHEMAS} [LIKE '<quoted literal>']
1195                        Sequence::new(vec![
1196                            one_of(vec![
1197                                Ref::keyword("DATABASES").to_matchable(),
1198                                Ref::keyword("SCHEMAS").to_matchable(),
1199                            ])
1200                            .to_matchable(),
1201                            Sequence::new(vec![
1202                                Ref::keyword("LIKE").to_matchable(),
1203                                Ref::new("QuotedLiteralSegment").to_matchable(),
1204                            ])
1205                            .config(|config| {
1206                                config.optional();
1207                            })
1208                            .to_matchable(),
1209                        ])
1210                        .to_matchable(),
                            // SHOW PARTITIONS <TableReferenceSegment>
1211                        Sequence::new(vec![
1212                            Ref::keyword("PARTITIONS").to_matchable(),
1213                            Ref::new("TableReferenceSegment").to_matchable(),
1214                        ])
1215                        .to_matchable(),
                            // SHOW TABLES [IN <DatabaseReferenceSegment>] ['<quoted literal>']
                            // Note: the trailing literal has no LIKE keyword in front of
                            // it here, unlike the DATABASES/SCHEMAS and VIEWS variants.
1216                        Sequence::new(vec![
1217                            Ref::keyword("TABLES").to_matchable(),
1218                            Sequence::new(vec![
1219                                Ref::keyword("IN").to_matchable(),
1220                                Ref::new("DatabaseReferenceSegment").to_matchable(),
1221                            ])
1222                            .config(|config| {
1223                                config.optional();
1224                            })
1225                            .to_matchable(),
1226                            Ref::new("QuotedLiteralSegment").optional().to_matchable(),
1227                        ])
1228                        .to_matchable(),
                            // SHOW TBLPROPERTIES <TableReferenceSegment> [('<quoted literal>')]
1229                        Sequence::new(vec![
1230                            Ref::keyword("TBLPROPERTIES").to_matchable(),
1231                            Ref::new("TableReferenceSegment").to_matchable(),
1232                            Bracketed::new(vec![Ref::new("QuotedLiteralSegment").to_matchable()])
1233                                .config(|config| {
1234                                    config.optional();
1235                                })
1236                                .to_matchable(),
1237                        ])
1238                        .to_matchable(),
                            // SHOW VIEWS [IN <DatabaseReferenceSegment>] [LIKE '<quoted literal>']
1239                        Sequence::new(vec![
1240                            Ref::keyword("VIEWS").to_matchable(),
1241                            Sequence::new(vec![
1242                                Ref::keyword("IN").to_matchable(),
1243                                Ref::new("DatabaseReferenceSegment").to_matchable(),
1244                            ])
1245                            .config(|config| {
1246                                config.optional();
1247                            })
1248                            .to_matchable(),
1249                            Sequence::new(vec![
1250                                Ref::keyword("LIKE").to_matchable(),
1251                                Ref::new("QuotedLiteralSegment").to_matchable(),
1252                            ])
1253                            .config(|config| {
1254                                config.optional();
1255                            })
1256                            .to_matchable(),
1257                        ])
1258                        .to_matchable(),
1259                    ])
1260                    .to_matchable(),
1261                ])
1262                .to_matchable()
1263            })
1264            .to_matchable()
1265            .into(),
1266        ),
1267    ]);
1268
1269    dialect.config(|this| this.expand())
1270}