Skip to main content

sqruff_lib_dialects/
duckdb.rs

1use sqruff_lib_core::dialects::Dialect;
2use sqruff_lib_core::dialects::init::DialectKind;
3use sqruff_lib_core::dialects::syntax::SyntaxKind;
4use sqruff_lib_core::helpers::{Config, ToMatchable};
5use sqruff_lib_core::parser::grammar::Ref;
6use sqruff_lib_core::parser::grammar::anyof::one_of;
7use sqruff_lib_core::parser::grammar::delimited::Delimited;
8use sqruff_lib_core::parser::grammar::sequence::{Bracketed, Sequence};
9use sqruff_lib_core::parser::lexer::Matcher;
10use sqruff_lib_core::parser::matchable::MatchableTrait;
11use sqruff_lib_core::parser::parsers::StringParser;
12use sqruff_lib_core::parser::segments::meta::MetaSegment;
13
14use crate::{ansi, postgres};
15use sqruff_lib_core::dialects::init::DialectConfig;
16use sqruff_lib_core::value::Value;
17
18sqruff_lib_core::dialect_config!(DuckDBDialectConfig {});
19
20pub fn dialect(config: Option<&Value>) -> Dialect {
21    // Parse and validate dialect configuration, falling back to defaults on failure
22    let _dialect_config: DuckDBDialectConfig = config
23        .map(DuckDBDialectConfig::from_value)
24        .unwrap_or_default();
25
26    raw_dialect().config(|dialect| dialect.expand())
27}
28
29pub fn raw_dialect() -> Dialect {
30    let ansi_dialect = ansi::raw_dialect();
31    let postgres_dialect = postgres::dialect(None);
32    let mut duckdb_dialect = postgres_dialect;
33    duckdb_dialect.name = DialectKind::Duckdb;
34
35    duckdb_dialect.add_keyword_to_set("reserved_keywords", "SUMMARIZE");
36    duckdb_dialect.add_keyword_to_set("reserved_keywords", "MACRO");
37
38    duckdb_dialect.add([
39        (
40            "SingleIdentifierGrammar".into(),
41            one_of(vec![
42                Ref::new("NakedIdentifierSegment").to_matchable(),
43                Ref::new("QuotedIdentifierSegment").to_matchable(),
44                Ref::new("SingleQuotedIdentifierSegment").to_matchable(),
45            ])
46            .to_matchable()
47            .into(),
48        ),
49        (
50            "DivideSegment".into(),
51            one_of(vec![
52                StringParser::new("//", SyntaxKind::BinaryOperator).to_matchable(),
53                StringParser::new("/", SyntaxKind::BinaryOperator).to_matchable(),
54            ])
55            .to_matchable()
56            .into(),
57        ),
58        (
59            "UnionGrammar".into(),
60            ansi_dialect
61                .grammar("UnionGrammar")
62                .copy(
63                    Some(vec![
64                        Sequence::new(vec![
65                            Ref::keyword("BY").to_matchable(),
66                            Ref::keyword("NAME").to_matchable(),
67                        ])
68                        .config(|this| this.optional())
69                        .to_matchable(),
70                    ]),
71                    None,
72                    None,
73                    None,
74                    Vec::new(),
75                    false,
76                )
77                .into(),
78        ),
79        (
80            "LoadStatementSegment".into(),
81            Sequence::new(vec![
82                Ref::keyword("LOAD").to_matchable(),
83                Ref::new("SingleIdentifierGrammar").to_matchable(),
84            ])
85            .to_matchable()
86            .into(),
87        ),
88        (
89            "SummarizeStatementSegment".into(),
90            Sequence::new(vec![
91                Ref::keyword("SUMMARIZE").to_matchable(),
92                one_of(vec![
93                    Ref::new("SingleIdentifierGrammar").to_matchable(),
94                    Ref::new("SelectStatementSegment").to_matchable(),
95                ])
96                .to_matchable(),
97            ])
98            .to_matchable()
99            .into(),
100        ),
101        (
102            "DescribeStatementSegment".into(),
103            Sequence::new(vec![
104                Ref::keyword("DESCRIBE").to_matchable(),
105                one_of(vec![
106                    Ref::new("SingleIdentifierGrammar").to_matchable(),
107                    Ref::new("SelectStatementSegment").to_matchable(),
108                ])
109                .to_matchable(),
110            ])
111            .to_matchable()
112            .into(),
113        ),
114        (
115            "CreateMacroStatementSegment".into(),
116            Sequence::new(vec![
117                Ref::keyword("CREATE").to_matchable(),
118                one_of(vec![
119                    Ref::keyword("TEMP").to_matchable(),
120                    Ref::keyword("TEMPORARY").to_matchable(),
121                ])
122                .config(|config| config.optional())
123                .to_matchable(),
124                one_of(vec![
125                    Ref::keyword("MACRO").to_matchable(),
126                    Ref::keyword("FUNCTION").to_matchable(),
127                ])
128                .to_matchable(),
129                Ref::new("SingleIdentifierGrammar").to_matchable(),
130                Bracketed::new(vec![
131                    Delimited::new(vec![
132                        Ref::new("BaseExpressionElementGrammar").to_matchable(),
133                    ])
134                    .to_matchable(),
135                ])
136                .to_matchable(),
137                Ref::keyword("AS").to_matchable(),
138                one_of(vec![
139                    Ref::new("SelectStatementSegment").to_matchable(),
140                    Ref::new("BaseExpressionElementGrammar").to_matchable(),
141                ])
142                .to_matchable(),
143            ])
144            .to_matchable()
145            .into(),
146        ),
147    ]);
148
149    duckdb_dialect.insert_lexer_matchers(
150        vec![Matcher::string(
151            "double_divide",
152            "//",
153            SyntaxKind::DoubleDivide,
154        )],
155        "divide",
156    );
157
158    duckdb_dialect.replace_grammar(
159        "SelectClauseElementSegment",
160        one_of(vec![
161            Sequence::new(vec![
162                Ref::new("WildcardExpressionSegment").to_matchable(),
163                one_of(vec![
164                    Sequence::new(vec![
165                        Ref::keyword("EXCLUDE").to_matchable(),
166                        one_of(vec![
167                            Ref::new("ColumnReferenceSegment").to_matchable(),
168                            Bracketed::new(vec![
169                                Delimited::new(vec![
170                                    Ref::new("ColumnReferenceSegment").to_matchable(),
171                                ])
172                                .to_matchable(),
173                            ])
174                            .to_matchable(),
175                        ])
176                        .to_matchable(),
177                    ])
178                    .to_matchable(),
179                    Sequence::new(vec![
180                        Ref::keyword("REPLACE").to_matchable(),
181                        Bracketed::new(vec![
182                            Delimited::new(vec![
183                                Sequence::new(vec![
184                                    Ref::new("BaseExpressionElementGrammar").to_matchable(),
185                                    Ref::new("AliasExpressionSegment").optional().to_matchable(),
186                                ])
187                                .to_matchable(),
188                            ])
189                            .to_matchable(),
190                        ])
191                        .to_matchable(),
192                    ])
193                    .to_matchable(),
194                ])
195                .config(|config| {
196                    config.optional();
197                })
198                .to_matchable(),
199            ])
200            .to_matchable(),
201            Sequence::new(vec![
202                Ref::new("BaseExpressionElementGrammar").to_matchable(),
203                Ref::new("AliasExpressionSegment").optional().to_matchable(),
204            ])
205            .to_matchable(),
206        ])
207        .to_matchable(),
208    );
209
210    duckdb_dialect.replace_grammar(
211        "OrderByClauseSegment",
212        Sequence::new(vec![
213            Ref::keyword("ORDER").to_matchable(),
214            Ref::keyword("BY").to_matchable(),
215            MetaSegment::indent().to_matchable(),
216            Delimited::new(vec![
217                Sequence::new(vec![
218                    one_of(vec![
219                        Ref::keyword("ALL").to_matchable(),
220                        Ref::new("ColumnReferenceSegment").to_matchable(),
221                        Ref::new("NumericLiteralSegment").to_matchable(),
222                        Ref::new("ExpressionSegment").to_matchable(),
223                    ])
224                    .to_matchable(),
225                    one_of(vec![
226                        Ref::keyword("ASC").to_matchable(),
227                        Ref::keyword("DESC").to_matchable(),
228                    ])
229                    .config(|config| {
230                        config.optional();
231                    })
232                    .to_matchable(),
233                    Sequence::new(vec![
234                        Ref::keyword("NULLS").to_matchable(),
235                        one_of(vec![
236                            Ref::keyword("FIRST").to_matchable(),
237                            Ref::keyword("LAST").to_matchable(),
238                        ])
239                        .to_matchable(),
240                    ])
241                    .config(|config| {
242                        config.optional();
243                    })
244                    .to_matchable(),
245                ])
246                .to_matchable(),
247            ])
248            .config(|config| {
249                config.allow_trailing = true;
250                config.terminators = vec![Ref::new("OrderByClauseTerminators").to_matchable()];
251            })
252            .to_matchable(),
253            MetaSegment::dedent().to_matchable(),
254        ])
255        .to_matchable(),
256    );
257
258    duckdb_dialect.replace_grammar(
259        "GroupByClauseSegment",
260        Sequence::new(vec![
261            Ref::keyword("GROUP").to_matchable(),
262            Ref::keyword("BY").to_matchable(),
263            MetaSegment::indent().to_matchable(),
264            Delimited::new(vec![
265                one_of(vec![
266                    Ref::keyword("ALL").to_matchable(),
267                    Ref::new("ColumnReferenceSegment").to_matchable(),
268                    Ref::new("NumericLiteralSegment").to_matchable(),
269                    Ref::new("ExpressionSegment").to_matchable(),
270                ])
271                .to_matchable(),
272            ])
273            .config(|config| {
274                config.allow_trailing = true;
275                config.terminators =
276                    vec![Ref::new("GroupByClauseTerminatorGrammar").to_matchable()];
277            })
278            .to_matchable(),
279            MetaSegment::dedent().to_matchable(),
280        ])
281        .to_matchable(),
282    );
283
284    duckdb_dialect.replace_grammar(
285        "ObjectLiteralElementSegment",
286        Sequence::new(vec![
287            one_of(vec![
288                Ref::new("NakedIdentifierSegment").to_matchable(),
289                Ref::new("QuotedLiteralSegment").to_matchable(),
290            ])
291            .to_matchable(),
292            Ref::new("ColonSegment").to_matchable(),
293            Ref::new("BaseExpressionElementGrammar").to_matchable(),
294        ])
295        .to_matchable(),
296    );
297
298    duckdb_dialect.replace_grammar(
299        "StatementSegment",
300        postgres::statement_segment().copy(
301            Some(vec![
302                Ref::new("LoadStatementSegment").to_matchable(),
303                Ref::new("SummarizeStatementSegment").to_matchable(),
304                Ref::new("DescribeStatementSegment").to_matchable(),
305                Ref::new("CreateMacroStatementSegment").to_matchable(),
306            ]),
307            None,
308            None,
309            None,
310            vec![],
311            false,
312        ),
313    );
314
315    duckdb_dialect
316}