Skip to main content

datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    Unparser, utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql,
23};
24use arrow::array::timezone::Tz;
25use arrow::datatypes::TimeUnit;
26use chrono::DateTime;
27use datafusion_common::{Result, internal_err};
28use datafusion_expr::Expr;
29use regex::Regex;
30use sqlparser::tokenizer::Span;
31use sqlparser::{
32    ast::{
33        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
34    },
35    keywords::ALL_KEYWORDS,
36};
37
/// Boxed callback that lets a dialect customize how one scalar function is unparsed.
///
/// The callback receives the active [`Unparser`] and the function's argument
/// expressions. Its result is returned as-is from
/// [`Dialect::scalar_function_to_sql_overrides`] by dialects that store such
/// handlers, so `Ok(Some(expr))` supplies a custom SQL expression and `Ok(None)`
/// requests the default unparsing.
///
/// The `Send + Sync` bounds match the bounds on [`Dialect`] itself.
pub type ScalarFnToSqlHandler =
    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
40
41/// `Dialect` to use for Unparsing
42///
43/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
44/// but this behavior can be overridden as needed
45///
46/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
47///
48/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
49/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
50pub trait Dialect: Send + Sync {
51    /// Return the character used to quote identifiers.
52    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
53
54    /// Whether array literals should be rendered with the `ARRAY[...]` keyword.
55    fn use_array_keyword_for_array_literals(&self) -> bool {
56        false
57    }
58
59    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
60    fn supports_nulls_first_in_sort(&self) -> bool {
61        true
62    }
63
64    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
65    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
66    fn use_timestamp_for_date64(&self) -> bool {
67        false
68    }
69
70    fn interval_style(&self) -> IntervalStyle {
71        IntervalStyle::PostgresVerbose
72    }
73
74    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
75    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
76    fn float64_ast_dtype(&self) -> ast::DataType {
77        ast::DataType::Double(ast::ExactNumberInfo::None)
78    }
79
80    /// The SQL type to use for Arrow Utf8 unparsing
81    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
82    fn utf8_cast_dtype(&self) -> ast::DataType {
83        ast::DataType::Varchar(None)
84    }
85
86    /// The SQL type to use for Arrow LargeUtf8 unparsing
87    /// Most dialects use TEXT, but some, like MySQL, require CHAR
88    fn large_utf8_cast_dtype(&self) -> ast::DataType {
89        ast::DataType::Text
90    }
91
92    /// The date field extract style to use: `DateFieldExtractStyle`
93    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
94        DateFieldExtractStyle::DatePart
95    }
96
97    /// The character length extraction style to use: `CharacterLengthStyle`
98    fn character_length_style(&self) -> CharacterLengthStyle {
99        CharacterLengthStyle::CharacterLength
100    }
101
102    /// The SQL type to use for Arrow Int64 unparsing
103    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
104    fn int64_cast_dtype(&self) -> ast::DataType {
105        ast::DataType::BigInt(None)
106    }
107
108    /// The SQL type to use for Arrow Int8 unparsing
109    /// Most dialects use TinyInt, but PostgreSQL prefers SmallInt
110    fn int8_cast_dtype(&self) -> ast::DataType {
111        ast::DataType::TinyInt(None)
112    }
113
114    /// The SQL type to use for Arrow Int32 unparsing
115    /// Most dialects use Integer, but some, like MySQL, require SIGNED
116    fn int32_cast_dtype(&self) -> ast::DataType {
117        ast::DataType::Integer(None)
118    }
119
120    /// The SQL type to use for Timestamp unparsing
121    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
122    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
123    fn timestamp_cast_dtype(
124        &self,
125        _time_unit: &TimeUnit,
126        tz: &Option<Arc<str>>,
127    ) -> ast::DataType {
128        let tz_info = match tz {
129            Some(_) => TimezoneInfo::WithTimeZone,
130            None => TimezoneInfo::None,
131        };
132
133        ast::DataType::Timestamp(None, tz_info)
134    }
135
136    /// The SQL type to use for Arrow Date32 unparsing
137    /// Most dialects use Date, but some, like SQLite require TEXT
138    fn date32_cast_dtype(&self) -> ast::DataType {
139        ast::DataType::Date
140    }
141
142    /// Does the dialect support specifying column aliases as part of alias table definition?
143    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
144    fn supports_column_alias_in_table_alias(&self) -> bool {
145        true
146    }
147
148    /// Whether the dialect requires a table alias for any subquery in the FROM clause
149    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
150    fn requires_derived_table_alias(&self) -> bool {
151        false
152    }
153
154    /// The division operator for the dialect
155    /// Most dialect uses ` BinaryOperator::Divide` (/)
156    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
157    fn division_operator(&self) -> BinaryOperator {
158        BinaryOperator::Divide
159    }
160
161    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
162    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
163    /// a custom implementation for the function.
164    fn scalar_function_to_sql_overrides(
165        &self,
166        _unparser: &Unparser,
167        _func_name: &str,
168        _args: &[Expr],
169    ) -> Result<Option<ast::Expr>> {
170        Ok(None)
171    }
172
173    /// Allows the dialect to override higher order function unparsing if the dialect has specific rules.
174    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
175    /// a custom implementation for the function.
176    fn higher_order_function_to_sql_overrides(
177        &self,
178        _unparser: &Unparser,
179        _func_name: &str,
180        _args: &[Expr],
181    ) -> Result<Option<ast::Expr>> {
182        Ok(None)
183    }
184
185    /// Allows the dialect to choose to omit window frame in unparsing
186    /// based on function name and window frame bound
187    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
188    fn window_func_support_window_frame(
189        &self,
190        _func_name: &str,
191        _start_bound: &WindowFrameBound,
192        _end_bound: &WindowFrameBound,
193    ) -> bool {
194        true
195    }
196
197    /// Extends the dialect's default rules for unparsing scalar functions.
198    /// This is useful for supporting application-specific UDFs or custom engine extensions.
199    fn with_custom_scalar_overrides(
200        self,
201        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
202    ) -> Self
203    where
204        Self: Sized,
205    {
206        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
207    }
208
209    /// Allow to unparse a qualified column with a full qualified name
210    /// (e.g. catalog_name.schema_name.table_name.column_name)
211    /// Otherwise, the column will be unparsed with only the table name and column name
212    /// (e.g. table_name.column_name)
213    fn full_qualified_col(&self) -> bool {
214        false
215    }
216
217    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
218    ///
219    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
220    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
221    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
222    fn unnest_as_table_factor(&self) -> bool {
223        false
224    }
225
226    /// Unparse the unnest plan as `LATERAL FLATTEN(INPUT => expr, ...)`.
227    ///
228    /// Snowflake uses FLATTEN as a table function instead of the SQL-standard UNNEST.
229    /// When this returns `true`, the unparser emits
230    /// `LATERAL FLATTEN(INPUT => <col>, OUTER => <bool>)` in the FROM clause.
231    fn unnest_as_lateral_flatten(&self) -> bool {
232        false
233    }
234
235    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
236    /// Returns None if the default unparsing should be used, or Some(String) if there is
237    /// a custom implementation for the alias.
238    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
239        Ok(None)
240    }
241
242    /// Allows the dialect to support the QUALIFY clause
243    ///
244    /// Some dialects, like Postgres, do not support the QUALIFY clause
245    fn supports_qualify(&self) -> bool {
246        true
247    }
248
249    /// Allows the dialect to override logic of formatting datetime with tz into string.
250    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, _unit: TimeUnit) -> String {
251        dt.to_rfc3339()
252    }
253
254    /// Whether the dialect supports an empty select list such as `SELECT FROM table`.
255    ///
256    /// An empty select list returns rows without any column data, which is useful for:
257    /// - Counting rows: `SELECT FROM users WHERE active = true` (combined with `COUNT(*)`)
258    /// - Testing row existence without retrieving column data
259    /// - Performance optimization when only row counts or existence checks are needed
260    ///
261    /// # Default
262    ///
263    /// Returns `false` for maximum compatibility across SQL dialects. When `false`,
264    /// the unparser falls back to `SELECT 1 FROM table`.
265    ///
266    /// # Implementation Note
267    ///
268    /// Specific dialects should override this method to return `true` if they support
269    /// the empty select list syntax (e.g., PostgreSQL).
270    ///
271    /// # Example SQL Output
272    ///
273    /// ```sql
274    /// -- When supported:
275    /// SELECT FROM users WHERE active = true;
276    ///
277    /// -- Fallback when unsupported:
278    /// SELECT 1 FROM users WHERE active = true;
279    /// ```
280    fn supports_empty_select_list(&self) -> bool {
281        false
282    }
283
284    /// Override the default string literal unparsing.
285    ///
286    /// Returns `Some(ast::Expr)` to replace the default single-quoted string,
287    /// or `None` to use the default behavior.
288    ///
289    /// For example, MSSQL requires non-ASCII strings to use national string
290    /// literal syntax (`N'datafusion資料融合'`).
291    fn string_literal_to_sql(&self, _s: &str) -> Option<ast::Expr> {
292        None
293    }
294}
295
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
/// Different DBMSs follow different standards; popular ones are:
/// - postgres_verbose: '2 years 15 months 100 weeks 99 hours 123456789 milliseconds',
///   which is compatible with the arrow display format, as well as duckdb
/// - sql standard: '1-2' for year-month, or '1 10:10:10.123456' for day-time
///   <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
#[derive(Clone, Copy)]
pub enum IntervalStyle {
    /// Verbose Postgres form, e.g.
    /// '2 years 15 months 100 weeks 99 hours 123456789 milliseconds'.
    PostgresVerbose,
    /// SQL standard form, e.g. '1-2' (year-month) or '1 10:10:10.123456' (day-time).
    SQLStandard,
    /// Interval form selected by `MySqlDialect`.
    MySQL,
}
310
/// Datetime subfield extraction style for unparsing
///
/// `<https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>`
/// Different DBMSs follow different standards; popular ones are:
/// date_part('YEAR', date '2001-02-16')
/// EXTRACT(YEAR from date '2001-02-16')
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Clone, Copy, PartialEq)]
pub enum DateFieldExtractStyle {
    /// Function-call form: `date_part('YEAR', date '2001-02-16')`.
    DatePart,
    /// Keyword form: `EXTRACT(YEAR from date '2001-02-16')`.
    Extract,
    /// Style selected by `SqliteDialect`.
    /// NOTE(review): presumably rendered through SQLite's `strftime` — confirm
    /// against the `date_part_to_sql` helper.
    Strftime,
}
324
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the
/// number of characters in a string:
/// - `Length` style uses length(x)
/// - `CharacterLength` style uses character_length(x)
#[derive(Clone, Copy, PartialEq)]
pub enum CharacterLengthStyle {
    /// Unparse as `length(x)`.
    Length,
    /// Unparse as `character_length(x)`.
    CharacterLength,
}
335
336pub struct DefaultDialect {}
337
338impl Dialect for DefaultDialect {
339    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
340        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
341        let id_upper = identifier.to_uppercase();
342        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
343        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
344        // Also quote identifiers with uppercase letters since unquoted identifiers are
345        // normalized to lowercase by the SQL parser, which would break case-sensitive schemas
346        let needs_quote = (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
347            || !identifier_regex.is_match(identifier)
348            || identifier.chars().any(|c| c.is_ascii_uppercase());
349        if needs_quote { Some('"') } else { None }
350    }
351}
352
353pub struct PostgreSqlDialect {}
354
355impl Dialect for PostgreSqlDialect {
356    fn use_array_keyword_for_array_literals(&self) -> bool {
357        true
358    }
359
360    fn supports_qualify(&self) -> bool {
361        false
362    }
363
364    fn requires_derived_table_alias(&self) -> bool {
365        true
366    }
367
368    fn supports_empty_select_list(&self) -> bool {
369        true
370    }
371
372    fn identifier_quote_style(&self, _: &str) -> Option<char> {
373        Some('"')
374    }
375
376    fn interval_style(&self) -> IntervalStyle {
377        IntervalStyle::PostgresVerbose
378    }
379
380    fn float64_ast_dtype(&self) -> ast::DataType {
381        ast::DataType::DoublePrecision
382    }
383
384    fn int8_cast_dtype(&self) -> ast::DataType {
385        ast::DataType::SmallInt(None)
386    }
387
388    fn scalar_function_to_sql_overrides(
389        &self,
390        unparser: &Unparser,
391        func_name: &str,
392        args: &[Expr],
393    ) -> Result<Option<ast::Expr>> {
394        if func_name == "array_has" {
395            return self.array_has_to_sql_any(unparser, args);
396        }
397
398        if func_name == "round" {
399            return Ok(Some(
400                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
401            ));
402        }
403
404        Ok(None)
405    }
406}
407
408impl PostgreSqlDialect {
409    fn array_has_to_sql_any(
410        &self,
411        unparser: &Unparser,
412        args: &[Expr],
413    ) -> Result<Option<ast::Expr>> {
414        let [haystack, needle] = args else {
415            return internal_err!("array_has expected 2 arguments, got {}", args.len());
416        };
417
418        Ok(Some(ast::Expr::AnyOp {
419            left: Box::new(unparser.expr_to_sql(needle)?),
420            compare_op: BinaryOperator::Eq,
421            right: Box::new(unparser.expr_to_sql(haystack)?),
422            is_some: false,
423        }))
424    }
425
426    fn round_to_sql_enforce_numeric(
427        &self,
428        unparser: &Unparser,
429        func_name: &str,
430        args: &[Expr],
431    ) -> Result<ast::Expr> {
432        let mut args = unparser.function_args_to_sql(args)?;
433
434        // Enforce the first argument to be Numeric
435        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
436            args.first_mut()
437        {
438            if let ast::Expr::Cast { data_type, .. } = expr {
439                // Don't create an additional cast wrapper if we can update the existing one
440                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
441            } else {
442                // Wrap the expression in a new cast
443                *expr = ast::Expr::Cast {
444                    kind: ast::CastKind::Cast,
445                    expr: Box::new(expr.clone()),
446                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
447                    array: false,
448                    format: None,
449                };
450            }
451        }
452
453        Ok(ast::Expr::Function(Function {
454            name: ObjectName::from(vec![Ident {
455                value: func_name.to_string(),
456                quote_style: None,
457                span: Span::empty(),
458            }]),
459            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
460                duplicate_treatment: None,
461                args,
462                clauses: vec![],
463            }),
464            filter: None,
465            null_treatment: None,
466            over: None,
467            within_group: vec![],
468            parameters: ast::FunctionArguments::None,
469            uses_odbc_syntax: false,
470        }))
471    }
472}
473
474#[derive(Default)]
475pub struct DuckDBDialect {
476    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
477}
478
479impl DuckDBDialect {
480    #[must_use]
481    pub fn new() -> Self {
482        Self {
483            custom_scalar_fn_overrides: HashMap::new(),
484        }
485    }
486}
487
488impl Dialect for DuckDBDialect {
489    fn identifier_quote_style(&self, _: &str) -> Option<char> {
490        Some('"')
491    }
492
493    fn character_length_style(&self) -> CharacterLengthStyle {
494        CharacterLengthStyle::Length
495    }
496
497    fn division_operator(&self) -> BinaryOperator {
498        BinaryOperator::DuckIntegerDivide
499    }
500
501    fn with_custom_scalar_overrides(
502        mut self,
503        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
504    ) -> Self {
505        for (func_name, handler) in handlers {
506            self.custom_scalar_fn_overrides
507                .insert(func_name.to_string(), handler);
508        }
509        self
510    }
511
512    fn scalar_function_to_sql_overrides(
513        &self,
514        unparser: &Unparser,
515        func_name: &str,
516        args: &[Expr],
517    ) -> Result<Option<ast::Expr>> {
518        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
519            return handler(unparser, args);
520        }
521
522        if func_name == "character_length" {
523            return character_length_to_sql(
524                unparser,
525                self.character_length_style(),
526                args,
527            );
528        }
529
530        Ok(None)
531    }
532}
533
534pub struct MySqlDialect {}
535
536impl Dialect for MySqlDialect {
537    fn supports_qualify(&self) -> bool {
538        false
539    }
540
541    fn identifier_quote_style(&self, _: &str) -> Option<char> {
542        Some('`')
543    }
544
545    fn supports_nulls_first_in_sort(&self) -> bool {
546        false
547    }
548
549    fn interval_style(&self) -> IntervalStyle {
550        IntervalStyle::MySQL
551    }
552
553    fn utf8_cast_dtype(&self) -> ast::DataType {
554        ast::DataType::Char(None)
555    }
556
557    fn large_utf8_cast_dtype(&self) -> ast::DataType {
558        ast::DataType::Char(None)
559    }
560
561    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
562        DateFieldExtractStyle::Extract
563    }
564
565    fn int64_cast_dtype(&self) -> ast::DataType {
566        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
567    }
568
569    fn int32_cast_dtype(&self) -> ast::DataType {
570        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
571    }
572
573    fn timestamp_cast_dtype(
574        &self,
575        _time_unit: &TimeUnit,
576        _tz: &Option<Arc<str>>,
577    ) -> ast::DataType {
578        ast::DataType::Datetime(None)
579    }
580
581    fn requires_derived_table_alias(&self) -> bool {
582        true
583    }
584
585    fn scalar_function_to_sql_overrides(
586        &self,
587        unparser: &Unparser,
588        func_name: &str,
589        args: &[Expr],
590    ) -> Result<Option<ast::Expr>> {
591        if func_name == "date_part" {
592            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
593        }
594
595        Ok(None)
596    }
597}
598
599pub struct SqliteDialect {}
600
601impl Dialect for SqliteDialect {
602    fn supports_qualify(&self) -> bool {
603        false
604    }
605
606    fn identifier_quote_style(&self, _: &str) -> Option<char> {
607        Some('`')
608    }
609
610    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
611        DateFieldExtractStyle::Strftime
612    }
613
614    fn date32_cast_dtype(&self) -> ast::DataType {
615        ast::DataType::Text
616    }
617
618    fn character_length_style(&self) -> CharacterLengthStyle {
619        CharacterLengthStyle::Length
620    }
621
622    fn supports_column_alias_in_table_alias(&self) -> bool {
623        false
624    }
625
626    fn timestamp_cast_dtype(
627        &self,
628        _time_unit: &TimeUnit,
629        _tz: &Option<Arc<str>>,
630    ) -> ast::DataType {
631        ast::DataType::Text
632    }
633
634    fn scalar_function_to_sql_overrides(
635        &self,
636        unparser: &Unparser,
637        func_name: &str,
638        args: &[Expr],
639    ) -> Result<Option<ast::Expr>> {
640        match func_name {
641            "date_part" => {
642                date_part_to_sql(unparser, self.date_field_extract_style(), args)
643            }
644            "character_length" => {
645                character_length_to_sql(unparser, self.character_length_style(), args)
646            }
647            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
648            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
649            _ => Ok(None),
650        }
651    }
652}
653
654#[derive(Default)]
655pub struct BigQueryDialect {}
656
657impl Dialect for BigQueryDialect {
658    fn identifier_quote_style(&self, _: &str) -> Option<char> {
659        Some('`')
660    }
661
662    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
663        // Check if alias contains any special characters not supported by BigQuery col names
664        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
665        let special_chars: [char; 20] = [
666            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
667            '^', '`', '{', '}', '~',
668        ];
669
670        if alias.chars().any(|c| special_chars.contains(&c)) {
671            let mut encoded_name = String::new();
672            for c in alias.chars() {
673                if special_chars.contains(&c) {
674                    encoded_name.push_str(&format!("_{}", c as u32));
675                } else {
676                    encoded_name.push(c);
677                }
678            }
679            Ok(Some(encoded_name))
680        } else {
681            Ok(Some(alias.to_string()))
682        }
683    }
684
685    fn unnest_as_table_factor(&self) -> bool {
686        true
687    }
688
689    fn supports_column_alias_in_table_alias(&self) -> bool {
690        false
691    }
692
693    fn float64_ast_dtype(&self) -> ast::DataType {
694        ast::DataType::Float64
695    }
696
697    fn utf8_cast_dtype(&self) -> ast::DataType {
698        ast::DataType::String(None)
699    }
700
701    fn large_utf8_cast_dtype(&self) -> ast::DataType {
702        ast::DataType::String(None)
703    }
704
705    fn timestamp_cast_dtype(
706        &self,
707        _time_unit: &TimeUnit,
708        _tz: &Option<Arc<str>>,
709    ) -> ast::DataType {
710        ast::DataType::Timestamp(None, TimezoneInfo::None)
711    }
712
713    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
714        DateFieldExtractStyle::Extract
715    }
716
717    fn interval_style(&self) -> IntervalStyle {
718        IntervalStyle::SQLStandard
719    }
720
721    fn scalar_function_to_sql_overrides(
722        &self,
723        unparser: &Unparser,
724        func_name: &str,
725        args: &[Expr],
726    ) -> Result<Option<ast::Expr>> {
727        if func_name == "date_part" {
728            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
729        }
730
731        Ok(None)
732    }
733}
734
735impl BigQueryDialect {
736    #[must_use]
737    pub fn new() -> Self {
738        Self {}
739    }
740}
741
/// Dialect for Snowflake SQL.
///
/// How it differs from the default dialect:
/// - Identifiers are quoted with double quotes
/// - `NULLS FIRST`/`NULLS LAST` is supported in `ORDER BY`
/// - Empty select lists (`SELECT FROM t`) are not supported
/// - Column aliases in table alias definitions are not supported
///   (Snowflake accepts the syntax but silently ignores the renames in join contexts)
/// - `UNNEST` plans are unparsed as `LATERAL FLATTEN(INPUT => expr, ...)`
pub struct SnowflakeDialect {}

#[expect(clippy::new_without_default)]
impl SnowflakeDialect {
    /// Creates a new Snowflake dialect.
    #[must_use]
    pub fn new() -> Self {
        SnowflakeDialect {}
    }
}
760
761impl Dialect for SnowflakeDialect {
762    fn identifier_quote_style(&self, _: &str) -> Option<char> {
763        Some('"')
764    }
765
766    fn supports_nulls_first_in_sort(&self) -> bool {
767        true
768    }
769
770    fn supports_empty_select_list(&self) -> bool {
771        false
772    }
773
774    fn supports_column_alias_in_table_alias(&self) -> bool {
775        false
776    }
777
778    fn timestamp_cast_dtype(
779        &self,
780        _time_unit: &TimeUnit,
781        tz: &Option<Arc<str>>,
782    ) -> ast::DataType {
783        if tz.is_some() {
784            ast::DataType::Timestamp(None, TimezoneInfo::WithTimeZone)
785        } else {
786            ast::DataType::Timestamp(None, TimezoneInfo::None)
787        }
788    }
789
790    fn unnest_as_lateral_flatten(&self) -> bool {
791        true
792    }
793}
794
795pub struct CustomDialect {
796    identifier_quote_style: Option<char>,
797    supports_nulls_first_in_sort: bool,
798    use_timestamp_for_date64: bool,
799    interval_style: IntervalStyle,
800    float64_ast_dtype: ast::DataType,
801    utf8_cast_dtype: ast::DataType,
802    large_utf8_cast_dtype: ast::DataType,
803    date_field_extract_style: DateFieldExtractStyle,
804    character_length_style: CharacterLengthStyle,
805    int8_cast_dtype: ast::DataType,
806    int64_cast_dtype: ast::DataType,
807    int32_cast_dtype: ast::DataType,
808    timestamp_cast_dtype: ast::DataType,
809    timestamp_tz_cast_dtype: ast::DataType,
810    date32_cast_dtype: ast::DataType,
811    supports_column_alias_in_table_alias: bool,
812    requires_derived_table_alias: bool,
813    division_operator: BinaryOperator,
814    window_func_support_window_frame: bool,
815    full_qualified_col: bool,
816    unnest_as_table_factor: bool,
817    unnest_as_lateral_flatten: bool,
818}
819
820impl Default for CustomDialect {
821    fn default() -> Self {
822        Self {
823            identifier_quote_style: None,
824            supports_nulls_first_in_sort: true,
825            use_timestamp_for_date64: false,
826            interval_style: IntervalStyle::SQLStandard,
827            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
828            utf8_cast_dtype: ast::DataType::Varchar(None),
829            large_utf8_cast_dtype: ast::DataType::Text,
830            date_field_extract_style: DateFieldExtractStyle::DatePart,
831            character_length_style: CharacterLengthStyle::CharacterLength,
832            int8_cast_dtype: ast::DataType::TinyInt(None),
833            int64_cast_dtype: ast::DataType::BigInt(None),
834            int32_cast_dtype: ast::DataType::Integer(None),
835            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
836            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
837                None,
838                TimezoneInfo::WithTimeZone,
839            ),
840            date32_cast_dtype: ast::DataType::Date,
841            supports_column_alias_in_table_alias: true,
842            requires_derived_table_alias: false,
843            division_operator: BinaryOperator::Divide,
844            window_func_support_window_frame: true,
845            full_qualified_col: false,
846            unnest_as_table_factor: false,
847            unnest_as_lateral_flatten: false,
848        }
849    }
850}
851
852impl Dialect for CustomDialect {
853    fn identifier_quote_style(&self, _: &str) -> Option<char> {
854        self.identifier_quote_style
855    }
856
857    fn supports_nulls_first_in_sort(&self) -> bool {
858        self.supports_nulls_first_in_sort
859    }
860
861    fn use_timestamp_for_date64(&self) -> bool {
862        self.use_timestamp_for_date64
863    }
864
865    fn interval_style(&self) -> IntervalStyle {
866        self.interval_style
867    }
868
869    fn float64_ast_dtype(&self) -> ast::DataType {
870        self.float64_ast_dtype.clone()
871    }
872
873    fn utf8_cast_dtype(&self) -> ast::DataType {
874        self.utf8_cast_dtype.clone()
875    }
876
877    fn large_utf8_cast_dtype(&self) -> ast::DataType {
878        self.large_utf8_cast_dtype.clone()
879    }
880
881    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
882        self.date_field_extract_style
883    }
884
885    fn character_length_style(&self) -> CharacterLengthStyle {
886        self.character_length_style
887    }
888
889    fn int64_cast_dtype(&self) -> ast::DataType {
890        self.int64_cast_dtype.clone()
891    }
892
893    fn int8_cast_dtype(&self) -> ast::DataType {
894        self.int8_cast_dtype.clone()
895    }
896
897    fn int32_cast_dtype(&self) -> ast::DataType {
898        self.int32_cast_dtype.clone()
899    }
900
901    fn timestamp_cast_dtype(
902        &self,
903        _time_unit: &TimeUnit,
904        tz: &Option<Arc<str>>,
905    ) -> ast::DataType {
906        if tz.is_some() {
907            self.timestamp_tz_cast_dtype.clone()
908        } else {
909            self.timestamp_cast_dtype.clone()
910        }
911    }
912
913    fn date32_cast_dtype(&self) -> ast::DataType {
914        self.date32_cast_dtype.clone()
915    }
916
917    fn supports_column_alias_in_table_alias(&self) -> bool {
918        self.supports_column_alias_in_table_alias
919    }
920
921    fn scalar_function_to_sql_overrides(
922        &self,
923        unparser: &Unparser,
924        func_name: &str,
925        args: &[Expr],
926    ) -> Result<Option<ast::Expr>> {
927        match func_name {
928            "date_part" => {
929                date_part_to_sql(unparser, self.date_field_extract_style(), args)
930            }
931            "character_length" => {
932                character_length_to_sql(unparser, self.character_length_style(), args)
933            }
934            _ => Ok(None),
935        }
936    }
937
938    fn requires_derived_table_alias(&self) -> bool {
939        self.requires_derived_table_alias
940    }
941
942    fn division_operator(&self) -> BinaryOperator {
943        self.division_operator.clone()
944    }
945
946    fn window_func_support_window_frame(
947        &self,
948        _func_name: &str,
949        _start_bound: &WindowFrameBound,
950        _end_bound: &WindowFrameBound,
951    ) -> bool {
952        self.window_func_support_window_frame
953    }
954
955    fn full_qualified_col(&self) -> bool {
956        self.full_qualified_col
957    }
958
959    fn unnest_as_table_factor(&self) -> bool {
960        self.unnest_as_table_factor
961    }
962
963    fn unnest_as_lateral_flatten(&self) -> bool {
964        self.unnest_as_lateral_flatten
965    }
966}
967
968/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
969///
970///
971/// # Examples
972///
973/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
974/// but with `use_timestamp_for_date64` overridden to `true`
975///
976/// ```
977/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
978/// let dialect = CustomDialectBuilder::new()
979///     .with_use_timestamp_for_date64(true)
980///     .build();
981/// ```
982pub struct CustomDialectBuilder {
983    identifier_quote_style: Option<char>,
984    supports_nulls_first_in_sort: bool,
985    use_timestamp_for_date64: bool,
986    interval_style: IntervalStyle,
987    float64_ast_dtype: ast::DataType,
988    utf8_cast_dtype: ast::DataType,
989    large_utf8_cast_dtype: ast::DataType,
990    date_field_extract_style: DateFieldExtractStyle,
991    character_length_style: CharacterLengthStyle,
992    int8_cast_dtype: ast::DataType,
993    int64_cast_dtype: ast::DataType,
994    int32_cast_dtype: ast::DataType,
995    timestamp_cast_dtype: ast::DataType,
996    timestamp_tz_cast_dtype: ast::DataType,
997    date32_cast_dtype: ast::DataType,
998    supports_column_alias_in_table_alias: bool,
999    requires_derived_table_alias: bool,
1000    division_operator: BinaryOperator,
1001    window_func_support_window_frame: bool,
1002    full_qualified_col: bool,
1003    unnest_as_table_factor: bool,
1004    unnest_as_lateral_flatten: bool,
1005}
1006
1007impl Default for CustomDialectBuilder {
1008    fn default() -> Self {
1009        Self::new()
1010    }
1011}
1012
1013impl CustomDialectBuilder {
1014    pub fn new() -> Self {
1015        Self {
1016            identifier_quote_style: None,
1017            supports_nulls_first_in_sort: true,
1018            use_timestamp_for_date64: false,
1019            interval_style: IntervalStyle::PostgresVerbose,
1020            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
1021            utf8_cast_dtype: ast::DataType::Varchar(None),
1022            large_utf8_cast_dtype: ast::DataType::Text,
1023            date_field_extract_style: DateFieldExtractStyle::DatePart,
1024            character_length_style: CharacterLengthStyle::CharacterLength,
1025            int8_cast_dtype: ast::DataType::TinyInt(None),
1026            int64_cast_dtype: ast::DataType::BigInt(None),
1027            int32_cast_dtype: ast::DataType::Integer(None),
1028            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
1029            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
1030                None,
1031                TimezoneInfo::WithTimeZone,
1032            ),
1033            date32_cast_dtype: ast::DataType::Date,
1034            supports_column_alias_in_table_alias: true,
1035            requires_derived_table_alias: false,
1036            division_operator: BinaryOperator::Divide,
1037            window_func_support_window_frame: true,
1038            full_qualified_col: false,
1039            unnest_as_table_factor: false,
1040            unnest_as_lateral_flatten: false,
1041        }
1042    }
1043
1044    pub fn build(self) -> CustomDialect {
1045        CustomDialect {
1046            identifier_quote_style: self.identifier_quote_style,
1047            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
1048            use_timestamp_for_date64: self.use_timestamp_for_date64,
1049            interval_style: self.interval_style,
1050            float64_ast_dtype: self.float64_ast_dtype,
1051            utf8_cast_dtype: self.utf8_cast_dtype,
1052            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
1053            date_field_extract_style: self.date_field_extract_style,
1054            character_length_style: self.character_length_style,
1055            int8_cast_dtype: self.int8_cast_dtype,
1056            int64_cast_dtype: self.int64_cast_dtype,
1057            int32_cast_dtype: self.int32_cast_dtype,
1058            timestamp_cast_dtype: self.timestamp_cast_dtype,
1059            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
1060            date32_cast_dtype: self.date32_cast_dtype,
1061            supports_column_alias_in_table_alias: self
1062                .supports_column_alias_in_table_alias,
1063            requires_derived_table_alias: self.requires_derived_table_alias,
1064            division_operator: self.division_operator,
1065            window_func_support_window_frame: self.window_func_support_window_frame,
1066            full_qualified_col: self.full_qualified_col,
1067            unnest_as_table_factor: self.unnest_as_table_factor,
1068            unnest_as_lateral_flatten: self.unnest_as_lateral_flatten,
1069        }
1070    }
1071
1072    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
1073    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
1074        self.identifier_quote_style = Some(identifier_quote_style);
1075        self
1076    }
1077
1078    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
1079    pub fn with_supports_nulls_first_in_sort(
1080        mut self,
1081        supports_nulls_first_in_sort: bool,
1082    ) -> Self {
1083        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
1084        self
1085    }
1086
1087    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
1088    pub fn with_use_timestamp_for_date64(
1089        mut self,
1090        use_timestamp_for_date64: bool,
1091    ) -> Self {
1092        self.use_timestamp_for_date64 = use_timestamp_for_date64;
1093        self
1094    }
1095
1096    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
1097    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
1098        self.interval_style = interval_style;
1099        self
1100    }
1101
1102    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
1103    pub fn with_character_length_style(
1104        mut self,
1105        character_length_style: CharacterLengthStyle,
1106    ) -> Self {
1107        self.character_length_style = character_length_style;
1108        self
1109    }
1110
1111    /// Customize the dialect with a specific SQL type for Int8 casting: TinyInt, SmallInt, etc.
1112    pub fn with_int8_cast_dtype(mut self, int8_cast_dtype: ast::DataType) -> Self {
1113        self.int8_cast_dtype = int8_cast_dtype;
1114        self
1115    }
1116
1117    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
1118    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
1119        self.float64_ast_dtype = float64_ast_dtype;
1120        self
1121    }
1122
1123    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
1124    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
1125        self.utf8_cast_dtype = utf8_cast_dtype;
1126        self
1127    }
1128
1129    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
1130    pub fn with_large_utf8_cast_dtype(
1131        mut self,
1132        large_utf8_cast_dtype: ast::DataType,
1133    ) -> Self {
1134        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
1135        self
1136    }
1137
1138    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
1139    pub fn with_date_field_extract_style(
1140        mut self,
1141        date_field_extract_style: DateFieldExtractStyle,
1142    ) -> Self {
1143        self.date_field_extract_style = date_field_extract_style;
1144        self
1145    }
1146
1147    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
1148    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
1149        self.int64_cast_dtype = int64_cast_dtype;
1150        self
1151    }
1152
1153    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
1154    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
1155        self.int32_cast_dtype = int32_cast_dtype;
1156        self
1157    }
1158
1159    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
1160    pub fn with_timestamp_cast_dtype(
1161        mut self,
1162        timestamp_cast_dtype: ast::DataType,
1163        timestamp_tz_cast_dtype: ast::DataType,
1164    ) -> Self {
1165        self.timestamp_cast_dtype = timestamp_cast_dtype;
1166        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
1167        self
1168    }
1169
1170    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
1171        self.date32_cast_dtype = date32_cast_dtype;
1172        self
1173    }
1174
1175    /// Customize the dialect to support column aliases as part of alias table definition
1176    pub fn with_supports_column_alias_in_table_alias(
1177        mut self,
1178        supports_column_alias_in_table_alias: bool,
1179    ) -> Self {
1180        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
1181        self
1182    }
1183
1184    pub fn with_requires_derived_table_alias(
1185        mut self,
1186        requires_derived_table_alias: bool,
1187    ) -> Self {
1188        self.requires_derived_table_alias = requires_derived_table_alias;
1189        self
1190    }
1191
1192    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
1193        self.division_operator = division_operator;
1194        self
1195    }
1196
1197    pub fn with_window_func_support_window_frame(
1198        mut self,
1199        window_func_support_window_frame: bool,
1200    ) -> Self {
1201        self.window_func_support_window_frame = window_func_support_window_frame;
1202        self
1203    }
1204
1205    /// Customize the dialect to allow full qualified column names
1206    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
1207        self.full_qualified_col = full_qualified_col;
1208        self
1209    }
1210
1211    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
1212        self.unnest_as_table_factor = unnest_as_table_factor;
1213        self
1214    }
1215
1216    pub fn with_unnest_as_lateral_flatten(
1217        mut self,
1218        unnest_as_lateral_flatten: bool,
1219    ) -> Self {
1220        self.unnest_as_lateral_flatten = unnest_as_lateral_flatten;
1221        self
1222    }
1223}