datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql, Unparser,
23};
24use arrow::array::timezone::Tz;
25use arrow::datatypes::TimeUnit;
26use chrono::DateTime;
27use datafusion_common::Result;
28use datafusion_expr::Expr;
29use regex::Regex;
30use sqlparser::tokenizer::Span;
31use sqlparser::{
32    ast::{
33        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
34    },
35    keywords::ALL_KEYWORDS,
36};
37
38pub type ScalarFnToSqlHandler =
39    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
40
41/// `Dialect` to use for Unparsing
42///
43/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
44/// but this behavior can be overridden as needed
45///
46/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
47///
48/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
49/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
50pub trait Dialect: Send + Sync {
51    /// Return the character used to quote identifiers.
52    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
53
54    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
55    fn supports_nulls_first_in_sort(&self) -> bool {
56        true
57    }
58
59    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
60    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
61    fn use_timestamp_for_date64(&self) -> bool {
62        false
63    }
64
65    fn interval_style(&self) -> IntervalStyle {
66        IntervalStyle::PostgresVerbose
67    }
68
69    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
70    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
71    fn float64_ast_dtype(&self) -> ast::DataType {
72        ast::DataType::Double(ast::ExactNumberInfo::None)
73    }
74
75    /// The SQL type to use for Arrow Utf8 unparsing
76    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
77    fn utf8_cast_dtype(&self) -> ast::DataType {
78        ast::DataType::Varchar(None)
79    }
80
81    /// The SQL type to use for Arrow LargeUtf8 unparsing
82    /// Most dialects use TEXT, but some, like MySQL, require CHAR
83    fn large_utf8_cast_dtype(&self) -> ast::DataType {
84        ast::DataType::Text
85    }
86
87    /// The date field extract style to use: `DateFieldExtractStyle`
88    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
89        DateFieldExtractStyle::DatePart
90    }
91
92    /// The character length extraction style to use: `CharacterLengthStyle`
93    fn character_length_style(&self) -> CharacterLengthStyle {
94        CharacterLengthStyle::CharacterLength
95    }
96
97    /// The SQL type to use for Arrow Int64 unparsing
98    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
99    fn int64_cast_dtype(&self) -> ast::DataType {
100        ast::DataType::BigInt(None)
101    }
102
103    /// The SQL type to use for Arrow Int32 unparsing
104    /// Most dialects use Integer, but some, like MySQL, require SIGNED
105    fn int32_cast_dtype(&self) -> ast::DataType {
106        ast::DataType::Integer(None)
107    }
108
109    /// The SQL type to use for Timestamp unparsing
110    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
111    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
112    fn timestamp_cast_dtype(
113        &self,
114        _time_unit: &TimeUnit,
115        tz: &Option<Arc<str>>,
116    ) -> ast::DataType {
117        let tz_info = match tz {
118            Some(_) => TimezoneInfo::WithTimeZone,
119            None => TimezoneInfo::None,
120        };
121
122        ast::DataType::Timestamp(None, tz_info)
123    }
124
125    /// The SQL type to use for Arrow Date32 unparsing
126    /// Most dialects use Date, but some, like SQLite require TEXT
127    fn date32_cast_dtype(&self) -> ast::DataType {
128        ast::DataType::Date
129    }
130
131    /// Does the dialect support specifying column aliases as part of alias table definition?
132    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
133    fn supports_column_alias_in_table_alias(&self) -> bool {
134        true
135    }
136
137    /// Whether the dialect requires a table alias for any subquery in the FROM clause
138    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
139    fn requires_derived_table_alias(&self) -> bool {
140        false
141    }
142
143    /// The division operator for the dialect
144    /// Most dialect uses ` BinaryOperator::Divide` (/)
145    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
146    fn division_operator(&self) -> BinaryOperator {
147        BinaryOperator::Divide
148    }
149
150    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
151    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
152    /// a custom implementation for the function.
153    fn scalar_function_to_sql_overrides(
154        &self,
155        _unparser: &Unparser,
156        _func_name: &str,
157        _args: &[Expr],
158    ) -> Result<Option<ast::Expr>> {
159        Ok(None)
160    }
161
162    /// Allows the dialect to choose to omit window frame in unparsing
163    /// based on function name and window frame bound
164    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
165    fn window_func_support_window_frame(
166        &self,
167        _func_name: &str,
168        _start_bound: &WindowFrameBound,
169        _end_bound: &WindowFrameBound,
170    ) -> bool {
171        true
172    }
173
174    /// Extends the dialect's default rules for unparsing scalar functions.
175    /// This is useful for supporting application-specific UDFs or custom engine extensions.
176    fn with_custom_scalar_overrides(
177        self,
178        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
179    ) -> Self
180    where
181        Self: Sized,
182    {
183        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
184    }
185
186    /// Allow to unparse a qualified column with a full qualified name
187    /// (e.g. catalog_name.schema_name.table_name.column_name)
188    /// Otherwise, the column will be unparsed with only the table name and column name
189    /// (e.g. table_name.column_name)
190    fn full_qualified_col(&self) -> bool {
191        false
192    }
193
194    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
195    ///
196    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
197    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
198    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
199    fn unnest_as_table_factor(&self) -> bool {
200        false
201    }
202
203    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
204    /// Returns None if the default unparsing should be used, or Some(String) if there is
205    /// a custom implementation for the alias.
206    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
207        Ok(None)
208    }
209
210    /// Allows the dialect to support the QUALIFY clause
211    ///
212    /// Some dialects, like Postgres, do not support the QUALIFY clause
213    fn supports_qualify(&self) -> bool {
214        true
215    }
216
217    /// Allows the dialect to override logic of formatting datetime with tz into string.
218    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, _unit: TimeUnit) -> String {
219        dt.to_string()
220    }
221}
222
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
///
/// Different DBMSs follow different standards for writing intervals; the
/// variants below cover the popular ones. For the SQL standard format see
/// <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntervalStyle {
    /// `postgres_verbose` format, e.g.
    /// '2 years 15 months 100 weeks 99 hours 123456789 milliseconds', which is
    /// compatible with the arrow display format, as well as duckdb.
    PostgresVerbose,
    /// SQL standard format: '1-2' for year-month, or '1 10:10:10.123456' for
    /// day-time.
    SQLStandard,
    /// The style selected by [`MySqlDialect`].
    MySQL,
}
237
/// Datetime subfield extraction style for unparsing
///
/// <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>
///
/// Different DBMSs follow different standards. Some, like Postgres, support
/// both `date_part` and `EXTRACT`, whereas others like MySQL require `EXTRACT`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// `date_part('YEAR', date '2001-02-16')`
    DatePart,
    /// `EXTRACT(YEAR from date '2001-02-16')`
    Extract,
    /// The style selected by [`SqliteDialect`].
    Strftime,
}
251
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the
/// number of characters in a string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterLengthStyle {
    /// Uses `length(x)`
    Length,
    /// Uses `character_length(x)`
    CharacterLength,
}
262
263pub struct DefaultDialect {}
264
265impl Dialect for DefaultDialect {
266    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
267        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
268        let id_upper = identifier.to_uppercase();
269        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
270        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
271        if (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
272            || !identifier_regex.is_match(identifier)
273        {
274            Some('"')
275        } else {
276            None
277        }
278    }
279}
280
281pub struct PostgreSqlDialect {}
282
283impl Dialect for PostgreSqlDialect {
284    fn supports_qualify(&self) -> bool {
285        false
286    }
287
288    fn requires_derived_table_alias(&self) -> bool {
289        true
290    }
291
292    fn identifier_quote_style(&self, _: &str) -> Option<char> {
293        Some('"')
294    }
295
296    fn interval_style(&self) -> IntervalStyle {
297        IntervalStyle::PostgresVerbose
298    }
299
300    fn float64_ast_dtype(&self) -> ast::DataType {
301        ast::DataType::DoublePrecision
302    }
303
304    fn scalar_function_to_sql_overrides(
305        &self,
306        unparser: &Unparser,
307        func_name: &str,
308        args: &[Expr],
309    ) -> Result<Option<ast::Expr>> {
310        if func_name == "round" {
311            return Ok(Some(
312                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
313            ));
314        }
315
316        Ok(None)
317    }
318}
319
320impl PostgreSqlDialect {
321    fn round_to_sql_enforce_numeric(
322        &self,
323        unparser: &Unparser,
324        func_name: &str,
325        args: &[Expr],
326    ) -> Result<ast::Expr> {
327        let mut args = unparser.function_args_to_sql(args)?;
328
329        // Enforce the first argument to be Numeric
330        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
331            args.first_mut()
332        {
333            if let ast::Expr::Cast { data_type, .. } = expr {
334                // Don't create an additional cast wrapper if we can update the existing one
335                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
336            } else {
337                // Wrap the expression in a new cast
338                *expr = ast::Expr::Cast {
339                    kind: ast::CastKind::Cast,
340                    expr: Box::new(expr.clone()),
341                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
342                    format: None,
343                };
344            }
345        }
346
347        Ok(ast::Expr::Function(Function {
348            name: ObjectName::from(vec![Ident {
349                value: func_name.to_string(),
350                quote_style: None,
351                span: Span::empty(),
352            }]),
353            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
354                duplicate_treatment: None,
355                args,
356                clauses: vec![],
357            }),
358            filter: None,
359            null_treatment: None,
360            over: None,
361            within_group: vec![],
362            parameters: ast::FunctionArguments::None,
363            uses_odbc_syntax: false,
364        }))
365    }
366}
367
368#[derive(Default)]
369pub struct DuckDBDialect {
370    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
371}
372
373impl DuckDBDialect {
374    #[must_use]
375    pub fn new() -> Self {
376        Self {
377            custom_scalar_fn_overrides: HashMap::new(),
378        }
379    }
380}
381
382impl Dialect for DuckDBDialect {
383    fn identifier_quote_style(&self, _: &str) -> Option<char> {
384        Some('"')
385    }
386
387    fn character_length_style(&self) -> CharacterLengthStyle {
388        CharacterLengthStyle::Length
389    }
390
391    fn division_operator(&self) -> BinaryOperator {
392        BinaryOperator::DuckIntegerDivide
393    }
394
395    fn with_custom_scalar_overrides(
396        mut self,
397        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
398    ) -> Self {
399        for (func_name, handler) in handlers {
400            self.custom_scalar_fn_overrides
401                .insert(func_name.to_string(), handler);
402        }
403        self
404    }
405
406    fn scalar_function_to_sql_overrides(
407        &self,
408        unparser: &Unparser,
409        func_name: &str,
410        args: &[Expr],
411    ) -> Result<Option<ast::Expr>> {
412        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
413            return handler(unparser, args);
414        }
415
416        if func_name == "character_length" {
417            return character_length_to_sql(
418                unparser,
419                self.character_length_style(),
420                args,
421            );
422        }
423
424        Ok(None)
425    }
426
427    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, unit: TimeUnit) -> String {
428        let format = match unit {
429            TimeUnit::Second => "%Y-%m-%d %H:%M:%S%:z",
430            TimeUnit::Millisecond => "%Y-%m-%d %H:%M:%S%.3f%:z",
431            TimeUnit::Microsecond => "%Y-%m-%d %H:%M:%S%.6f%:z",
432            TimeUnit::Nanosecond => "%Y-%m-%d %H:%M:%S%.9f%:z",
433        };
434
435        dt.format(format).to_string()
436    }
437}
438
439pub struct MySqlDialect {}
440
441impl Dialect for MySqlDialect {
442    fn supports_qualify(&self) -> bool {
443        false
444    }
445
446    fn identifier_quote_style(&self, _: &str) -> Option<char> {
447        Some('`')
448    }
449
450    fn supports_nulls_first_in_sort(&self) -> bool {
451        false
452    }
453
454    fn interval_style(&self) -> IntervalStyle {
455        IntervalStyle::MySQL
456    }
457
458    fn utf8_cast_dtype(&self) -> ast::DataType {
459        ast::DataType::Char(None)
460    }
461
462    fn large_utf8_cast_dtype(&self) -> ast::DataType {
463        ast::DataType::Char(None)
464    }
465
466    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
467        DateFieldExtractStyle::Extract
468    }
469
470    fn int64_cast_dtype(&self) -> ast::DataType {
471        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
472    }
473
474    fn int32_cast_dtype(&self) -> ast::DataType {
475        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
476    }
477
478    fn timestamp_cast_dtype(
479        &self,
480        _time_unit: &TimeUnit,
481        _tz: &Option<Arc<str>>,
482    ) -> ast::DataType {
483        ast::DataType::Datetime(None)
484    }
485
486    fn requires_derived_table_alias(&self) -> bool {
487        true
488    }
489
490    fn scalar_function_to_sql_overrides(
491        &self,
492        unparser: &Unparser,
493        func_name: &str,
494        args: &[Expr],
495    ) -> Result<Option<ast::Expr>> {
496        if func_name == "date_part" {
497            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
498        }
499
500        Ok(None)
501    }
502}
503
504pub struct SqliteDialect {}
505
506impl Dialect for SqliteDialect {
507    fn supports_qualify(&self) -> bool {
508        false
509    }
510
511    fn identifier_quote_style(&self, _: &str) -> Option<char> {
512        Some('`')
513    }
514
515    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
516        DateFieldExtractStyle::Strftime
517    }
518
519    fn date32_cast_dtype(&self) -> ast::DataType {
520        ast::DataType::Text
521    }
522
523    fn character_length_style(&self) -> CharacterLengthStyle {
524        CharacterLengthStyle::Length
525    }
526
527    fn supports_column_alias_in_table_alias(&self) -> bool {
528        false
529    }
530
531    fn timestamp_cast_dtype(
532        &self,
533        _time_unit: &TimeUnit,
534        _tz: &Option<Arc<str>>,
535    ) -> ast::DataType {
536        ast::DataType::Text
537    }
538
539    fn scalar_function_to_sql_overrides(
540        &self,
541        unparser: &Unparser,
542        func_name: &str,
543        args: &[Expr],
544    ) -> Result<Option<ast::Expr>> {
545        match func_name {
546            "date_part" => {
547                date_part_to_sql(unparser, self.date_field_extract_style(), args)
548            }
549            "character_length" => {
550                character_length_to_sql(unparser, self.character_length_style(), args)
551            }
552            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
553            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
554            _ => Ok(None),
555        }
556    }
557}
558
559#[derive(Default)]
560pub struct BigQueryDialect {}
561
562impl Dialect for BigQueryDialect {
563    fn identifier_quote_style(&self, _: &str) -> Option<char> {
564        Some('`')
565    }
566
567    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
568        // Check if alias contains any special characters not supported by BigQuery col names
569        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
570        let special_chars: [char; 20] = [
571            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
572            '^', '`', '{', '}', '~',
573        ];
574
575        if alias.chars().any(|c| special_chars.contains(&c)) {
576            let mut encoded_name = String::new();
577            for c in alias.chars() {
578                if special_chars.contains(&c) {
579                    encoded_name.push_str(&format!("_{}", c as u32));
580                } else {
581                    encoded_name.push(c);
582                }
583            }
584            Ok(Some(encoded_name))
585        } else {
586            Ok(Some(alias.to_string()))
587        }
588    }
589
590    fn unnest_as_table_factor(&self) -> bool {
591        true
592    }
593}
594
595impl BigQueryDialect {
596    #[must_use]
597    pub fn new() -> Self {
598        Self {}
599    }
600}
601
602pub struct CustomDialect {
603    identifier_quote_style: Option<char>,
604    supports_nulls_first_in_sort: bool,
605    use_timestamp_for_date64: bool,
606    interval_style: IntervalStyle,
607    float64_ast_dtype: ast::DataType,
608    utf8_cast_dtype: ast::DataType,
609    large_utf8_cast_dtype: ast::DataType,
610    date_field_extract_style: DateFieldExtractStyle,
611    character_length_style: CharacterLengthStyle,
612    int64_cast_dtype: ast::DataType,
613    int32_cast_dtype: ast::DataType,
614    timestamp_cast_dtype: ast::DataType,
615    timestamp_tz_cast_dtype: ast::DataType,
616    date32_cast_dtype: ast::DataType,
617    supports_column_alias_in_table_alias: bool,
618    requires_derived_table_alias: bool,
619    division_operator: BinaryOperator,
620    window_func_support_window_frame: bool,
621    full_qualified_col: bool,
622    unnest_as_table_factor: bool,
623}
624
625impl Default for CustomDialect {
626    fn default() -> Self {
627        Self {
628            identifier_quote_style: None,
629            supports_nulls_first_in_sort: true,
630            use_timestamp_for_date64: false,
631            interval_style: IntervalStyle::SQLStandard,
632            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
633            utf8_cast_dtype: ast::DataType::Varchar(None),
634            large_utf8_cast_dtype: ast::DataType::Text,
635            date_field_extract_style: DateFieldExtractStyle::DatePart,
636            character_length_style: CharacterLengthStyle::CharacterLength,
637            int64_cast_dtype: ast::DataType::BigInt(None),
638            int32_cast_dtype: ast::DataType::Integer(None),
639            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
640            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
641                None,
642                TimezoneInfo::WithTimeZone,
643            ),
644            date32_cast_dtype: ast::DataType::Date,
645            supports_column_alias_in_table_alias: true,
646            requires_derived_table_alias: false,
647            division_operator: BinaryOperator::Divide,
648            window_func_support_window_frame: true,
649            full_qualified_col: false,
650            unnest_as_table_factor: false,
651        }
652    }
653}
654
655impl Dialect for CustomDialect {
656    fn identifier_quote_style(&self, _: &str) -> Option<char> {
657        self.identifier_quote_style
658    }
659
660    fn supports_nulls_first_in_sort(&self) -> bool {
661        self.supports_nulls_first_in_sort
662    }
663
664    fn use_timestamp_for_date64(&self) -> bool {
665        self.use_timestamp_for_date64
666    }
667
668    fn interval_style(&self) -> IntervalStyle {
669        self.interval_style
670    }
671
672    fn float64_ast_dtype(&self) -> ast::DataType {
673        self.float64_ast_dtype.clone()
674    }
675
676    fn utf8_cast_dtype(&self) -> ast::DataType {
677        self.utf8_cast_dtype.clone()
678    }
679
680    fn large_utf8_cast_dtype(&self) -> ast::DataType {
681        self.large_utf8_cast_dtype.clone()
682    }
683
684    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
685        self.date_field_extract_style
686    }
687
688    fn character_length_style(&self) -> CharacterLengthStyle {
689        self.character_length_style
690    }
691
692    fn int64_cast_dtype(&self) -> ast::DataType {
693        self.int64_cast_dtype.clone()
694    }
695
696    fn int32_cast_dtype(&self) -> ast::DataType {
697        self.int32_cast_dtype.clone()
698    }
699
700    fn timestamp_cast_dtype(
701        &self,
702        _time_unit: &TimeUnit,
703        tz: &Option<Arc<str>>,
704    ) -> ast::DataType {
705        if tz.is_some() {
706            self.timestamp_tz_cast_dtype.clone()
707        } else {
708            self.timestamp_cast_dtype.clone()
709        }
710    }
711
712    fn date32_cast_dtype(&self) -> ast::DataType {
713        self.date32_cast_dtype.clone()
714    }
715
716    fn supports_column_alias_in_table_alias(&self) -> bool {
717        self.supports_column_alias_in_table_alias
718    }
719
720    fn scalar_function_to_sql_overrides(
721        &self,
722        unparser: &Unparser,
723        func_name: &str,
724        args: &[Expr],
725    ) -> Result<Option<ast::Expr>> {
726        match func_name {
727            "date_part" => {
728                date_part_to_sql(unparser, self.date_field_extract_style(), args)
729            }
730            "character_length" => {
731                character_length_to_sql(unparser, self.character_length_style(), args)
732            }
733            _ => Ok(None),
734        }
735    }
736
737    fn requires_derived_table_alias(&self) -> bool {
738        self.requires_derived_table_alias
739    }
740
741    fn division_operator(&self) -> BinaryOperator {
742        self.division_operator.clone()
743    }
744
745    fn window_func_support_window_frame(
746        &self,
747        _func_name: &str,
748        _start_bound: &WindowFrameBound,
749        _end_bound: &WindowFrameBound,
750    ) -> bool {
751        self.window_func_support_window_frame
752    }
753
754    fn full_qualified_col(&self) -> bool {
755        self.full_qualified_col
756    }
757
758    fn unnest_as_table_factor(&self) -> bool {
759        self.unnest_as_table_factor
760    }
761}
762
763/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
764///
765///
766/// # Examples
767///
768/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
769/// but with `use_timestamp_for_date64` overridden to `true`
770///
771/// ```
772/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
773/// let dialect = CustomDialectBuilder::new()
774///     .with_use_timestamp_for_date64(true)
775///     .build();
776/// ```
777pub struct CustomDialectBuilder {
778    identifier_quote_style: Option<char>,
779    supports_nulls_first_in_sort: bool,
780    use_timestamp_for_date64: bool,
781    interval_style: IntervalStyle,
782    float64_ast_dtype: ast::DataType,
783    utf8_cast_dtype: ast::DataType,
784    large_utf8_cast_dtype: ast::DataType,
785    date_field_extract_style: DateFieldExtractStyle,
786    character_length_style: CharacterLengthStyle,
787    int64_cast_dtype: ast::DataType,
788    int32_cast_dtype: ast::DataType,
789    timestamp_cast_dtype: ast::DataType,
790    timestamp_tz_cast_dtype: ast::DataType,
791    date32_cast_dtype: ast::DataType,
792    supports_column_alias_in_table_alias: bool,
793    requires_derived_table_alias: bool,
794    division_operator: BinaryOperator,
795    window_func_support_window_frame: bool,
796    full_qualified_col: bool,
797    unnest_as_table_factor: bool,
798}
799
800impl Default for CustomDialectBuilder {
801    fn default() -> Self {
802        Self::new()
803    }
804}
805
806impl CustomDialectBuilder {
807    pub fn new() -> Self {
808        Self {
809            identifier_quote_style: None,
810            supports_nulls_first_in_sort: true,
811            use_timestamp_for_date64: false,
812            interval_style: IntervalStyle::PostgresVerbose,
813            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
814            utf8_cast_dtype: ast::DataType::Varchar(None),
815            large_utf8_cast_dtype: ast::DataType::Text,
816            date_field_extract_style: DateFieldExtractStyle::DatePart,
817            character_length_style: CharacterLengthStyle::CharacterLength,
818            int64_cast_dtype: ast::DataType::BigInt(None),
819            int32_cast_dtype: ast::DataType::Integer(None),
820            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
821            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
822                None,
823                TimezoneInfo::WithTimeZone,
824            ),
825            date32_cast_dtype: ast::DataType::Date,
826            supports_column_alias_in_table_alias: true,
827            requires_derived_table_alias: false,
828            division_operator: BinaryOperator::Divide,
829            window_func_support_window_frame: true,
830            full_qualified_col: false,
831            unnest_as_table_factor: false,
832        }
833    }
834
835    pub fn build(self) -> CustomDialect {
836        CustomDialect {
837            identifier_quote_style: self.identifier_quote_style,
838            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
839            use_timestamp_for_date64: self.use_timestamp_for_date64,
840            interval_style: self.interval_style,
841            float64_ast_dtype: self.float64_ast_dtype,
842            utf8_cast_dtype: self.utf8_cast_dtype,
843            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
844            date_field_extract_style: self.date_field_extract_style,
845            character_length_style: self.character_length_style,
846            int64_cast_dtype: self.int64_cast_dtype,
847            int32_cast_dtype: self.int32_cast_dtype,
848            timestamp_cast_dtype: self.timestamp_cast_dtype,
849            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
850            date32_cast_dtype: self.date32_cast_dtype,
851            supports_column_alias_in_table_alias: self
852                .supports_column_alias_in_table_alias,
853            requires_derived_table_alias: self.requires_derived_table_alias,
854            division_operator: self.division_operator,
855            window_func_support_window_frame: self.window_func_support_window_frame,
856            full_qualified_col: self.full_qualified_col,
857            unnest_as_table_factor: self.unnest_as_table_factor,
858        }
859    }
860
861    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
862    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
863        self.identifier_quote_style = Some(identifier_quote_style);
864        self
865    }
866
867    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
868    pub fn with_supports_nulls_first_in_sort(
869        mut self,
870        supports_nulls_first_in_sort: bool,
871    ) -> Self {
872        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
873        self
874    }
875
876    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
877    pub fn with_use_timestamp_for_date64(
878        mut self,
879        use_timestamp_for_date64: bool,
880    ) -> Self {
881        self.use_timestamp_for_date64 = use_timestamp_for_date64;
882        self
883    }
884
885    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
886    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
887        self.interval_style = interval_style;
888        self
889    }
890
891    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
892    pub fn with_character_length_style(
893        mut self,
894        character_length_style: CharacterLengthStyle,
895    ) -> Self {
896        self.character_length_style = character_length_style;
897        self
898    }
899
900    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
901    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
902        self.float64_ast_dtype = float64_ast_dtype;
903        self
904    }
905
906    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
907    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
908        self.utf8_cast_dtype = utf8_cast_dtype;
909        self
910    }
911
912    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
913    pub fn with_large_utf8_cast_dtype(
914        mut self,
915        large_utf8_cast_dtype: ast::DataType,
916    ) -> Self {
917        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
918        self
919    }
920
921    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
922    pub fn with_date_field_extract_style(
923        mut self,
924        date_field_extract_style: DateFieldExtractStyle,
925    ) -> Self {
926        self.date_field_extract_style = date_field_extract_style;
927        self
928    }
929
930    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
931    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
932        self.int64_cast_dtype = int64_cast_dtype;
933        self
934    }
935
936    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
937    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
938        self.int32_cast_dtype = int32_cast_dtype;
939        self
940    }
941
942    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
943    pub fn with_timestamp_cast_dtype(
944        mut self,
945        timestamp_cast_dtype: ast::DataType,
946        timestamp_tz_cast_dtype: ast::DataType,
947    ) -> Self {
948        self.timestamp_cast_dtype = timestamp_cast_dtype;
949        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
950        self
951    }
952
953    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
954        self.date32_cast_dtype = date32_cast_dtype;
955        self
956    }
957
958    /// Customize the dialect to support column aliases as part of alias table definition
959    pub fn with_supports_column_alias_in_table_alias(
960        mut self,
961        supports_column_alias_in_table_alias: bool,
962    ) -> Self {
963        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
964        self
965    }
966
967    pub fn with_requires_derived_table_alias(
968        mut self,
969        requires_derived_table_alias: bool,
970    ) -> Self {
971        self.requires_derived_table_alias = requires_derived_table_alias;
972        self
973    }
974
975    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
976        self.division_operator = division_operator;
977        self
978    }
979
980    pub fn with_window_func_support_window_frame(
981        mut self,
982        window_func_support_window_frame: bool,
983    ) -> Self {
984        self.window_func_support_window_frame = window_func_support_window_frame;
985        self
986    }
987
988    /// Customize the dialect to allow full qualified column names
989    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
990        self.full_qualified_col = full_qualified_col;
991        self
992    }
993
994    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
995        self.unnest_as_table_factor = unnest_as_table_factor;
996        self
997    }
998}