datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    Unparser, utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql,
23};
24use arrow::array::timezone::Tz;
25use arrow::datatypes::TimeUnit;
26use chrono::DateTime;
27use datafusion_common::Result;
28use datafusion_expr::Expr;
29use regex::Regex;
30use sqlparser::tokenizer::Span;
31use sqlparser::{
32    ast::{
33        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
34    },
35    keywords::ALL_KEYWORDS,
36};
37
/// Boxed callback that unparses the arguments of one scalar function into SQL.
///
/// The callback receives the active [`Unparser`] and the function's argument
/// expressions. In this file the handlers are stored by [`DuckDBDialect`], which
/// returns the callback's result directly from
/// `scalar_function_to_sql_overrides`: `Ok(None)` therefore requests the default
/// unparsing and `Ok(Some(expr))` supplies a replacement AST node.
///
/// The `Send + Sync` bounds mirror the supertraits of [`Dialect`].
pub type ScalarFnToSqlHandler =
    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
40
41/// `Dialect` to use for Unparsing
42///
43/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
44/// but this behavior can be overridden as needed
45///
46/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
47///
48/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
49/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
50pub trait Dialect: Send + Sync {
51    /// Return the character used to quote identifiers.
52    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
53
54    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
55    fn supports_nulls_first_in_sort(&self) -> bool {
56        true
57    }
58
59    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
60    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
61    fn use_timestamp_for_date64(&self) -> bool {
62        false
63    }
64
65    fn interval_style(&self) -> IntervalStyle {
66        IntervalStyle::PostgresVerbose
67    }
68
69    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
70    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
71    fn float64_ast_dtype(&self) -> ast::DataType {
72        ast::DataType::Double(ast::ExactNumberInfo::None)
73    }
74
75    /// The SQL type to use for Arrow Utf8 unparsing
76    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
77    fn utf8_cast_dtype(&self) -> ast::DataType {
78        ast::DataType::Varchar(None)
79    }
80
81    /// The SQL type to use for Arrow LargeUtf8 unparsing
82    /// Most dialects use TEXT, but some, like MySQL, require CHAR
83    fn large_utf8_cast_dtype(&self) -> ast::DataType {
84        ast::DataType::Text
85    }
86
87    /// The date field extract style to use: `DateFieldExtractStyle`
88    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
89        DateFieldExtractStyle::DatePart
90    }
91
92    /// The character length extraction style to use: `CharacterLengthStyle`
93    fn character_length_style(&self) -> CharacterLengthStyle {
94        CharacterLengthStyle::CharacterLength
95    }
96
97    /// The SQL type to use for Arrow Int64 unparsing
98    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
99    fn int64_cast_dtype(&self) -> ast::DataType {
100        ast::DataType::BigInt(None)
101    }
102
103    /// The SQL type to use for Arrow Int32 unparsing
104    /// Most dialects use Integer, but some, like MySQL, require SIGNED
105    fn int32_cast_dtype(&self) -> ast::DataType {
106        ast::DataType::Integer(None)
107    }
108
109    /// The SQL type to use for Timestamp unparsing
110    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
111    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
112    fn timestamp_cast_dtype(
113        &self,
114        _time_unit: &TimeUnit,
115        tz: &Option<Arc<str>>,
116    ) -> ast::DataType {
117        let tz_info = match tz {
118            Some(_) => TimezoneInfo::WithTimeZone,
119            None => TimezoneInfo::None,
120        };
121
122        ast::DataType::Timestamp(None, tz_info)
123    }
124
125    /// The SQL type to use for Arrow Date32 unparsing
126    /// Most dialects use Date, but some, like SQLite require TEXT
127    fn date32_cast_dtype(&self) -> ast::DataType {
128        ast::DataType::Date
129    }
130
131    /// Does the dialect support specifying column aliases as part of alias table definition?
132    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
133    fn supports_column_alias_in_table_alias(&self) -> bool {
134        true
135    }
136
137    /// Whether the dialect requires a table alias for any subquery in the FROM clause
138    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
139    fn requires_derived_table_alias(&self) -> bool {
140        false
141    }
142
143    /// The division operator for the dialect
144    /// Most dialect uses ` BinaryOperator::Divide` (/)
145    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
146    fn division_operator(&self) -> BinaryOperator {
147        BinaryOperator::Divide
148    }
149
150    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
151    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
152    /// a custom implementation for the function.
153    fn scalar_function_to_sql_overrides(
154        &self,
155        _unparser: &Unparser,
156        _func_name: &str,
157        _args: &[Expr],
158    ) -> Result<Option<ast::Expr>> {
159        Ok(None)
160    }
161
162    /// Allows the dialect to choose to omit window frame in unparsing
163    /// based on function name and window frame bound
164    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
165    fn window_func_support_window_frame(
166        &self,
167        _func_name: &str,
168        _start_bound: &WindowFrameBound,
169        _end_bound: &WindowFrameBound,
170    ) -> bool {
171        true
172    }
173
174    /// Extends the dialect's default rules for unparsing scalar functions.
175    /// This is useful for supporting application-specific UDFs or custom engine extensions.
176    fn with_custom_scalar_overrides(
177        self,
178        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
179    ) -> Self
180    where
181        Self: Sized,
182    {
183        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
184    }
185
186    /// Allow to unparse a qualified column with a full qualified name
187    /// (e.g. catalog_name.schema_name.table_name.column_name)
188    /// Otherwise, the column will be unparsed with only the table name and column name
189    /// (e.g. table_name.column_name)
190    fn full_qualified_col(&self) -> bool {
191        false
192    }
193
194    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
195    ///
196    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
197    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
198    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
199    fn unnest_as_table_factor(&self) -> bool {
200        false
201    }
202
203    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
204    /// Returns None if the default unparsing should be used, or Some(String) if there is
205    /// a custom implementation for the alias.
206    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
207        Ok(None)
208    }
209
210    /// Allows the dialect to support the QUALIFY clause
211    ///
212    /// Some dialects, like Postgres, do not support the QUALIFY clause
213    fn supports_qualify(&self) -> bool {
214        true
215    }
216
217    /// Allows the dialect to override logic of formatting datetime with tz into string.
218    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, _unit: TimeUnit) -> String {
219        dt.to_string()
220    }
221
222    /// Whether the dialect supports an empty select list such as `SELECT FROM table`.
223    ///
224    /// An empty select list returns rows without any column data, which is useful for:
225    /// - Counting rows: `SELECT FROM users WHERE active = true` (combined with `COUNT(*)`)
226    /// - Testing row existence without retrieving column data
227    /// - Performance optimization when only row counts or existence checks are needed
228    ///
229    /// # Default
230    ///
231    /// Returns `false` for maximum compatibility across SQL dialects. When `false`,
232    /// the unparser falls back to `SELECT 1 FROM table`.
233    ///
234    /// # Implementation Note
235    ///
236    /// Specific dialects should override this method to return `true` if they support
237    /// the empty select list syntax (e.g., PostgreSQL).
238    ///
239    /// # Example SQL Output
240    ///
241    /// ```sql
242    /// -- When supported:
243    /// SELECT FROM users WHERE active = true;
244    ///
245    /// -- Fallback when unsupported:
246    /// SELECT 1 FROM users WHERE active = true;
247    /// ```
248    fn supports_empty_select_list(&self) -> bool {
249        false
250    }
251}
252
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
///
/// Different DBMSs follow different standards; popular ones are:
/// - postgres_verbose: `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`,
///   which is compatible with the arrow display format, as well as duckdb
/// - the SQL standard format: `'1-2'` for year-month, or `'1 10:10:10.123456'` for day-time
///   (<https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose Postgres form, e.g. `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`.
    PostgresVerbose,
    /// SQL standard form: `'1-2'` for year-month, `'1 10:10:10.123456'` for day-time.
    SQLStandard,
    /// The style selected by `MySqlDialect` in this module.
    MySQL,
}
267
/// Datetime subfield extraction style for unparsing
///
/// <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>
///
/// Different DBMSs follow different standards; popular ones are:
/// - `date_part('YEAR', date '2001-02-16')`
/// - `EXTRACT(YEAR from date '2001-02-16')`
///
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// `date_part('YEAR', date '2001-02-16')`
    DatePart,
    /// `EXTRACT(YEAR from date '2001-02-16')`
    Extract,
    /// The style selected by `SqliteDialect` in this module.
    Strftime,
}
281
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the
/// number of characters in a string:
/// - `Length` style uses `length(x)`
/// - `CharacterLength` style uses the SQL standard `character_length(x)`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterLengthStyle {
    /// `length(x)`
    Length,
    /// `character_length(x)`
    CharacterLength,
}
292
293pub struct DefaultDialect {}
294
295impl Dialect for DefaultDialect {
296    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
297        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
298        let id_upper = identifier.to_uppercase();
299        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
300        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
301        // Also quote identifiers with uppercase letters since unquoted identifiers are
302        // normalized to lowercase by the SQL parser, which would break case-sensitive schemas
303        let needs_quote = (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
304            || !identifier_regex.is_match(identifier)
305            || identifier.chars().any(|c| c.is_ascii_uppercase());
306        if needs_quote { Some('"') } else { None }
307    }
308}
309
310pub struct PostgreSqlDialect {}
311
312impl Dialect for PostgreSqlDialect {
313    fn supports_qualify(&self) -> bool {
314        false
315    }
316
317    fn requires_derived_table_alias(&self) -> bool {
318        true
319    }
320
321    fn supports_empty_select_list(&self) -> bool {
322        true
323    }
324
325    fn identifier_quote_style(&self, _: &str) -> Option<char> {
326        Some('"')
327    }
328
329    fn interval_style(&self) -> IntervalStyle {
330        IntervalStyle::PostgresVerbose
331    }
332
333    fn float64_ast_dtype(&self) -> ast::DataType {
334        ast::DataType::DoublePrecision
335    }
336
337    fn scalar_function_to_sql_overrides(
338        &self,
339        unparser: &Unparser,
340        func_name: &str,
341        args: &[Expr],
342    ) -> Result<Option<ast::Expr>> {
343        if func_name == "round" {
344            return Ok(Some(
345                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
346            ));
347        }
348
349        Ok(None)
350    }
351}
352
353impl PostgreSqlDialect {
354    fn round_to_sql_enforce_numeric(
355        &self,
356        unparser: &Unparser,
357        func_name: &str,
358        args: &[Expr],
359    ) -> Result<ast::Expr> {
360        let mut args = unparser.function_args_to_sql(args)?;
361
362        // Enforce the first argument to be Numeric
363        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
364            args.first_mut()
365        {
366            if let ast::Expr::Cast { data_type, .. } = expr {
367                // Don't create an additional cast wrapper if we can update the existing one
368                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
369            } else {
370                // Wrap the expression in a new cast
371                *expr = ast::Expr::Cast {
372                    kind: ast::CastKind::Cast,
373                    expr: Box::new(expr.clone()),
374                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
375                    format: None,
376                };
377            }
378        }
379
380        Ok(ast::Expr::Function(Function {
381            name: ObjectName::from(vec![Ident {
382                value: func_name.to_string(),
383                quote_style: None,
384                span: Span::empty(),
385            }]),
386            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
387                duplicate_treatment: None,
388                args,
389                clauses: vec![],
390            }),
391            filter: None,
392            null_treatment: None,
393            over: None,
394            within_group: vec![],
395            parameters: ast::FunctionArguments::None,
396            uses_odbc_syntax: false,
397        }))
398    }
399}
400
401#[derive(Default)]
402pub struct DuckDBDialect {
403    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
404}
405
406impl DuckDBDialect {
407    #[must_use]
408    pub fn new() -> Self {
409        Self {
410            custom_scalar_fn_overrides: HashMap::new(),
411        }
412    }
413}
414
415impl Dialect for DuckDBDialect {
416    fn identifier_quote_style(&self, _: &str) -> Option<char> {
417        Some('"')
418    }
419
420    fn character_length_style(&self) -> CharacterLengthStyle {
421        CharacterLengthStyle::Length
422    }
423
424    fn division_operator(&self) -> BinaryOperator {
425        BinaryOperator::DuckIntegerDivide
426    }
427
428    fn with_custom_scalar_overrides(
429        mut self,
430        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
431    ) -> Self {
432        for (func_name, handler) in handlers {
433            self.custom_scalar_fn_overrides
434                .insert(func_name.to_string(), handler);
435        }
436        self
437    }
438
439    fn scalar_function_to_sql_overrides(
440        &self,
441        unparser: &Unparser,
442        func_name: &str,
443        args: &[Expr],
444    ) -> Result<Option<ast::Expr>> {
445        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
446            return handler(unparser, args);
447        }
448
449        if func_name == "character_length" {
450            return character_length_to_sql(
451                unparser,
452                self.character_length_style(),
453                args,
454            );
455        }
456
457        Ok(None)
458    }
459
460    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, unit: TimeUnit) -> String {
461        let format = match unit {
462            TimeUnit::Second => "%Y-%m-%d %H:%M:%S%:z",
463            TimeUnit::Millisecond => "%Y-%m-%d %H:%M:%S%.3f%:z",
464            TimeUnit::Microsecond => "%Y-%m-%d %H:%M:%S%.6f%:z",
465            TimeUnit::Nanosecond => "%Y-%m-%d %H:%M:%S%.9f%:z",
466        };
467
468        dt.format(format).to_string()
469    }
470}
471
472pub struct MySqlDialect {}
473
474impl Dialect for MySqlDialect {
475    fn supports_qualify(&self) -> bool {
476        false
477    }
478
479    fn identifier_quote_style(&self, _: &str) -> Option<char> {
480        Some('`')
481    }
482
483    fn supports_nulls_first_in_sort(&self) -> bool {
484        false
485    }
486
487    fn interval_style(&self) -> IntervalStyle {
488        IntervalStyle::MySQL
489    }
490
491    fn utf8_cast_dtype(&self) -> ast::DataType {
492        ast::DataType::Char(None)
493    }
494
495    fn large_utf8_cast_dtype(&self) -> ast::DataType {
496        ast::DataType::Char(None)
497    }
498
499    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
500        DateFieldExtractStyle::Extract
501    }
502
503    fn int64_cast_dtype(&self) -> ast::DataType {
504        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
505    }
506
507    fn int32_cast_dtype(&self) -> ast::DataType {
508        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
509    }
510
511    fn timestamp_cast_dtype(
512        &self,
513        _time_unit: &TimeUnit,
514        _tz: &Option<Arc<str>>,
515    ) -> ast::DataType {
516        ast::DataType::Datetime(None)
517    }
518
519    fn requires_derived_table_alias(&self) -> bool {
520        true
521    }
522
523    fn scalar_function_to_sql_overrides(
524        &self,
525        unparser: &Unparser,
526        func_name: &str,
527        args: &[Expr],
528    ) -> Result<Option<ast::Expr>> {
529        if func_name == "date_part" {
530            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
531        }
532
533        Ok(None)
534    }
535}
536
537pub struct SqliteDialect {}
538
539impl Dialect for SqliteDialect {
540    fn supports_qualify(&self) -> bool {
541        false
542    }
543
544    fn identifier_quote_style(&self, _: &str) -> Option<char> {
545        Some('`')
546    }
547
548    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
549        DateFieldExtractStyle::Strftime
550    }
551
552    fn date32_cast_dtype(&self) -> ast::DataType {
553        ast::DataType::Text
554    }
555
556    fn character_length_style(&self) -> CharacterLengthStyle {
557        CharacterLengthStyle::Length
558    }
559
560    fn supports_column_alias_in_table_alias(&self) -> bool {
561        false
562    }
563
564    fn timestamp_cast_dtype(
565        &self,
566        _time_unit: &TimeUnit,
567        _tz: &Option<Arc<str>>,
568    ) -> ast::DataType {
569        ast::DataType::Text
570    }
571
572    fn scalar_function_to_sql_overrides(
573        &self,
574        unparser: &Unparser,
575        func_name: &str,
576        args: &[Expr],
577    ) -> Result<Option<ast::Expr>> {
578        match func_name {
579            "date_part" => {
580                date_part_to_sql(unparser, self.date_field_extract_style(), args)
581            }
582            "character_length" => {
583                character_length_to_sql(unparser, self.character_length_style(), args)
584            }
585            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
586            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
587            _ => Ok(None),
588        }
589    }
590}
591
592#[derive(Default)]
593pub struct BigQueryDialect {}
594
595impl Dialect for BigQueryDialect {
596    fn identifier_quote_style(&self, _: &str) -> Option<char> {
597        Some('`')
598    }
599
600    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
601        // Check if alias contains any special characters not supported by BigQuery col names
602        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
603        let special_chars: [char; 20] = [
604            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
605            '^', '`', '{', '}', '~',
606        ];
607
608        if alias.chars().any(|c| special_chars.contains(&c)) {
609            let mut encoded_name = String::new();
610            for c in alias.chars() {
611                if special_chars.contains(&c) {
612                    encoded_name.push_str(&format!("_{}", c as u32));
613                } else {
614                    encoded_name.push(c);
615                }
616            }
617            Ok(Some(encoded_name))
618        } else {
619            Ok(Some(alias.to_string()))
620        }
621    }
622
623    fn unnest_as_table_factor(&self) -> bool {
624        true
625    }
626}
627
628impl BigQueryDialect {
629    #[must_use]
630    pub fn new() -> Self {
631        Self {}
632    }
633}
634
635pub struct CustomDialect {
636    identifier_quote_style: Option<char>,
637    supports_nulls_first_in_sort: bool,
638    use_timestamp_for_date64: bool,
639    interval_style: IntervalStyle,
640    float64_ast_dtype: ast::DataType,
641    utf8_cast_dtype: ast::DataType,
642    large_utf8_cast_dtype: ast::DataType,
643    date_field_extract_style: DateFieldExtractStyle,
644    character_length_style: CharacterLengthStyle,
645    int64_cast_dtype: ast::DataType,
646    int32_cast_dtype: ast::DataType,
647    timestamp_cast_dtype: ast::DataType,
648    timestamp_tz_cast_dtype: ast::DataType,
649    date32_cast_dtype: ast::DataType,
650    supports_column_alias_in_table_alias: bool,
651    requires_derived_table_alias: bool,
652    division_operator: BinaryOperator,
653    window_func_support_window_frame: bool,
654    full_qualified_col: bool,
655    unnest_as_table_factor: bool,
656}
657
658impl Default for CustomDialect {
659    fn default() -> Self {
660        Self {
661            identifier_quote_style: None,
662            supports_nulls_first_in_sort: true,
663            use_timestamp_for_date64: false,
664            interval_style: IntervalStyle::SQLStandard,
665            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
666            utf8_cast_dtype: ast::DataType::Varchar(None),
667            large_utf8_cast_dtype: ast::DataType::Text,
668            date_field_extract_style: DateFieldExtractStyle::DatePart,
669            character_length_style: CharacterLengthStyle::CharacterLength,
670            int64_cast_dtype: ast::DataType::BigInt(None),
671            int32_cast_dtype: ast::DataType::Integer(None),
672            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
673            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
674                None,
675                TimezoneInfo::WithTimeZone,
676            ),
677            date32_cast_dtype: ast::DataType::Date,
678            supports_column_alias_in_table_alias: true,
679            requires_derived_table_alias: false,
680            division_operator: BinaryOperator::Divide,
681            window_func_support_window_frame: true,
682            full_qualified_col: false,
683            unnest_as_table_factor: false,
684        }
685    }
686}
687
688impl Dialect for CustomDialect {
689    fn identifier_quote_style(&self, _: &str) -> Option<char> {
690        self.identifier_quote_style
691    }
692
693    fn supports_nulls_first_in_sort(&self) -> bool {
694        self.supports_nulls_first_in_sort
695    }
696
697    fn use_timestamp_for_date64(&self) -> bool {
698        self.use_timestamp_for_date64
699    }
700
701    fn interval_style(&self) -> IntervalStyle {
702        self.interval_style
703    }
704
705    fn float64_ast_dtype(&self) -> ast::DataType {
706        self.float64_ast_dtype.clone()
707    }
708
709    fn utf8_cast_dtype(&self) -> ast::DataType {
710        self.utf8_cast_dtype.clone()
711    }
712
713    fn large_utf8_cast_dtype(&self) -> ast::DataType {
714        self.large_utf8_cast_dtype.clone()
715    }
716
717    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
718        self.date_field_extract_style
719    }
720
721    fn character_length_style(&self) -> CharacterLengthStyle {
722        self.character_length_style
723    }
724
725    fn int64_cast_dtype(&self) -> ast::DataType {
726        self.int64_cast_dtype.clone()
727    }
728
729    fn int32_cast_dtype(&self) -> ast::DataType {
730        self.int32_cast_dtype.clone()
731    }
732
733    fn timestamp_cast_dtype(
734        &self,
735        _time_unit: &TimeUnit,
736        tz: &Option<Arc<str>>,
737    ) -> ast::DataType {
738        if tz.is_some() {
739            self.timestamp_tz_cast_dtype.clone()
740        } else {
741            self.timestamp_cast_dtype.clone()
742        }
743    }
744
745    fn date32_cast_dtype(&self) -> ast::DataType {
746        self.date32_cast_dtype.clone()
747    }
748
749    fn supports_column_alias_in_table_alias(&self) -> bool {
750        self.supports_column_alias_in_table_alias
751    }
752
753    fn scalar_function_to_sql_overrides(
754        &self,
755        unparser: &Unparser,
756        func_name: &str,
757        args: &[Expr],
758    ) -> Result<Option<ast::Expr>> {
759        match func_name {
760            "date_part" => {
761                date_part_to_sql(unparser, self.date_field_extract_style(), args)
762            }
763            "character_length" => {
764                character_length_to_sql(unparser, self.character_length_style(), args)
765            }
766            _ => Ok(None),
767        }
768    }
769
770    fn requires_derived_table_alias(&self) -> bool {
771        self.requires_derived_table_alias
772    }
773
774    fn division_operator(&self) -> BinaryOperator {
775        self.division_operator.clone()
776    }
777
778    fn window_func_support_window_frame(
779        &self,
780        _func_name: &str,
781        _start_bound: &WindowFrameBound,
782        _end_bound: &WindowFrameBound,
783    ) -> bool {
784        self.window_func_support_window_frame
785    }
786
787    fn full_qualified_col(&self) -> bool {
788        self.full_qualified_col
789    }
790
791    fn unnest_as_table_factor(&self) -> bool {
792        self.unnest_as_table_factor
793    }
794}
795
796/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
797///
798///
799/// # Examples
800///
801/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
802/// but with `use_timestamp_for_date64` overridden to `true`
803///
804/// ```
805/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
806/// let dialect = CustomDialectBuilder::new()
807///     .with_use_timestamp_for_date64(true)
808///     .build();
809/// ```
810pub struct CustomDialectBuilder {
811    identifier_quote_style: Option<char>,
812    supports_nulls_first_in_sort: bool,
813    use_timestamp_for_date64: bool,
814    interval_style: IntervalStyle,
815    float64_ast_dtype: ast::DataType,
816    utf8_cast_dtype: ast::DataType,
817    large_utf8_cast_dtype: ast::DataType,
818    date_field_extract_style: DateFieldExtractStyle,
819    character_length_style: CharacterLengthStyle,
820    int64_cast_dtype: ast::DataType,
821    int32_cast_dtype: ast::DataType,
822    timestamp_cast_dtype: ast::DataType,
823    timestamp_tz_cast_dtype: ast::DataType,
824    date32_cast_dtype: ast::DataType,
825    supports_column_alias_in_table_alias: bool,
826    requires_derived_table_alias: bool,
827    division_operator: BinaryOperator,
828    window_func_support_window_frame: bool,
829    full_qualified_col: bool,
830    unnest_as_table_factor: bool,
831}
832
833impl Default for CustomDialectBuilder {
834    fn default() -> Self {
835        Self::new()
836    }
837}
838
839impl CustomDialectBuilder {
840    pub fn new() -> Self {
841        Self {
842            identifier_quote_style: None,
843            supports_nulls_first_in_sort: true,
844            use_timestamp_for_date64: false,
845            interval_style: IntervalStyle::PostgresVerbose,
846            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
847            utf8_cast_dtype: ast::DataType::Varchar(None),
848            large_utf8_cast_dtype: ast::DataType::Text,
849            date_field_extract_style: DateFieldExtractStyle::DatePart,
850            character_length_style: CharacterLengthStyle::CharacterLength,
851            int64_cast_dtype: ast::DataType::BigInt(None),
852            int32_cast_dtype: ast::DataType::Integer(None),
853            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
854            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
855                None,
856                TimezoneInfo::WithTimeZone,
857            ),
858            date32_cast_dtype: ast::DataType::Date,
859            supports_column_alias_in_table_alias: true,
860            requires_derived_table_alias: false,
861            division_operator: BinaryOperator::Divide,
862            window_func_support_window_frame: true,
863            full_qualified_col: false,
864            unnest_as_table_factor: false,
865        }
866    }
867
868    pub fn build(self) -> CustomDialect {
869        CustomDialect {
870            identifier_quote_style: self.identifier_quote_style,
871            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
872            use_timestamp_for_date64: self.use_timestamp_for_date64,
873            interval_style: self.interval_style,
874            float64_ast_dtype: self.float64_ast_dtype,
875            utf8_cast_dtype: self.utf8_cast_dtype,
876            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
877            date_field_extract_style: self.date_field_extract_style,
878            character_length_style: self.character_length_style,
879            int64_cast_dtype: self.int64_cast_dtype,
880            int32_cast_dtype: self.int32_cast_dtype,
881            timestamp_cast_dtype: self.timestamp_cast_dtype,
882            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
883            date32_cast_dtype: self.date32_cast_dtype,
884            supports_column_alias_in_table_alias: self
885                .supports_column_alias_in_table_alias,
886            requires_derived_table_alias: self.requires_derived_table_alias,
887            division_operator: self.division_operator,
888            window_func_support_window_frame: self.window_func_support_window_frame,
889            full_qualified_col: self.full_qualified_col,
890            unnest_as_table_factor: self.unnest_as_table_factor,
891        }
892    }
893
894    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
895    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
896        self.identifier_quote_style = Some(identifier_quote_style);
897        self
898    }
899
900    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
901    pub fn with_supports_nulls_first_in_sort(
902        mut self,
903        supports_nulls_first_in_sort: bool,
904    ) -> Self {
905        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
906        self
907    }
908
909    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
910    pub fn with_use_timestamp_for_date64(
911        mut self,
912        use_timestamp_for_date64: bool,
913    ) -> Self {
914        self.use_timestamp_for_date64 = use_timestamp_for_date64;
915        self
916    }
917
918    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
919    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
920        self.interval_style = interval_style;
921        self
922    }
923
924    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
925    pub fn with_character_length_style(
926        mut self,
927        character_length_style: CharacterLengthStyle,
928    ) -> Self {
929        self.character_length_style = character_length_style;
930        self
931    }
932
933    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
934    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
935        self.float64_ast_dtype = float64_ast_dtype;
936        self
937    }
938
939    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
940    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
941        self.utf8_cast_dtype = utf8_cast_dtype;
942        self
943    }
944
945    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
946    pub fn with_large_utf8_cast_dtype(
947        mut self,
948        large_utf8_cast_dtype: ast::DataType,
949    ) -> Self {
950        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
951        self
952    }
953
954    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
955    pub fn with_date_field_extract_style(
956        mut self,
957        date_field_extract_style: DateFieldExtractStyle,
958    ) -> Self {
959        self.date_field_extract_style = date_field_extract_style;
960        self
961    }
962
963    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
964    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
965        self.int64_cast_dtype = int64_cast_dtype;
966        self
967    }
968
969    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
970    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
971        self.int32_cast_dtype = int32_cast_dtype;
972        self
973    }
974
975    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
976    pub fn with_timestamp_cast_dtype(
977        mut self,
978        timestamp_cast_dtype: ast::DataType,
979        timestamp_tz_cast_dtype: ast::DataType,
980    ) -> Self {
981        self.timestamp_cast_dtype = timestamp_cast_dtype;
982        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
983        self
984    }
985
986    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
987        self.date32_cast_dtype = date32_cast_dtype;
988        self
989    }
990
991    /// Customize the dialect to support column aliases as part of alias table definition
992    pub fn with_supports_column_alias_in_table_alias(
993        mut self,
994        supports_column_alias_in_table_alias: bool,
995    ) -> Self {
996        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
997        self
998    }
999
1000    pub fn with_requires_derived_table_alias(
1001        mut self,
1002        requires_derived_table_alias: bool,
1003    ) -> Self {
1004        self.requires_derived_table_alias = requires_derived_table_alias;
1005        self
1006    }
1007
1008    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
1009        self.division_operator = division_operator;
1010        self
1011    }
1012
1013    pub fn with_window_func_support_window_frame(
1014        mut self,
1015        window_func_support_window_frame: bool,
1016    ) -> Self {
1017        self.window_func_support_window_frame = window_func_support_window_frame;
1018        self
1019    }
1020
1021    /// Customize the dialect to allow full qualified column names
1022    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
1023        self.full_qualified_col = full_qualified_col;
1024        self
1025    }
1026
1027    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
1028        self.unnest_as_table_factor = unnest_as_table_factor;
1029        self
1030    }
1031}