Skip to main content

datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    Unparser, utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql,
23};
24use arrow::array::timezone::Tz;
25use arrow::datatypes::TimeUnit;
26use chrono::DateTime;
27use datafusion_common::Result;
28use datafusion_expr::Expr;
29use regex::Regex;
30use sqlparser::tokenizer::Span;
31use sqlparser::{
32    ast::{
33        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
34    },
35    keywords::ALL_KEYWORDS,
36};
37
38pub type ScalarFnToSqlHandler =
39    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
40
41/// `Dialect` to use for Unparsing
42///
43/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
44/// but this behavior can be overridden as needed
45///
46/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
47///
48/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
49/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
50pub trait Dialect: Send + Sync {
51    /// Return the character used to quote identifiers.
52    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
53
54    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
55    fn supports_nulls_first_in_sort(&self) -> bool {
56        true
57    }
58
59    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
60    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
61    fn use_timestamp_for_date64(&self) -> bool {
62        false
63    }
64
65    fn interval_style(&self) -> IntervalStyle {
66        IntervalStyle::PostgresVerbose
67    }
68
69    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
70    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
71    fn float64_ast_dtype(&self) -> ast::DataType {
72        ast::DataType::Double(ast::ExactNumberInfo::None)
73    }
74
75    /// The SQL type to use for Arrow Utf8 unparsing
76    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
77    fn utf8_cast_dtype(&self) -> ast::DataType {
78        ast::DataType::Varchar(None)
79    }
80
81    /// The SQL type to use for Arrow LargeUtf8 unparsing
82    /// Most dialects use TEXT, but some, like MySQL, require CHAR
83    fn large_utf8_cast_dtype(&self) -> ast::DataType {
84        ast::DataType::Text
85    }
86
87    /// The date field extract style to use: `DateFieldExtractStyle`
88    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
89        DateFieldExtractStyle::DatePart
90    }
91
92    /// The character length extraction style to use: `CharacterLengthStyle`
93    fn character_length_style(&self) -> CharacterLengthStyle {
94        CharacterLengthStyle::CharacterLength
95    }
96
97    /// The SQL type to use for Arrow Int64 unparsing
98    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
99    fn int64_cast_dtype(&self) -> ast::DataType {
100        ast::DataType::BigInt(None)
101    }
102
103    /// The SQL type to use for Arrow Int32 unparsing
104    /// Most dialects use Integer, but some, like MySQL, require SIGNED
105    fn int32_cast_dtype(&self) -> ast::DataType {
106        ast::DataType::Integer(None)
107    }
108
109    /// The SQL type to use for Timestamp unparsing
110    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
111    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
112    fn timestamp_cast_dtype(
113        &self,
114        _time_unit: &TimeUnit,
115        tz: &Option<Arc<str>>,
116    ) -> ast::DataType {
117        let tz_info = match tz {
118            Some(_) => TimezoneInfo::WithTimeZone,
119            None => TimezoneInfo::None,
120        };
121
122        ast::DataType::Timestamp(None, tz_info)
123    }
124
125    /// The SQL type to use for Arrow Date32 unparsing
126    /// Most dialects use Date, but some, like SQLite require TEXT
127    fn date32_cast_dtype(&self) -> ast::DataType {
128        ast::DataType::Date
129    }
130
131    /// Does the dialect support specifying column aliases as part of alias table definition?
132    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
133    fn supports_column_alias_in_table_alias(&self) -> bool {
134        true
135    }
136
137    /// Whether the dialect requires a table alias for any subquery in the FROM clause
138    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
139    fn requires_derived_table_alias(&self) -> bool {
140        false
141    }
142
143    /// The division operator for the dialect
144    /// Most dialect uses ` BinaryOperator::Divide` (/)
145    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
146    fn division_operator(&self) -> BinaryOperator {
147        BinaryOperator::Divide
148    }
149
150    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
151    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
152    /// a custom implementation for the function.
153    fn scalar_function_to_sql_overrides(
154        &self,
155        _unparser: &Unparser,
156        _func_name: &str,
157        _args: &[Expr],
158    ) -> Result<Option<ast::Expr>> {
159        Ok(None)
160    }
161
162    /// Allows the dialect to choose to omit window frame in unparsing
163    /// based on function name and window frame bound
164    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
165    fn window_func_support_window_frame(
166        &self,
167        _func_name: &str,
168        _start_bound: &WindowFrameBound,
169        _end_bound: &WindowFrameBound,
170    ) -> bool {
171        true
172    }
173
174    /// Extends the dialect's default rules for unparsing scalar functions.
175    /// This is useful for supporting application-specific UDFs or custom engine extensions.
176    fn with_custom_scalar_overrides(
177        self,
178        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
179    ) -> Self
180    where
181        Self: Sized,
182    {
183        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
184    }
185
186    /// Allow to unparse a qualified column with a full qualified name
187    /// (e.g. catalog_name.schema_name.table_name.column_name)
188    /// Otherwise, the column will be unparsed with only the table name and column name
189    /// (e.g. table_name.column_name)
190    fn full_qualified_col(&self) -> bool {
191        false
192    }
193
194    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
195    ///
196    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
197    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
198    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
199    fn unnest_as_table_factor(&self) -> bool {
200        false
201    }
202
203    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
204    /// Returns None if the default unparsing should be used, or Some(String) if there is
205    /// a custom implementation for the alias.
206    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
207        Ok(None)
208    }
209
210    /// Allows the dialect to support the QUALIFY clause
211    ///
212    /// Some dialects, like Postgres, do not support the QUALIFY clause
213    fn supports_qualify(&self) -> bool {
214        true
215    }
216
217    /// Allows the dialect to override logic of formatting datetime with tz into string.
218    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, _unit: TimeUnit) -> String {
219        dt.to_string()
220    }
221
222    /// Whether the dialect supports an empty select list such as `SELECT FROM table`.
223    ///
224    /// An empty select list returns rows without any column data, which is useful for:
225    /// - Counting rows: `SELECT FROM users WHERE active = true` (combined with `COUNT(*)`)
226    /// - Testing row existence without retrieving column data
227    /// - Performance optimization when only row counts or existence checks are needed
228    ///
229    /// # Default
230    ///
231    /// Returns `false` for maximum compatibility across SQL dialects. When `false`,
232    /// the unparser falls back to `SELECT 1 FROM table`.
233    ///
234    /// # Implementation Note
235    ///
236    /// Specific dialects should override this method to return `true` if they support
237    /// the empty select list syntax (e.g., PostgreSQL).
238    ///
239    /// # Example SQL Output
240    ///
241    /// ```sql
242    /// -- When supported:
243    /// SELECT FROM users WHERE active = true;
244    ///
245    /// -- Fallback when unsupported:
246    /// SELECT 1 FROM users WHERE active = true;
247    /// ```
248    fn supports_empty_select_list(&self) -> bool {
249        false
250    }
251}
252
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
///
/// Different DBMSs follow different standards; popular ones are:
/// - postgres_verbose: `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`,
///   which is compatible with the arrow display format, as well as duckdb
/// - sql standard: `'1-2'` for year-month, or `'1 10:10:10.123456'` for day-time,
///   see <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose Postgres interval format.
    PostgresVerbose,
    /// SQL standard interval format.
    SQLStandard,
    /// MySQL interval format.
    MySQL,
}
267
/// Datetime subfield extraction style for unparsing
///
/// <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>
///
/// Different DBMSs follow different standards; popular ones are:
/// - `date_part('YEAR', date '2001-02-16')`
/// - `EXTRACT(YEAR from date '2001-02-16')`
///
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// `date_part(...)` function call style.
    DatePart,
    /// `EXTRACT(... from ...)` style.
    Extract,
    /// Strftime-based style (selected by the SQLite dialect in this module).
    Strftime,
}
281
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the
/// number of characters in a string:
/// - `Length` style uses `length(x)`
/// - `CharacterLength` style uses `character_length(x)`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterLengthStyle {
    /// Emit `length(x)`.
    Length,
    /// Emit `character_length(x)`.
    CharacterLength,
}
292
293pub struct DefaultDialect {}
294
295impl Dialect for DefaultDialect {
296    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
297        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
298        let id_upper = identifier.to_uppercase();
299        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
300        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
301        // Also quote identifiers with uppercase letters since unquoted identifiers are
302        // normalized to lowercase by the SQL parser, which would break case-sensitive schemas
303        let needs_quote = (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
304            || !identifier_regex.is_match(identifier)
305            || identifier.chars().any(|c| c.is_ascii_uppercase());
306        if needs_quote { Some('"') } else { None }
307    }
308}
309
310pub struct PostgreSqlDialect {}
311
312impl Dialect for PostgreSqlDialect {
313    fn supports_qualify(&self) -> bool {
314        false
315    }
316
317    fn requires_derived_table_alias(&self) -> bool {
318        true
319    }
320
321    fn supports_empty_select_list(&self) -> bool {
322        true
323    }
324
325    fn identifier_quote_style(&self, _: &str) -> Option<char> {
326        Some('"')
327    }
328
329    fn interval_style(&self) -> IntervalStyle {
330        IntervalStyle::PostgresVerbose
331    }
332
333    fn float64_ast_dtype(&self) -> ast::DataType {
334        ast::DataType::DoublePrecision
335    }
336
337    fn scalar_function_to_sql_overrides(
338        &self,
339        unparser: &Unparser,
340        func_name: &str,
341        args: &[Expr],
342    ) -> Result<Option<ast::Expr>> {
343        if func_name == "round" {
344            return Ok(Some(
345                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
346            ));
347        }
348
349        Ok(None)
350    }
351}
352
353impl PostgreSqlDialect {
354    fn round_to_sql_enforce_numeric(
355        &self,
356        unparser: &Unparser,
357        func_name: &str,
358        args: &[Expr],
359    ) -> Result<ast::Expr> {
360        let mut args = unparser.function_args_to_sql(args)?;
361
362        // Enforce the first argument to be Numeric
363        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
364            args.first_mut()
365        {
366            if let ast::Expr::Cast { data_type, .. } = expr {
367                // Don't create an additional cast wrapper if we can update the existing one
368                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
369            } else {
370                // Wrap the expression in a new cast
371                *expr = ast::Expr::Cast {
372                    kind: ast::CastKind::Cast,
373                    expr: Box::new(expr.clone()),
374                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
375                    array: false,
376                    format: None,
377                };
378            }
379        }
380
381        Ok(ast::Expr::Function(Function {
382            name: ObjectName::from(vec![Ident {
383                value: func_name.to_string(),
384                quote_style: None,
385                span: Span::empty(),
386            }]),
387            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
388                duplicate_treatment: None,
389                args,
390                clauses: vec![],
391            }),
392            filter: None,
393            null_treatment: None,
394            over: None,
395            within_group: vec![],
396            parameters: ast::FunctionArguments::None,
397            uses_odbc_syntax: false,
398        }))
399    }
400}
401
402#[derive(Default)]
403pub struct DuckDBDialect {
404    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
405}
406
407impl DuckDBDialect {
408    #[must_use]
409    pub fn new() -> Self {
410        Self {
411            custom_scalar_fn_overrides: HashMap::new(),
412        }
413    }
414}
415
416impl Dialect for DuckDBDialect {
417    fn identifier_quote_style(&self, _: &str) -> Option<char> {
418        Some('"')
419    }
420
421    fn character_length_style(&self) -> CharacterLengthStyle {
422        CharacterLengthStyle::Length
423    }
424
425    fn division_operator(&self) -> BinaryOperator {
426        BinaryOperator::DuckIntegerDivide
427    }
428
429    fn with_custom_scalar_overrides(
430        mut self,
431        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
432    ) -> Self {
433        for (func_name, handler) in handlers {
434            self.custom_scalar_fn_overrides
435                .insert(func_name.to_string(), handler);
436        }
437        self
438    }
439
440    fn scalar_function_to_sql_overrides(
441        &self,
442        unparser: &Unparser,
443        func_name: &str,
444        args: &[Expr],
445    ) -> Result<Option<ast::Expr>> {
446        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
447            return handler(unparser, args);
448        }
449
450        if func_name == "character_length" {
451            return character_length_to_sql(
452                unparser,
453                self.character_length_style(),
454                args,
455            );
456        }
457
458        Ok(None)
459    }
460
461    fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, unit: TimeUnit) -> String {
462        let format = match unit {
463            TimeUnit::Second => "%Y-%m-%d %H:%M:%S%:z",
464            TimeUnit::Millisecond => "%Y-%m-%d %H:%M:%S%.3f%:z",
465            TimeUnit::Microsecond => "%Y-%m-%d %H:%M:%S%.6f%:z",
466            TimeUnit::Nanosecond => "%Y-%m-%d %H:%M:%S%.9f%:z",
467        };
468
469        dt.format(format).to_string()
470    }
471}
472
473pub struct MySqlDialect {}
474
475impl Dialect for MySqlDialect {
476    fn supports_qualify(&self) -> bool {
477        false
478    }
479
480    fn identifier_quote_style(&self, _: &str) -> Option<char> {
481        Some('`')
482    }
483
484    fn supports_nulls_first_in_sort(&self) -> bool {
485        false
486    }
487
488    fn interval_style(&self) -> IntervalStyle {
489        IntervalStyle::MySQL
490    }
491
492    fn utf8_cast_dtype(&self) -> ast::DataType {
493        ast::DataType::Char(None)
494    }
495
496    fn large_utf8_cast_dtype(&self) -> ast::DataType {
497        ast::DataType::Char(None)
498    }
499
500    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
501        DateFieldExtractStyle::Extract
502    }
503
504    fn int64_cast_dtype(&self) -> ast::DataType {
505        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
506    }
507
508    fn int32_cast_dtype(&self) -> ast::DataType {
509        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
510    }
511
512    fn timestamp_cast_dtype(
513        &self,
514        _time_unit: &TimeUnit,
515        _tz: &Option<Arc<str>>,
516    ) -> ast::DataType {
517        ast::DataType::Datetime(None)
518    }
519
520    fn requires_derived_table_alias(&self) -> bool {
521        true
522    }
523
524    fn scalar_function_to_sql_overrides(
525        &self,
526        unparser: &Unparser,
527        func_name: &str,
528        args: &[Expr],
529    ) -> Result<Option<ast::Expr>> {
530        if func_name == "date_part" {
531            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
532        }
533
534        Ok(None)
535    }
536}
537
538pub struct SqliteDialect {}
539
540impl Dialect for SqliteDialect {
541    fn supports_qualify(&self) -> bool {
542        false
543    }
544
545    fn identifier_quote_style(&self, _: &str) -> Option<char> {
546        Some('`')
547    }
548
549    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
550        DateFieldExtractStyle::Strftime
551    }
552
553    fn date32_cast_dtype(&self) -> ast::DataType {
554        ast::DataType::Text
555    }
556
557    fn character_length_style(&self) -> CharacterLengthStyle {
558        CharacterLengthStyle::Length
559    }
560
561    fn supports_column_alias_in_table_alias(&self) -> bool {
562        false
563    }
564
565    fn timestamp_cast_dtype(
566        &self,
567        _time_unit: &TimeUnit,
568        _tz: &Option<Arc<str>>,
569    ) -> ast::DataType {
570        ast::DataType::Text
571    }
572
573    fn scalar_function_to_sql_overrides(
574        &self,
575        unparser: &Unparser,
576        func_name: &str,
577        args: &[Expr],
578    ) -> Result<Option<ast::Expr>> {
579        match func_name {
580            "date_part" => {
581                date_part_to_sql(unparser, self.date_field_extract_style(), args)
582            }
583            "character_length" => {
584                character_length_to_sql(unparser, self.character_length_style(), args)
585            }
586            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
587            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
588            _ => Ok(None),
589        }
590    }
591}
592
593#[derive(Default)]
594pub struct BigQueryDialect {}
595
596impl Dialect for BigQueryDialect {
597    fn identifier_quote_style(&self, _: &str) -> Option<char> {
598        Some('`')
599    }
600
601    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
602        // Check if alias contains any special characters not supported by BigQuery col names
603        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
604        let special_chars: [char; 20] = [
605            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
606            '^', '`', '{', '}', '~',
607        ];
608
609        if alias.chars().any(|c| special_chars.contains(&c)) {
610            let mut encoded_name = String::new();
611            for c in alias.chars() {
612                if special_chars.contains(&c) {
613                    encoded_name.push_str(&format!("_{}", c as u32));
614                } else {
615                    encoded_name.push(c);
616                }
617            }
618            Ok(Some(encoded_name))
619        } else {
620            Ok(Some(alias.to_string()))
621        }
622    }
623
624    fn unnest_as_table_factor(&self) -> bool {
625        true
626    }
627}
628
629impl BigQueryDialect {
630    #[must_use]
631    pub fn new() -> Self {
632        Self {}
633    }
634}
635
636pub struct CustomDialect {
637    identifier_quote_style: Option<char>,
638    supports_nulls_first_in_sort: bool,
639    use_timestamp_for_date64: bool,
640    interval_style: IntervalStyle,
641    float64_ast_dtype: ast::DataType,
642    utf8_cast_dtype: ast::DataType,
643    large_utf8_cast_dtype: ast::DataType,
644    date_field_extract_style: DateFieldExtractStyle,
645    character_length_style: CharacterLengthStyle,
646    int64_cast_dtype: ast::DataType,
647    int32_cast_dtype: ast::DataType,
648    timestamp_cast_dtype: ast::DataType,
649    timestamp_tz_cast_dtype: ast::DataType,
650    date32_cast_dtype: ast::DataType,
651    supports_column_alias_in_table_alias: bool,
652    requires_derived_table_alias: bool,
653    division_operator: BinaryOperator,
654    window_func_support_window_frame: bool,
655    full_qualified_col: bool,
656    unnest_as_table_factor: bool,
657}
658
659impl Default for CustomDialect {
660    fn default() -> Self {
661        Self {
662            identifier_quote_style: None,
663            supports_nulls_first_in_sort: true,
664            use_timestamp_for_date64: false,
665            interval_style: IntervalStyle::SQLStandard,
666            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
667            utf8_cast_dtype: ast::DataType::Varchar(None),
668            large_utf8_cast_dtype: ast::DataType::Text,
669            date_field_extract_style: DateFieldExtractStyle::DatePart,
670            character_length_style: CharacterLengthStyle::CharacterLength,
671            int64_cast_dtype: ast::DataType::BigInt(None),
672            int32_cast_dtype: ast::DataType::Integer(None),
673            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
674            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
675                None,
676                TimezoneInfo::WithTimeZone,
677            ),
678            date32_cast_dtype: ast::DataType::Date,
679            supports_column_alias_in_table_alias: true,
680            requires_derived_table_alias: false,
681            division_operator: BinaryOperator::Divide,
682            window_func_support_window_frame: true,
683            full_qualified_col: false,
684            unnest_as_table_factor: false,
685        }
686    }
687}
688
689impl Dialect for CustomDialect {
690    fn identifier_quote_style(&self, _: &str) -> Option<char> {
691        self.identifier_quote_style
692    }
693
694    fn supports_nulls_first_in_sort(&self) -> bool {
695        self.supports_nulls_first_in_sort
696    }
697
698    fn use_timestamp_for_date64(&self) -> bool {
699        self.use_timestamp_for_date64
700    }
701
702    fn interval_style(&self) -> IntervalStyle {
703        self.interval_style
704    }
705
706    fn float64_ast_dtype(&self) -> ast::DataType {
707        self.float64_ast_dtype.clone()
708    }
709
710    fn utf8_cast_dtype(&self) -> ast::DataType {
711        self.utf8_cast_dtype.clone()
712    }
713
714    fn large_utf8_cast_dtype(&self) -> ast::DataType {
715        self.large_utf8_cast_dtype.clone()
716    }
717
718    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
719        self.date_field_extract_style
720    }
721
722    fn character_length_style(&self) -> CharacterLengthStyle {
723        self.character_length_style
724    }
725
726    fn int64_cast_dtype(&self) -> ast::DataType {
727        self.int64_cast_dtype.clone()
728    }
729
730    fn int32_cast_dtype(&self) -> ast::DataType {
731        self.int32_cast_dtype.clone()
732    }
733
734    fn timestamp_cast_dtype(
735        &self,
736        _time_unit: &TimeUnit,
737        tz: &Option<Arc<str>>,
738    ) -> ast::DataType {
739        if tz.is_some() {
740            self.timestamp_tz_cast_dtype.clone()
741        } else {
742            self.timestamp_cast_dtype.clone()
743        }
744    }
745
746    fn date32_cast_dtype(&self) -> ast::DataType {
747        self.date32_cast_dtype.clone()
748    }
749
750    fn supports_column_alias_in_table_alias(&self) -> bool {
751        self.supports_column_alias_in_table_alias
752    }
753
754    fn scalar_function_to_sql_overrides(
755        &self,
756        unparser: &Unparser,
757        func_name: &str,
758        args: &[Expr],
759    ) -> Result<Option<ast::Expr>> {
760        match func_name {
761            "date_part" => {
762                date_part_to_sql(unparser, self.date_field_extract_style(), args)
763            }
764            "character_length" => {
765                character_length_to_sql(unparser, self.character_length_style(), args)
766            }
767            _ => Ok(None),
768        }
769    }
770
771    fn requires_derived_table_alias(&self) -> bool {
772        self.requires_derived_table_alias
773    }
774
775    fn division_operator(&self) -> BinaryOperator {
776        self.division_operator.clone()
777    }
778
779    fn window_func_support_window_frame(
780        &self,
781        _func_name: &str,
782        _start_bound: &WindowFrameBound,
783        _end_bound: &WindowFrameBound,
784    ) -> bool {
785        self.window_func_support_window_frame
786    }
787
788    fn full_qualified_col(&self) -> bool {
789        self.full_qualified_col
790    }
791
792    fn unnest_as_table_factor(&self) -> bool {
793        self.unnest_as_table_factor
794    }
795}
796
797/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
798///
799///
800/// # Examples
801///
802/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
803/// but with `use_timestamp_for_date64` overridden to `true`
804///
805/// ```
806/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
807/// let dialect = CustomDialectBuilder::new()
808///     .with_use_timestamp_for_date64(true)
809///     .build();
810/// ```
811pub struct CustomDialectBuilder {
812    identifier_quote_style: Option<char>,
813    supports_nulls_first_in_sort: bool,
814    use_timestamp_for_date64: bool,
815    interval_style: IntervalStyle,
816    float64_ast_dtype: ast::DataType,
817    utf8_cast_dtype: ast::DataType,
818    large_utf8_cast_dtype: ast::DataType,
819    date_field_extract_style: DateFieldExtractStyle,
820    character_length_style: CharacterLengthStyle,
821    int64_cast_dtype: ast::DataType,
822    int32_cast_dtype: ast::DataType,
823    timestamp_cast_dtype: ast::DataType,
824    timestamp_tz_cast_dtype: ast::DataType,
825    date32_cast_dtype: ast::DataType,
826    supports_column_alias_in_table_alias: bool,
827    requires_derived_table_alias: bool,
828    division_operator: BinaryOperator,
829    window_func_support_window_frame: bool,
830    full_qualified_col: bool,
831    unnest_as_table_factor: bool,
832}
833
834impl Default for CustomDialectBuilder {
835    fn default() -> Self {
836        Self::new()
837    }
838}
839
840impl CustomDialectBuilder {
841    pub fn new() -> Self {
842        Self {
843            identifier_quote_style: None,
844            supports_nulls_first_in_sort: true,
845            use_timestamp_for_date64: false,
846            interval_style: IntervalStyle::PostgresVerbose,
847            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
848            utf8_cast_dtype: ast::DataType::Varchar(None),
849            large_utf8_cast_dtype: ast::DataType::Text,
850            date_field_extract_style: DateFieldExtractStyle::DatePart,
851            character_length_style: CharacterLengthStyle::CharacterLength,
852            int64_cast_dtype: ast::DataType::BigInt(None),
853            int32_cast_dtype: ast::DataType::Integer(None),
854            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
855            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
856                None,
857                TimezoneInfo::WithTimeZone,
858            ),
859            date32_cast_dtype: ast::DataType::Date,
860            supports_column_alias_in_table_alias: true,
861            requires_derived_table_alias: false,
862            division_operator: BinaryOperator::Divide,
863            window_func_support_window_frame: true,
864            full_qualified_col: false,
865            unnest_as_table_factor: false,
866        }
867    }
868
869    pub fn build(self) -> CustomDialect {
870        CustomDialect {
871            identifier_quote_style: self.identifier_quote_style,
872            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
873            use_timestamp_for_date64: self.use_timestamp_for_date64,
874            interval_style: self.interval_style,
875            float64_ast_dtype: self.float64_ast_dtype,
876            utf8_cast_dtype: self.utf8_cast_dtype,
877            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
878            date_field_extract_style: self.date_field_extract_style,
879            character_length_style: self.character_length_style,
880            int64_cast_dtype: self.int64_cast_dtype,
881            int32_cast_dtype: self.int32_cast_dtype,
882            timestamp_cast_dtype: self.timestamp_cast_dtype,
883            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
884            date32_cast_dtype: self.date32_cast_dtype,
885            supports_column_alias_in_table_alias: self
886                .supports_column_alias_in_table_alias,
887            requires_derived_table_alias: self.requires_derived_table_alias,
888            division_operator: self.division_operator,
889            window_func_support_window_frame: self.window_func_support_window_frame,
890            full_qualified_col: self.full_qualified_col,
891            unnest_as_table_factor: self.unnest_as_table_factor,
892        }
893    }
894
895    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
896    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
897        self.identifier_quote_style = Some(identifier_quote_style);
898        self
899    }
900
901    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
902    pub fn with_supports_nulls_first_in_sort(
903        mut self,
904        supports_nulls_first_in_sort: bool,
905    ) -> Self {
906        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
907        self
908    }
909
910    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
911    pub fn with_use_timestamp_for_date64(
912        mut self,
913        use_timestamp_for_date64: bool,
914    ) -> Self {
915        self.use_timestamp_for_date64 = use_timestamp_for_date64;
916        self
917    }
918
919    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
920    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
921        self.interval_style = interval_style;
922        self
923    }
924
925    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
926    pub fn with_character_length_style(
927        mut self,
928        character_length_style: CharacterLengthStyle,
929    ) -> Self {
930        self.character_length_style = character_length_style;
931        self
932    }
933
934    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
935    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
936        self.float64_ast_dtype = float64_ast_dtype;
937        self
938    }
939
940    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
941    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
942        self.utf8_cast_dtype = utf8_cast_dtype;
943        self
944    }
945
946    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
947    pub fn with_large_utf8_cast_dtype(
948        mut self,
949        large_utf8_cast_dtype: ast::DataType,
950    ) -> Self {
951        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
952        self
953    }
954
955    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
956    pub fn with_date_field_extract_style(
957        mut self,
958        date_field_extract_style: DateFieldExtractStyle,
959    ) -> Self {
960        self.date_field_extract_style = date_field_extract_style;
961        self
962    }
963
964    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
965    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
966        self.int64_cast_dtype = int64_cast_dtype;
967        self
968    }
969
970    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
971    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
972        self.int32_cast_dtype = int32_cast_dtype;
973        self
974    }
975
976    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
977    pub fn with_timestamp_cast_dtype(
978        mut self,
979        timestamp_cast_dtype: ast::DataType,
980        timestamp_tz_cast_dtype: ast::DataType,
981    ) -> Self {
982        self.timestamp_cast_dtype = timestamp_cast_dtype;
983        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
984        self
985    }
986
987    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
988        self.date32_cast_dtype = date32_cast_dtype;
989        self
990    }
991
992    /// Customize the dialect to support column aliases as part of alias table definition
993    pub fn with_supports_column_alias_in_table_alias(
994        mut self,
995        supports_column_alias_in_table_alias: bool,
996    ) -> Self {
997        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
998        self
999    }
1000
1001    pub fn with_requires_derived_table_alias(
1002        mut self,
1003        requires_derived_table_alias: bool,
1004    ) -> Self {
1005        self.requires_derived_table_alias = requires_derived_table_alias;
1006        self
1007    }
1008
1009    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
1010        self.division_operator = division_operator;
1011        self
1012    }
1013
1014    pub fn with_window_func_support_window_frame(
1015        mut self,
1016        window_func_support_window_frame: bool,
1017    ) -> Self {
1018        self.window_func_support_window_frame = window_func_support_window_frame;
1019        self
1020    }
1021
1022    /// Customize the dialect to allow full qualified column names
1023    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
1024        self.full_qualified_col = full_qualified_col;
1025        self
1026    }
1027
1028    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
1029        self.unnest_as_table_factor = unnest_as_table_factor;
1030        self
1031    }
1032}