datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql, Unparser,
23};
24use arrow::datatypes::TimeUnit;
25use datafusion_common::Result;
26use datafusion_expr::Expr;
27use regex::Regex;
28use sqlparser::tokenizer::Span;
29use sqlparser::{
30    ast::{
31        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
32    },
33    keywords::ALL_KEYWORDS,
34};
35
/// Callback that overrides how one scalar function is unparsed.
///
/// It receives the active [`Unparser`] and the function's argument expressions.
/// Returning `Ok(Some(expr))` supplies a custom SQL expression; returning
/// `Ok(None)` means the default unparsing should be used
/// (see [`Dialect::scalar_function_to_sql_overrides`]).
pub type ScalarFnToSqlHandler =
    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
38
39/// `Dialect` to use for Unparsing
40///
41/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
42/// but this behavior can be overridden as needed
43///
44/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
45///
46/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
47/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
48pub trait Dialect: Send + Sync {
49    /// Return the character used to quote identifiers.
50    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
51
52    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
53    fn supports_nulls_first_in_sort(&self) -> bool {
54        true
55    }
56
57    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
58    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
59    fn use_timestamp_for_date64(&self) -> bool {
60        false
61    }
62
63    fn interval_style(&self) -> IntervalStyle {
64        IntervalStyle::PostgresVerbose
65    }
66
67    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
68    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
69    fn float64_ast_dtype(&self) -> ast::DataType {
70        ast::DataType::Double(ast::ExactNumberInfo::None)
71    }
72
73    /// The SQL type to use for Arrow Utf8 unparsing
74    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
75    fn utf8_cast_dtype(&self) -> ast::DataType {
76        ast::DataType::Varchar(None)
77    }
78
79    /// The SQL type to use for Arrow LargeUtf8 unparsing
80    /// Most dialects use TEXT, but some, like MySQL, require CHAR
81    fn large_utf8_cast_dtype(&self) -> ast::DataType {
82        ast::DataType::Text
83    }
84
85    /// The date field extract style to use: `DateFieldExtractStyle`
86    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
87        DateFieldExtractStyle::DatePart
88    }
89
90    /// The character length extraction style to use: `CharacterLengthStyle`
91    fn character_length_style(&self) -> CharacterLengthStyle {
92        CharacterLengthStyle::CharacterLength
93    }
94
95    /// The SQL type to use for Arrow Int64 unparsing
96    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
97    fn int64_cast_dtype(&self) -> ast::DataType {
98        ast::DataType::BigInt(None)
99    }
100
101    /// The SQL type to use for Arrow Int32 unparsing
102    /// Most dialects use Integer, but some, like MySQL, require SIGNED
103    fn int32_cast_dtype(&self) -> ast::DataType {
104        ast::DataType::Integer(None)
105    }
106
107    /// The SQL type to use for Timestamp unparsing
108    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
109    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
110    fn timestamp_cast_dtype(
111        &self,
112        _time_unit: &TimeUnit,
113        tz: &Option<Arc<str>>,
114    ) -> ast::DataType {
115        let tz_info = match tz {
116            Some(_) => TimezoneInfo::WithTimeZone,
117            None => TimezoneInfo::None,
118        };
119
120        ast::DataType::Timestamp(None, tz_info)
121    }
122
123    /// The SQL type to use for Arrow Date32 unparsing
124    /// Most dialects use Date, but some, like SQLite require TEXT
125    fn date32_cast_dtype(&self) -> ast::DataType {
126        ast::DataType::Date
127    }
128
129    /// Does the dialect support specifying column aliases as part of alias table definition?
130    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
131    fn supports_column_alias_in_table_alias(&self) -> bool {
132        true
133    }
134
135    /// Whether the dialect requires a table alias for any subquery in the FROM clause
136    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
137    fn requires_derived_table_alias(&self) -> bool {
138        false
139    }
140
141    /// The division operator for the dialect
142    /// Most dialect uses ` BinaryOperator::Divide` (/)
143    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
144    fn division_operator(&self) -> BinaryOperator {
145        BinaryOperator::Divide
146    }
147
148    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
149    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
150    /// a custom implementation for the function.
151    fn scalar_function_to_sql_overrides(
152        &self,
153        _unparser: &Unparser,
154        _func_name: &str,
155        _args: &[Expr],
156    ) -> Result<Option<ast::Expr>> {
157        Ok(None)
158    }
159
160    /// Allows the dialect to choose to omit window frame in unparsing
161    /// based on function name and window frame bound
162    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
163    fn window_func_support_window_frame(
164        &self,
165        _func_name: &str,
166        _start_bound: &WindowFrameBound,
167        _end_bound: &WindowFrameBound,
168    ) -> bool {
169        true
170    }
171
172    /// Extends the dialect's default rules for unparsing scalar functions.
173    /// This is useful for supporting application-specific UDFs or custom engine extensions.
174    fn with_custom_scalar_overrides(
175        self,
176        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
177    ) -> Self
178    where
179        Self: Sized,
180    {
181        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
182    }
183
184    /// Allow to unparse a qualified column with a full qualified name
185    /// (e.g. catalog_name.schema_name.table_name.column_name)
186    /// Otherwise, the column will be unparsed with only the table name and column name
187    /// (e.g. table_name.column_name)
188    fn full_qualified_col(&self) -> bool {
189        false
190    }
191
192    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
193    ///
194    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
195    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
196    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
197    fn unnest_as_table_factor(&self) -> bool {
198        false
199    }
200
201    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
202    /// Returns None if the default unparsing should be used, or Some(String) if there is
203    /// a custom implementation for the alias.
204    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
205        Ok(None)
206    }
207}
208
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
/// Different DBMSs follow different standards; popular ones are:
/// postgres_verbose: '2 years 15 months 100 weeks 99 hours 123456789 milliseconds', which is
/// compatible with the arrow display format, as well as duckdb
/// sql standard format is '1-2' for year-month, or '1 10:10:10.123456' for day-time
/// <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose Postgres form, e.g. '2 years 15 months 100 weeks 99 hours 123456789 milliseconds'.
    PostgresVerbose,
    /// SQL standard form, e.g. '1-2' for year-month or '1 10:10:10.123456' for day-time.
    SQLStandard,
    /// Interval form used for MySQL.
    MySQL,
}
223
/// Datetime subfield extraction style for unparsing
///
/// `<https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>`
/// Different DBMSs follow different standards; popular ones are:
/// date_part('YEAR', date '2001-02-16')
/// EXTRACT(YEAR from date '2001-02-16')
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// `date_part('YEAR', date '2001-02-16')` form.
    DatePart,
    /// `EXTRACT(YEAR from date '2001-02-16')` form.
    Extract,
    /// `strftime`-based form; this is the style selected by the SQLite dialect.
    Strftime,
}
237
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the
/// number of characters in a string:
/// `Length` style uses length(x)
/// `CharacterLength` style uses character_length(x)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterLengthStyle {
    /// Unparse as `length(x)`.
    Length,
    /// Unparse as `character_length(x)`.
    CharacterLength,
}
248
249pub struct DefaultDialect {}
250
251impl Dialect for DefaultDialect {
252    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
253        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
254        let id_upper = identifier.to_uppercase();
255        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
256        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
257        if (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
258            || !identifier_regex.is_match(identifier)
259        {
260            Some('"')
261        } else {
262            None
263        }
264    }
265}
266
267pub struct PostgreSqlDialect {}
268
269impl Dialect for PostgreSqlDialect {
270    fn identifier_quote_style(&self, _: &str) -> Option<char> {
271        Some('"')
272    }
273
274    fn interval_style(&self) -> IntervalStyle {
275        IntervalStyle::PostgresVerbose
276    }
277
278    fn float64_ast_dtype(&self) -> ast::DataType {
279        ast::DataType::DoublePrecision
280    }
281
282    fn scalar_function_to_sql_overrides(
283        &self,
284        unparser: &Unparser,
285        func_name: &str,
286        args: &[Expr],
287    ) -> Result<Option<ast::Expr>> {
288        if func_name == "round" {
289            return Ok(Some(
290                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
291            ));
292        }
293
294        Ok(None)
295    }
296}
297
298impl PostgreSqlDialect {
299    fn round_to_sql_enforce_numeric(
300        &self,
301        unparser: &Unparser,
302        func_name: &str,
303        args: &[Expr],
304    ) -> Result<ast::Expr> {
305        let mut args = unparser.function_args_to_sql(args)?;
306
307        // Enforce the first argument to be Numeric
308        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
309            args.first_mut()
310        {
311            if let ast::Expr::Cast { data_type, .. } = expr {
312                // Don't create an additional cast wrapper if we can update the existing one
313                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
314            } else {
315                // Wrap the expression in a new cast
316                *expr = ast::Expr::Cast {
317                    kind: ast::CastKind::Cast,
318                    expr: Box::new(expr.clone()),
319                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
320                    format: None,
321                };
322            }
323        }
324
325        Ok(ast::Expr::Function(Function {
326            name: ObjectName::from(vec![Ident {
327                value: func_name.to_string(),
328                quote_style: None,
329                span: Span::empty(),
330            }]),
331            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
332                duplicate_treatment: None,
333                args,
334                clauses: vec![],
335            }),
336            filter: None,
337            null_treatment: None,
338            over: None,
339            within_group: vec![],
340            parameters: ast::FunctionArguments::None,
341            uses_odbc_syntax: false,
342        }))
343    }
344}
345
346#[derive(Default)]
347pub struct DuckDBDialect {
348    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
349}
350
351impl DuckDBDialect {
352    #[must_use]
353    pub fn new() -> Self {
354        Self {
355            custom_scalar_fn_overrides: HashMap::new(),
356        }
357    }
358}
359
360impl Dialect for DuckDBDialect {
361    fn identifier_quote_style(&self, _: &str) -> Option<char> {
362        Some('"')
363    }
364
365    fn character_length_style(&self) -> CharacterLengthStyle {
366        CharacterLengthStyle::Length
367    }
368
369    fn division_operator(&self) -> BinaryOperator {
370        BinaryOperator::DuckIntegerDivide
371    }
372
373    fn with_custom_scalar_overrides(
374        mut self,
375        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
376    ) -> Self {
377        for (func_name, handler) in handlers {
378            self.custom_scalar_fn_overrides
379                .insert(func_name.to_string(), handler);
380        }
381        self
382    }
383
384    fn scalar_function_to_sql_overrides(
385        &self,
386        unparser: &Unparser,
387        func_name: &str,
388        args: &[Expr],
389    ) -> Result<Option<ast::Expr>> {
390        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
391            return handler(unparser, args);
392        }
393
394        if func_name == "character_length" {
395            return character_length_to_sql(
396                unparser,
397                self.character_length_style(),
398                args,
399            );
400        }
401
402        Ok(None)
403    }
404}
405
406pub struct MySqlDialect {}
407
408impl Dialect for MySqlDialect {
409    fn identifier_quote_style(&self, _: &str) -> Option<char> {
410        Some('`')
411    }
412
413    fn supports_nulls_first_in_sort(&self) -> bool {
414        false
415    }
416
417    fn interval_style(&self) -> IntervalStyle {
418        IntervalStyle::MySQL
419    }
420
421    fn utf8_cast_dtype(&self) -> ast::DataType {
422        ast::DataType::Char(None)
423    }
424
425    fn large_utf8_cast_dtype(&self) -> ast::DataType {
426        ast::DataType::Char(None)
427    }
428
429    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
430        DateFieldExtractStyle::Extract
431    }
432
433    fn int64_cast_dtype(&self) -> ast::DataType {
434        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
435    }
436
437    fn int32_cast_dtype(&self) -> ast::DataType {
438        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
439    }
440
441    fn timestamp_cast_dtype(
442        &self,
443        _time_unit: &TimeUnit,
444        _tz: &Option<Arc<str>>,
445    ) -> ast::DataType {
446        ast::DataType::Datetime(None)
447    }
448
449    fn requires_derived_table_alias(&self) -> bool {
450        true
451    }
452
453    fn scalar_function_to_sql_overrides(
454        &self,
455        unparser: &Unparser,
456        func_name: &str,
457        args: &[Expr],
458    ) -> Result<Option<ast::Expr>> {
459        if func_name == "date_part" {
460            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
461        }
462
463        Ok(None)
464    }
465}
466
467pub struct SqliteDialect {}
468
469impl Dialect for SqliteDialect {
470    fn identifier_quote_style(&self, _: &str) -> Option<char> {
471        Some('`')
472    }
473
474    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
475        DateFieldExtractStyle::Strftime
476    }
477
478    fn date32_cast_dtype(&self) -> ast::DataType {
479        ast::DataType::Text
480    }
481
482    fn character_length_style(&self) -> CharacterLengthStyle {
483        CharacterLengthStyle::Length
484    }
485
486    fn supports_column_alias_in_table_alias(&self) -> bool {
487        false
488    }
489
490    fn scalar_function_to_sql_overrides(
491        &self,
492        unparser: &Unparser,
493        func_name: &str,
494        args: &[Expr],
495    ) -> Result<Option<ast::Expr>> {
496        match func_name {
497            "date_part" => {
498                date_part_to_sql(unparser, self.date_field_extract_style(), args)
499            }
500            "character_length" => {
501                character_length_to_sql(unparser, self.character_length_style(), args)
502            }
503            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
504            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
505            _ => Ok(None),
506        }
507    }
508}
509
510#[derive(Default)]
511pub struct BigQueryDialect {}
512
513impl Dialect for BigQueryDialect {
514    fn identifier_quote_style(&self, _: &str) -> Option<char> {
515        Some('`')
516    }
517
518    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
519        // Check if alias contains any special characters not supported by BigQuery col names
520        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
521        let special_chars: [char; 20] = [
522            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
523            '^', '`', '{', '}', '~',
524        ];
525
526        if alias.chars().any(|c| special_chars.contains(&c)) {
527            let mut encoded_name = String::new();
528            for c in alias.chars() {
529                if special_chars.contains(&c) {
530                    encoded_name.push_str(&format!("_{}", c as u32));
531                } else {
532                    encoded_name.push(c);
533                }
534            }
535            Ok(Some(encoded_name))
536        } else {
537            Ok(Some(alias.to_string()))
538        }
539    }
540
541    fn unnest_as_table_factor(&self) -> bool {
542        true
543    }
544}
545
546impl BigQueryDialect {
547    #[must_use]
548    pub fn new() -> Self {
549        Self {}
550    }
551}
552
553pub struct CustomDialect {
554    identifier_quote_style: Option<char>,
555    supports_nulls_first_in_sort: bool,
556    use_timestamp_for_date64: bool,
557    interval_style: IntervalStyle,
558    float64_ast_dtype: ast::DataType,
559    utf8_cast_dtype: ast::DataType,
560    large_utf8_cast_dtype: ast::DataType,
561    date_field_extract_style: DateFieldExtractStyle,
562    character_length_style: CharacterLengthStyle,
563    int64_cast_dtype: ast::DataType,
564    int32_cast_dtype: ast::DataType,
565    timestamp_cast_dtype: ast::DataType,
566    timestamp_tz_cast_dtype: ast::DataType,
567    date32_cast_dtype: ast::DataType,
568    supports_column_alias_in_table_alias: bool,
569    requires_derived_table_alias: bool,
570    division_operator: BinaryOperator,
571    window_func_support_window_frame: bool,
572    full_qualified_col: bool,
573    unnest_as_table_factor: bool,
574}
575
576impl Default for CustomDialect {
577    fn default() -> Self {
578        Self {
579            identifier_quote_style: None,
580            supports_nulls_first_in_sort: true,
581            use_timestamp_for_date64: false,
582            interval_style: IntervalStyle::SQLStandard,
583            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
584            utf8_cast_dtype: ast::DataType::Varchar(None),
585            large_utf8_cast_dtype: ast::DataType::Text,
586            date_field_extract_style: DateFieldExtractStyle::DatePart,
587            character_length_style: CharacterLengthStyle::CharacterLength,
588            int64_cast_dtype: ast::DataType::BigInt(None),
589            int32_cast_dtype: ast::DataType::Integer(None),
590            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
591            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
592                None,
593                TimezoneInfo::WithTimeZone,
594            ),
595            date32_cast_dtype: ast::DataType::Date,
596            supports_column_alias_in_table_alias: true,
597            requires_derived_table_alias: false,
598            division_operator: BinaryOperator::Divide,
599            window_func_support_window_frame: true,
600            full_qualified_col: false,
601            unnest_as_table_factor: false,
602        }
603    }
604}
605
606impl Dialect for CustomDialect {
607    fn identifier_quote_style(&self, _: &str) -> Option<char> {
608        self.identifier_quote_style
609    }
610
611    fn supports_nulls_first_in_sort(&self) -> bool {
612        self.supports_nulls_first_in_sort
613    }
614
615    fn use_timestamp_for_date64(&self) -> bool {
616        self.use_timestamp_for_date64
617    }
618
619    fn interval_style(&self) -> IntervalStyle {
620        self.interval_style
621    }
622
623    fn float64_ast_dtype(&self) -> ast::DataType {
624        self.float64_ast_dtype.clone()
625    }
626
627    fn utf8_cast_dtype(&self) -> ast::DataType {
628        self.utf8_cast_dtype.clone()
629    }
630
631    fn large_utf8_cast_dtype(&self) -> ast::DataType {
632        self.large_utf8_cast_dtype.clone()
633    }
634
635    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
636        self.date_field_extract_style
637    }
638
639    fn character_length_style(&self) -> CharacterLengthStyle {
640        self.character_length_style
641    }
642
643    fn int64_cast_dtype(&self) -> ast::DataType {
644        self.int64_cast_dtype.clone()
645    }
646
647    fn int32_cast_dtype(&self) -> ast::DataType {
648        self.int32_cast_dtype.clone()
649    }
650
651    fn timestamp_cast_dtype(
652        &self,
653        _time_unit: &TimeUnit,
654        tz: &Option<Arc<str>>,
655    ) -> ast::DataType {
656        if tz.is_some() {
657            self.timestamp_tz_cast_dtype.clone()
658        } else {
659            self.timestamp_cast_dtype.clone()
660        }
661    }
662
663    fn date32_cast_dtype(&self) -> ast::DataType {
664        self.date32_cast_dtype.clone()
665    }
666
667    fn supports_column_alias_in_table_alias(&self) -> bool {
668        self.supports_column_alias_in_table_alias
669    }
670
671    fn scalar_function_to_sql_overrides(
672        &self,
673        unparser: &Unparser,
674        func_name: &str,
675        args: &[Expr],
676    ) -> Result<Option<ast::Expr>> {
677        match func_name {
678            "date_part" => {
679                date_part_to_sql(unparser, self.date_field_extract_style(), args)
680            }
681            "character_length" => {
682                character_length_to_sql(unparser, self.character_length_style(), args)
683            }
684            _ => Ok(None),
685        }
686    }
687
688    fn requires_derived_table_alias(&self) -> bool {
689        self.requires_derived_table_alias
690    }
691
692    fn division_operator(&self) -> BinaryOperator {
693        self.division_operator.clone()
694    }
695
696    fn window_func_support_window_frame(
697        &self,
698        _func_name: &str,
699        _start_bound: &WindowFrameBound,
700        _end_bound: &WindowFrameBound,
701    ) -> bool {
702        self.window_func_support_window_frame
703    }
704
705    fn full_qualified_col(&self) -> bool {
706        self.full_qualified_col
707    }
708
709    fn unnest_as_table_factor(&self) -> bool {
710        self.unnest_as_table_factor
711    }
712}
713
714/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
715///
716///
717/// # Examples
718///
719/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
720/// but with `use_timestamp_for_date64` overridden to `true`
721///
722/// ```
723/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
724/// let dialect = CustomDialectBuilder::new()
725///     .with_use_timestamp_for_date64(true)
726///     .build();
727/// ```
728pub struct CustomDialectBuilder {
729    identifier_quote_style: Option<char>,
730    supports_nulls_first_in_sort: bool,
731    use_timestamp_for_date64: bool,
732    interval_style: IntervalStyle,
733    float64_ast_dtype: ast::DataType,
734    utf8_cast_dtype: ast::DataType,
735    large_utf8_cast_dtype: ast::DataType,
736    date_field_extract_style: DateFieldExtractStyle,
737    character_length_style: CharacterLengthStyle,
738    int64_cast_dtype: ast::DataType,
739    int32_cast_dtype: ast::DataType,
740    timestamp_cast_dtype: ast::DataType,
741    timestamp_tz_cast_dtype: ast::DataType,
742    date32_cast_dtype: ast::DataType,
743    supports_column_alias_in_table_alias: bool,
744    requires_derived_table_alias: bool,
745    division_operator: BinaryOperator,
746    window_func_support_window_frame: bool,
747    full_qualified_col: bool,
748    unnest_as_table_factor: bool,
749}
750
751impl Default for CustomDialectBuilder {
752    fn default() -> Self {
753        Self::new()
754    }
755}
756
757impl CustomDialectBuilder {
758    pub fn new() -> Self {
759        Self {
760            identifier_quote_style: None,
761            supports_nulls_first_in_sort: true,
762            use_timestamp_for_date64: false,
763            interval_style: IntervalStyle::PostgresVerbose,
764            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
765            utf8_cast_dtype: ast::DataType::Varchar(None),
766            large_utf8_cast_dtype: ast::DataType::Text,
767            date_field_extract_style: DateFieldExtractStyle::DatePart,
768            character_length_style: CharacterLengthStyle::CharacterLength,
769            int64_cast_dtype: ast::DataType::BigInt(None),
770            int32_cast_dtype: ast::DataType::Integer(None),
771            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
772            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
773                None,
774                TimezoneInfo::WithTimeZone,
775            ),
776            date32_cast_dtype: ast::DataType::Date,
777            supports_column_alias_in_table_alias: true,
778            requires_derived_table_alias: false,
779            division_operator: BinaryOperator::Divide,
780            window_func_support_window_frame: true,
781            full_qualified_col: false,
782            unnest_as_table_factor: false,
783        }
784    }
785
786    pub fn build(self) -> CustomDialect {
787        CustomDialect {
788            identifier_quote_style: self.identifier_quote_style,
789            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
790            use_timestamp_for_date64: self.use_timestamp_for_date64,
791            interval_style: self.interval_style,
792            float64_ast_dtype: self.float64_ast_dtype,
793            utf8_cast_dtype: self.utf8_cast_dtype,
794            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
795            date_field_extract_style: self.date_field_extract_style,
796            character_length_style: self.character_length_style,
797            int64_cast_dtype: self.int64_cast_dtype,
798            int32_cast_dtype: self.int32_cast_dtype,
799            timestamp_cast_dtype: self.timestamp_cast_dtype,
800            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
801            date32_cast_dtype: self.date32_cast_dtype,
802            supports_column_alias_in_table_alias: self
803                .supports_column_alias_in_table_alias,
804            requires_derived_table_alias: self.requires_derived_table_alias,
805            division_operator: self.division_operator,
806            window_func_support_window_frame: self.window_func_support_window_frame,
807            full_qualified_col: self.full_qualified_col,
808            unnest_as_table_factor: self.unnest_as_table_factor,
809        }
810    }
811
812    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
813    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
814        self.identifier_quote_style = Some(identifier_quote_style);
815        self
816    }
817
818    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
819    pub fn with_supports_nulls_first_in_sort(
820        mut self,
821        supports_nulls_first_in_sort: bool,
822    ) -> Self {
823        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
824        self
825    }
826
827    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
828    pub fn with_use_timestamp_for_date64(
829        mut self,
830        use_timestamp_for_date64: bool,
831    ) -> Self {
832        self.use_timestamp_for_date64 = use_timestamp_for_date64;
833        self
834    }
835
836    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
837    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
838        self.interval_style = interval_style;
839        self
840    }
841
842    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
843    pub fn with_character_length_style(
844        mut self,
845        character_length_style: CharacterLengthStyle,
846    ) -> Self {
847        self.character_length_style = character_length_style;
848        self
849    }
850
851    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
852    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
853        self.float64_ast_dtype = float64_ast_dtype;
854        self
855    }
856
857    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
858    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
859        self.utf8_cast_dtype = utf8_cast_dtype;
860        self
861    }
862
863    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
864    pub fn with_large_utf8_cast_dtype(
865        mut self,
866        large_utf8_cast_dtype: ast::DataType,
867    ) -> Self {
868        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
869        self
870    }
871
872    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
873    pub fn with_date_field_extract_style(
874        mut self,
875        date_field_extract_style: DateFieldExtractStyle,
876    ) -> Self {
877        self.date_field_extract_style = date_field_extract_style;
878        self
879    }
880
881    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
882    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
883        self.int64_cast_dtype = int64_cast_dtype;
884        self
885    }
886
887    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
888    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
889        self.int32_cast_dtype = int32_cast_dtype;
890        self
891    }
892
893    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
894    pub fn with_timestamp_cast_dtype(
895        mut self,
896        timestamp_cast_dtype: ast::DataType,
897        timestamp_tz_cast_dtype: ast::DataType,
898    ) -> Self {
899        self.timestamp_cast_dtype = timestamp_cast_dtype;
900        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
901        self
902    }
903
904    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
905        self.date32_cast_dtype = date32_cast_dtype;
906        self
907    }
908
909    /// Customize the dialect to support column aliases as part of alias table definition
910    pub fn with_supports_column_alias_in_table_alias(
911        mut self,
912        supports_column_alias_in_table_alias: bool,
913    ) -> Self {
914        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
915        self
916    }
917
918    pub fn with_requires_derived_table_alias(
919        mut self,
920        requires_derived_table_alias: bool,
921    ) -> Self {
922        self.requires_derived_table_alias = requires_derived_table_alias;
923        self
924    }
925
926    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
927        self.division_operator = division_operator;
928        self
929    }
930
931    pub fn with_window_func_support_window_frame(
932        mut self,
933        window_func_support_window_frame: bool,
934    ) -> Self {
935        self.window_func_support_window_frame = window_func_support_window_frame;
936        self
937    }
938
939    /// Customize the dialect to allow full qualified column names
940    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
941        self.full_qualified_col = full_qualified_col;
942        self
943    }
944
945    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
946        self.unnest_as_table_factor = unnest_as_table_factor;
947        self
948    }
949}