datafusion_sql/unparser/
dialect.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::{collections::HashMap, sync::Arc};
19
20use super::{
21    utils::character_length_to_sql, utils::date_part_to_sql,
22    utils::sqlite_date_trunc_to_sql, utils::sqlite_from_unixtime_to_sql, Unparser,
23};
24use arrow::datatypes::TimeUnit;
25use datafusion_common::Result;
26use datafusion_expr::Expr;
27use regex::Regex;
28use sqlparser::tokenizer::Span;
29use sqlparser::{
30    ast::{
31        self, BinaryOperator, Function, Ident, ObjectName, TimezoneInfo, WindowFrameBound,
32    },
33    keywords::ALL_KEYWORDS,
34};
35
/// Boxed callback that turns a scalar function call into a dialect-specific SQL expression.
///
/// It receives the active [`Unparser`] and the function's arguments. Dialects that store
/// these handlers return the handler's result directly from
/// [`Dialect::scalar_function_to_sql_overrides`], so `Ok(None)` requests the default
/// unparsing and `Ok(Some(expr))` supplies a custom one.
pub type ScalarFnToSqlHandler =
    Box<dyn Fn(&Unparser, &[Expr]) -> Result<Option<ast::Expr>> + Send + Sync>;
38
39/// `Dialect` to use for Unparsing
40///
41/// The default dialect tries to avoid quoting identifiers unless necessary (e.g. `a` instead of `"a"`)
42/// but this behavior can be overridden as needed
43///
44/// **Note**: This trait will eventually be replaced by the Dialect in the SQLparser package
45///
46/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1170>
47/// See also the discussion in <https://github.com/apache/datafusion/pull/10625>
48pub trait Dialect: Send + Sync {
49    /// Return the character used to quote identifiers.
50    fn identifier_quote_style(&self, _identifier: &str) -> Option<char>;
51
52    /// Does the dialect support specifying `NULLS FIRST/LAST` in `ORDER BY` clauses?
53    fn supports_nulls_first_in_sort(&self) -> bool {
54        true
55    }
56
57    /// Does the dialect use TIMESTAMP to represent Date64 rather than DATETIME?
58    /// E.g. Trino, Athena and Dremio does not have DATETIME data type
59    fn use_timestamp_for_date64(&self) -> bool {
60        false
61    }
62
63    fn interval_style(&self) -> IntervalStyle {
64        IntervalStyle::PostgresVerbose
65    }
66
67    /// Does the dialect use DOUBLE PRECISION to represent Float64 rather than DOUBLE?
68    /// E.g. Postgres uses DOUBLE PRECISION instead of DOUBLE
69    fn float64_ast_dtype(&self) -> ast::DataType {
70        ast::DataType::Double(ast::ExactNumberInfo::None)
71    }
72
73    /// The SQL type to use for Arrow Utf8 unparsing
74    /// Most dialects use VARCHAR, but some, like MySQL, require CHAR
75    fn utf8_cast_dtype(&self) -> ast::DataType {
76        ast::DataType::Varchar(None)
77    }
78
79    /// The SQL type to use for Arrow LargeUtf8 unparsing
80    /// Most dialects use TEXT, but some, like MySQL, require CHAR
81    fn large_utf8_cast_dtype(&self) -> ast::DataType {
82        ast::DataType::Text
83    }
84
85    /// The date field extract style to use: `DateFieldExtractStyle`
86    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
87        DateFieldExtractStyle::DatePart
88    }
89
90    /// The character length extraction style to use: `CharacterLengthStyle`
91    fn character_length_style(&self) -> CharacterLengthStyle {
92        CharacterLengthStyle::CharacterLength
93    }
94
95    /// The SQL type to use for Arrow Int64 unparsing
96    /// Most dialects use BigInt, but some, like MySQL, require SIGNED
97    fn int64_cast_dtype(&self) -> ast::DataType {
98        ast::DataType::BigInt(None)
99    }
100
101    /// The SQL type to use for Arrow Int32 unparsing
102    /// Most dialects use Integer, but some, like MySQL, require SIGNED
103    fn int32_cast_dtype(&self) -> ast::DataType {
104        ast::DataType::Integer(None)
105    }
106
107    /// The SQL type to use for Timestamp unparsing
108    /// Most dialects use Timestamp, but some, like MySQL, require Datetime
109    /// Some dialects like Dremio does not support WithTimeZone and requires always Timestamp
110    fn timestamp_cast_dtype(
111        &self,
112        _time_unit: &TimeUnit,
113        tz: &Option<Arc<str>>,
114    ) -> ast::DataType {
115        let tz_info = match tz {
116            Some(_) => TimezoneInfo::WithTimeZone,
117            None => TimezoneInfo::None,
118        };
119
120        ast::DataType::Timestamp(None, tz_info)
121    }
122
123    /// The SQL type to use for Arrow Date32 unparsing
124    /// Most dialects use Date, but some, like SQLite require TEXT
125    fn date32_cast_dtype(&self) -> ast::DataType {
126        ast::DataType::Date
127    }
128
129    /// Does the dialect support specifying column aliases as part of alias table definition?
130    /// (SELECT col1, col2 from my_table) AS my_table_alias(col1_alias, col2_alias)
131    fn supports_column_alias_in_table_alias(&self) -> bool {
132        true
133    }
134
135    /// Whether the dialect requires a table alias for any subquery in the FROM clause
136    /// This affects behavior when deriving logical plans for Sort, Limit, etc.
137    fn requires_derived_table_alias(&self) -> bool {
138        false
139    }
140
141    /// The division operator for the dialect
142    /// Most dialect uses ` BinaryOperator::Divide` (/)
143    /// But DuckDB dialect uses `BinaryOperator::DuckIntegerDivide` (//)
144    fn division_operator(&self) -> BinaryOperator {
145        BinaryOperator::Divide
146    }
147
148    /// Allows the dialect to override scalar function unparsing if the dialect has specific rules.
149    /// Returns None if the default unparsing should be used, or Some(ast::Expr) if there is
150    /// a custom implementation for the function.
151    fn scalar_function_to_sql_overrides(
152        &self,
153        _unparser: &Unparser,
154        _func_name: &str,
155        _args: &[Expr],
156    ) -> Result<Option<ast::Expr>> {
157        Ok(None)
158    }
159
160    /// Allows the dialect to choose to omit window frame in unparsing
161    /// based on function name and window frame bound
162    /// Returns false if specific function name / window frame bound indicates no window frame is needed in unparsing
163    fn window_func_support_window_frame(
164        &self,
165        _func_name: &str,
166        _start_bound: &WindowFrameBound,
167        _end_bound: &WindowFrameBound,
168    ) -> bool {
169        true
170    }
171
172    /// Extends the dialect's default rules for unparsing scalar functions.
173    /// This is useful for supporting application-specific UDFs or custom engine extensions.
174    fn with_custom_scalar_overrides(
175        self,
176        _handlers: Vec<(&str, ScalarFnToSqlHandler)>,
177    ) -> Self
178    where
179        Self: Sized,
180    {
181        unimplemented!("Custom scalar overrides are not supported by this dialect yet");
182    }
183
184    /// Allow to unparse a qualified column with a full qualified name
185    /// (e.g. catalog_name.schema_name.table_name.column_name)
186    /// Otherwise, the column will be unparsed with only the table name and column name
187    /// (e.g. table_name.column_name)
188    fn full_qualified_col(&self) -> bool {
189        false
190    }
191
192    /// Allow to unparse the unnest plan as [ast::TableFactor::UNNEST].
193    ///
194    /// Some dialects like BigQuery require UNNEST to be used in the FROM clause but
195    /// the LogicalPlan planner always puts UNNEST in the SELECT clause. This flag allows
196    /// to unparse the UNNEST plan as [ast::TableFactor::UNNEST] instead of a subquery.
197    fn unnest_as_table_factor(&self) -> bool {
198        false
199    }
200
201    /// Allows the dialect to override column alias unparsing if the dialect has specific rules.
202    /// Returns None if the default unparsing should be used, or Some(String) if there is
203    /// a custom implementation for the alias.
204    fn col_alias_overrides(&self, _alias: &str) -> Result<Option<String>> {
205        Ok(None)
206    }
207}
208
/// `IntervalStyle` to use for unparsing
///
/// <https://www.postgresql.org/docs/current/datatype-datetime.html#DATATYPE-INTERVAL-INPUT>
///
/// Different DBMSs follow different standards for interval literals; popular ones are:
/// - postgres_verbose: `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`,
///   which is compatible with the arrow display format, as well as duckdb
/// - sql standard: `'1-2'` for year-month, or `'1 10:10:10.123456'` for day-time
///   <https://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt>
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntervalStyle {
    /// Verbose Postgres form, e.g. `'2 years 15 months 100 weeks 99 hours 123456789 milliseconds'`.
    PostgresVerbose,
    /// SQL standard form, e.g. `'1-2'` for year-month or `'1 10:10:10.123456'` for day-time.
    SQLStandard,
    /// The style selected by `MySqlDialect`.
    MySQL,
}
223
/// Datetime subfield extraction style for unparsing
///
/// <https://www.postgresql.org/docs/current/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT>
///
/// Different DBMSs follow different standards; popular ones are:
/// - `date_part('YEAR', date '2001-02-16')`
/// - `EXTRACT(YEAR from date '2001-02-16')`
///
/// Some DBMSs, like Postgres, support both, whereas others like MySQL require EXTRACT.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateFieldExtractStyle {
    /// Function-call form, e.g. `date_part('YEAR', date '2001-02-16')`.
    DatePart,
    /// Keyword form, e.g. `EXTRACT(YEAR from date '2001-02-16')`.
    Extract,
    /// The style selected by `SqliteDialect`.
    Strftime,
}
237
/// `CharacterLengthStyle` to use for unparsing
///
/// Different DBMSs use different names for the function that calculates the number of
/// characters in a string:
/// - `Length` style uses `length(x)`
/// - `CharacterLength` style uses `character_length(x)`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharacterLengthStyle {
    /// Uses `length(x)`.
    Length,
    /// Uses `character_length(x)`.
    CharacterLength,
}
248
249pub struct DefaultDialect {}
250
251impl Dialect for DefaultDialect {
252    fn identifier_quote_style(&self, identifier: &str) -> Option<char> {
253        let identifier_regex = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap();
254        let id_upper = identifier.to_uppercase();
255        // Special case ignore "ID", see https://github.com/sqlparser-rs/sqlparser-rs/issues/1382
256        // ID is a keyword in ClickHouse, but we don't want to quote it when unparsing SQL here
257        if (id_upper != "ID" && ALL_KEYWORDS.contains(&id_upper.as_str()))
258            || !identifier_regex.is_match(identifier)
259        {
260            Some('"')
261        } else {
262            None
263        }
264    }
265}
266
267pub struct PostgreSqlDialect {}
268
269impl Dialect for PostgreSqlDialect {
270    fn identifier_quote_style(&self, _: &str) -> Option<char> {
271        Some('"')
272    }
273
274    fn interval_style(&self) -> IntervalStyle {
275        IntervalStyle::PostgresVerbose
276    }
277
278    fn float64_ast_dtype(&self) -> ast::DataType {
279        ast::DataType::DoublePrecision
280    }
281
282    fn scalar_function_to_sql_overrides(
283        &self,
284        unparser: &Unparser,
285        func_name: &str,
286        args: &[Expr],
287    ) -> Result<Option<ast::Expr>> {
288        if func_name == "round" {
289            return Ok(Some(
290                self.round_to_sql_enforce_numeric(unparser, func_name, args)?,
291            ));
292        }
293
294        Ok(None)
295    }
296}
297
298impl PostgreSqlDialect {
299    fn round_to_sql_enforce_numeric(
300        &self,
301        unparser: &Unparser,
302        func_name: &str,
303        args: &[Expr],
304    ) -> Result<ast::Expr> {
305        let mut args = unparser.function_args_to_sql(args)?;
306
307        // Enforce the first argument to be Numeric
308        if let Some(ast::FunctionArg::Unnamed(ast::FunctionArgExpr::Expr(expr))) =
309            args.first_mut()
310        {
311            if let ast::Expr::Cast { data_type, .. } = expr {
312                // Don't create an additional cast wrapper if we can update the existing one
313                *data_type = ast::DataType::Numeric(ast::ExactNumberInfo::None);
314            } else {
315                // Wrap the expression in a new cast
316                *expr = ast::Expr::Cast {
317                    kind: ast::CastKind::Cast,
318                    expr: Box::new(expr.clone()),
319                    data_type: ast::DataType::Numeric(ast::ExactNumberInfo::None),
320                    format: None,
321                };
322            }
323        }
324
325        Ok(ast::Expr::Function(Function {
326            name: ObjectName::from(vec![Ident {
327                value: func_name.to_string(),
328                quote_style: None,
329                span: Span::empty(),
330            }]),
331            args: ast::FunctionArguments::List(ast::FunctionArgumentList {
332                duplicate_treatment: None,
333                args,
334                clauses: vec![],
335            }),
336            filter: None,
337            null_treatment: None,
338            over: None,
339            within_group: vec![],
340            parameters: ast::FunctionArguments::None,
341            uses_odbc_syntax: false,
342        }))
343    }
344}
345
346#[derive(Default)]
347pub struct DuckDBDialect {
348    custom_scalar_fn_overrides: HashMap<String, ScalarFnToSqlHandler>,
349}
350
351impl DuckDBDialect {
352    #[must_use]
353    pub fn new() -> Self {
354        Self {
355            custom_scalar_fn_overrides: HashMap::new(),
356        }
357    }
358}
359
360impl Dialect for DuckDBDialect {
361    fn identifier_quote_style(&self, _: &str) -> Option<char> {
362        Some('"')
363    }
364
365    fn character_length_style(&self) -> CharacterLengthStyle {
366        CharacterLengthStyle::Length
367    }
368
369    fn division_operator(&self) -> BinaryOperator {
370        BinaryOperator::DuckIntegerDivide
371    }
372
373    fn with_custom_scalar_overrides(
374        mut self,
375        handlers: Vec<(&str, ScalarFnToSqlHandler)>,
376    ) -> Self {
377        for (func_name, handler) in handlers {
378            self.custom_scalar_fn_overrides
379                .insert(func_name.to_string(), handler);
380        }
381        self
382    }
383
384    fn scalar_function_to_sql_overrides(
385        &self,
386        unparser: &Unparser,
387        func_name: &str,
388        args: &[Expr],
389    ) -> Result<Option<ast::Expr>> {
390        if let Some(handler) = self.custom_scalar_fn_overrides.get(func_name) {
391            return handler(unparser, args);
392        }
393
394        if func_name == "character_length" {
395            return character_length_to_sql(
396                unparser,
397                self.character_length_style(),
398                args,
399            );
400        }
401
402        Ok(None)
403    }
404}
405
406pub struct MySqlDialect {}
407
408impl Dialect for MySqlDialect {
409    fn identifier_quote_style(&self, _: &str) -> Option<char> {
410        Some('`')
411    }
412
413    fn supports_nulls_first_in_sort(&self) -> bool {
414        false
415    }
416
417    fn interval_style(&self) -> IntervalStyle {
418        IntervalStyle::MySQL
419    }
420
421    fn utf8_cast_dtype(&self) -> ast::DataType {
422        ast::DataType::Char(None)
423    }
424
425    fn large_utf8_cast_dtype(&self) -> ast::DataType {
426        ast::DataType::Char(None)
427    }
428
429    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
430        DateFieldExtractStyle::Extract
431    }
432
433    fn int64_cast_dtype(&self) -> ast::DataType {
434        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
435    }
436
437    fn int32_cast_dtype(&self) -> ast::DataType {
438        ast::DataType::Custom(ObjectName::from(vec![Ident::new("SIGNED")]), vec![])
439    }
440
441    fn timestamp_cast_dtype(
442        &self,
443        _time_unit: &TimeUnit,
444        _tz: &Option<Arc<str>>,
445    ) -> ast::DataType {
446        ast::DataType::Datetime(None)
447    }
448
449    fn requires_derived_table_alias(&self) -> bool {
450        true
451    }
452
453    fn scalar_function_to_sql_overrides(
454        &self,
455        unparser: &Unparser,
456        func_name: &str,
457        args: &[Expr],
458    ) -> Result<Option<ast::Expr>> {
459        if func_name == "date_part" {
460            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
461        }
462
463        Ok(None)
464    }
465}
466
467pub struct SqliteDialect {}
468
469impl Dialect for SqliteDialect {
470    fn identifier_quote_style(&self, _: &str) -> Option<char> {
471        Some('`')
472    }
473
474    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
475        DateFieldExtractStyle::Strftime
476    }
477
478    fn date32_cast_dtype(&self) -> ast::DataType {
479        ast::DataType::Text
480    }
481
482    fn character_length_style(&self) -> CharacterLengthStyle {
483        CharacterLengthStyle::Length
484    }
485
486    fn supports_column_alias_in_table_alias(&self) -> bool {
487        false
488    }
489
490    fn scalar_function_to_sql_overrides(
491        &self,
492        unparser: &Unparser,
493        func_name: &str,
494        args: &[Expr],
495    ) -> Result<Option<ast::Expr>> {
496        match func_name {
497            "date_part" => {
498                date_part_to_sql(unparser, self.date_field_extract_style(), args)
499            }
500            "character_length" => {
501                character_length_to_sql(unparser, self.character_length_style(), args)
502            }
503            "from_unixtime" => sqlite_from_unixtime_to_sql(unparser, args),
504            "date_trunc" => sqlite_date_trunc_to_sql(unparser, args),
505            _ => Ok(None),
506        }
507    }
508}
509
510#[derive(Default)]
511pub struct BigQueryDialect {}
512
513impl Dialect for BigQueryDialect {
514    fn identifier_quote_style(&self, _: &str) -> Option<char> {
515        Some('`')
516    }
517
518    fn col_alias_overrides(&self, alias: &str) -> Result<Option<String>> {
519        // Check if alias contains any special characters not supported by BigQuery col names
520        // https://cloud.google.com/bigquery/docs/schemas#flexible-column-names
521        let special_chars: [char; 20] = [
522            '!', '"', '$', '(', ')', '*', ',', '.', '/', ';', '?', '@', '[', '\\', ']',
523            '^', '`', '{', '}', '~',
524        ];
525
526        if alias.chars().any(|c| special_chars.contains(&c)) {
527            let mut encoded_name = String::new();
528            for c in alias.chars() {
529                if special_chars.contains(&c) {
530                    encoded_name.push_str(&format!("_{}", c as u32));
531                } else {
532                    encoded_name.push(c);
533                }
534            }
535            Ok(Some(encoded_name))
536        } else {
537            Ok(Some(alias.to_string()))
538        }
539    }
540
541    fn unnest_as_table_factor(&self) -> bool {
542        true
543    }
544}
545
546impl BigQueryDialect {
547    #[must_use]
548    pub fn new() -> Self {
549        Self {}
550    }
551}
552
/// Dialect whose behavior is driven by stored settings rather than hard-coded rules.
///
/// Each field holds the value that this type's `Dialect` implementation returns from the
/// method of the same name.
pub struct CustomDialect {
    identifier_quote_style: Option<char>,
    supports_nulls_first_in_sort: bool,
    use_timestamp_for_date64: bool,
    interval_style: IntervalStyle,
    float64_ast_dtype: ast::DataType,
    utf8_cast_dtype: ast::DataType,
    large_utf8_cast_dtype: ast::DataType,
    date_field_extract_style: DateFieldExtractStyle,
    character_length_style: CharacterLengthStyle,
    int64_cast_dtype: ast::DataType,
    int32_cast_dtype: ast::DataType,
    // Returned by `timestamp_cast_dtype` when no timezone is given.
    timestamp_cast_dtype: ast::DataType,
    // Returned by `timestamp_cast_dtype` when a timezone is given.
    timestamp_tz_cast_dtype: ast::DataType,
    date32_cast_dtype: ast::DataType,
    supports_column_alias_in_table_alias: bool,
    requires_derived_table_alias: bool,
    division_operator: BinaryOperator,
    // Returned for every function name and frame bound; there is no per-function setting.
    window_func_support_window_frame: bool,
    full_qualified_col: bool,
    unnest_as_table_factor: bool,
}
575
impl Default for CustomDialect {
    /// Returns the baseline configuration; the deprecated `CustomDialect::new` starts from
    /// these values and only replaces the identifier quote style.
    fn default() -> Self {
        Self {
            identifier_quote_style: None,
            supports_nulls_first_in_sort: true,
            use_timestamp_for_date64: false,
            // NOTE(review): `CustomDialectBuilder::new()` and the `Dialect` trait default
            // both use `IntervalStyle::PostgresVerbose`. This is the only value here that
            // differs from the builder's defaults, so a dialect built from this impl
            // renders intervals differently from one produced by the builder. Confirm
            // whether the difference is intentional.
            interval_style: IntervalStyle::SQLStandard,
            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
            utf8_cast_dtype: ast::DataType::Varchar(None),
            large_utf8_cast_dtype: ast::DataType::Text,
            date_field_extract_style: DateFieldExtractStyle::DatePart,
            character_length_style: CharacterLengthStyle::CharacterLength,
            int64_cast_dtype: ast::DataType::BigInt(None),
            int32_cast_dtype: ast::DataType::Integer(None),
            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
                None,
                TimezoneInfo::WithTimeZone,
            ),
            date32_cast_dtype: ast::DataType::Date,
            supports_column_alias_in_table_alias: true,
            requires_derived_table_alias: false,
            division_operator: BinaryOperator::Divide,
            window_func_support_window_frame: true,
            full_qualified_col: false,
            unnest_as_table_factor: false,
        }
    }
}
605
606impl CustomDialect {
607    // Create a CustomDialect
608    #[deprecated(since = "41.0.0", note = "please use `CustomDialectBuilder` instead")]
609    pub fn new(identifier_quote_style: Option<char>) -> Self {
610        Self {
611            identifier_quote_style,
612            ..Default::default()
613        }
614    }
615}
616
617impl Dialect for CustomDialect {
618    fn identifier_quote_style(&self, _: &str) -> Option<char> {
619        self.identifier_quote_style
620    }
621
622    fn supports_nulls_first_in_sort(&self) -> bool {
623        self.supports_nulls_first_in_sort
624    }
625
626    fn use_timestamp_for_date64(&self) -> bool {
627        self.use_timestamp_for_date64
628    }
629
630    fn interval_style(&self) -> IntervalStyle {
631        self.interval_style
632    }
633
634    fn float64_ast_dtype(&self) -> ast::DataType {
635        self.float64_ast_dtype.clone()
636    }
637
638    fn utf8_cast_dtype(&self) -> ast::DataType {
639        self.utf8_cast_dtype.clone()
640    }
641
642    fn large_utf8_cast_dtype(&self) -> ast::DataType {
643        self.large_utf8_cast_dtype.clone()
644    }
645
646    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
647        self.date_field_extract_style
648    }
649
650    fn character_length_style(&self) -> CharacterLengthStyle {
651        self.character_length_style
652    }
653
654    fn int64_cast_dtype(&self) -> ast::DataType {
655        self.int64_cast_dtype.clone()
656    }
657
658    fn int32_cast_dtype(&self) -> ast::DataType {
659        self.int32_cast_dtype.clone()
660    }
661
662    fn timestamp_cast_dtype(
663        &self,
664        _time_unit: &TimeUnit,
665        tz: &Option<Arc<str>>,
666    ) -> ast::DataType {
667        if tz.is_some() {
668            self.timestamp_tz_cast_dtype.clone()
669        } else {
670            self.timestamp_cast_dtype.clone()
671        }
672    }
673
674    fn date32_cast_dtype(&self) -> ast::DataType {
675        self.date32_cast_dtype.clone()
676    }
677
678    fn supports_column_alias_in_table_alias(&self) -> bool {
679        self.supports_column_alias_in_table_alias
680    }
681
682    fn scalar_function_to_sql_overrides(
683        &self,
684        unparser: &Unparser,
685        func_name: &str,
686        args: &[Expr],
687    ) -> Result<Option<ast::Expr>> {
688        match func_name {
689            "date_part" => {
690                date_part_to_sql(unparser, self.date_field_extract_style(), args)
691            }
692            "character_length" => {
693                character_length_to_sql(unparser, self.character_length_style(), args)
694            }
695            _ => Ok(None),
696        }
697    }
698
699    fn requires_derived_table_alias(&self) -> bool {
700        self.requires_derived_table_alias
701    }
702
703    fn division_operator(&self) -> BinaryOperator {
704        self.division_operator.clone()
705    }
706
707    fn window_func_support_window_frame(
708        &self,
709        _func_name: &str,
710        _start_bound: &WindowFrameBound,
711        _end_bound: &WindowFrameBound,
712    ) -> bool {
713        self.window_func_support_window_frame
714    }
715
716    fn full_qualified_col(&self) -> bool {
717        self.full_qualified_col
718    }
719
720    fn unnest_as_table_factor(&self) -> bool {
721        self.unnest_as_table_factor
722    }
723}
724
/// `CustomDialectBuilder` to build `CustomDialect` using builder pattern
///
/// The builder carries one field per `CustomDialect` setting. Each `with_*` method takes
/// the builder by value and returns it, so calls can be chained before `build()`.
///
/// # Examples
///
/// Building a custom dialect with all default options set in CustomDialectBuilder::new()
/// but with `use_timestamp_for_date64` overridden to `true`
///
/// ```
/// use datafusion_sql::unparser::dialect::CustomDialectBuilder;
/// let dialect = CustomDialectBuilder::new()
///     .with_use_timestamp_for_date64(true)
///     .build();
/// ```
pub struct CustomDialectBuilder {
    identifier_quote_style: Option<char>,
    supports_nulls_first_in_sort: bool,
    use_timestamp_for_date64: bool,
    interval_style: IntervalStyle,
    float64_ast_dtype: ast::DataType,
    utf8_cast_dtype: ast::DataType,
    large_utf8_cast_dtype: ast::DataType,
    date_field_extract_style: DateFieldExtractStyle,
    character_length_style: CharacterLengthStyle,
    int64_cast_dtype: ast::DataType,
    int32_cast_dtype: ast::DataType,
    // Type used for timestamps without a timezone.
    timestamp_cast_dtype: ast::DataType,
    // Type used for timestamps with a timezone.
    timestamp_tz_cast_dtype: ast::DataType,
    date32_cast_dtype: ast::DataType,
    supports_column_alias_in_table_alias: bool,
    requires_derived_table_alias: bool,
    division_operator: BinaryOperator,
    window_func_support_window_frame: bool,
    full_qualified_col: bool,
    unnest_as_table_factor: bool,
}
761
impl Default for CustomDialectBuilder {
    /// Equivalent to [`CustomDialectBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
767
768impl CustomDialectBuilder {
769    pub fn new() -> Self {
770        Self {
771            identifier_quote_style: None,
772            supports_nulls_first_in_sort: true,
773            use_timestamp_for_date64: false,
774            interval_style: IntervalStyle::PostgresVerbose,
775            float64_ast_dtype: ast::DataType::Double(ast::ExactNumberInfo::None),
776            utf8_cast_dtype: ast::DataType::Varchar(None),
777            large_utf8_cast_dtype: ast::DataType::Text,
778            date_field_extract_style: DateFieldExtractStyle::DatePart,
779            character_length_style: CharacterLengthStyle::CharacterLength,
780            int64_cast_dtype: ast::DataType::BigInt(None),
781            int32_cast_dtype: ast::DataType::Integer(None),
782            timestamp_cast_dtype: ast::DataType::Timestamp(None, TimezoneInfo::None),
783            timestamp_tz_cast_dtype: ast::DataType::Timestamp(
784                None,
785                TimezoneInfo::WithTimeZone,
786            ),
787            date32_cast_dtype: ast::DataType::Date,
788            supports_column_alias_in_table_alias: true,
789            requires_derived_table_alias: false,
790            division_operator: BinaryOperator::Divide,
791            window_func_support_window_frame: true,
792            full_qualified_col: false,
793            unnest_as_table_factor: false,
794        }
795    }
796
797    pub fn build(self) -> CustomDialect {
798        CustomDialect {
799            identifier_quote_style: self.identifier_quote_style,
800            supports_nulls_first_in_sort: self.supports_nulls_first_in_sort,
801            use_timestamp_for_date64: self.use_timestamp_for_date64,
802            interval_style: self.interval_style,
803            float64_ast_dtype: self.float64_ast_dtype,
804            utf8_cast_dtype: self.utf8_cast_dtype,
805            large_utf8_cast_dtype: self.large_utf8_cast_dtype,
806            date_field_extract_style: self.date_field_extract_style,
807            character_length_style: self.character_length_style,
808            int64_cast_dtype: self.int64_cast_dtype,
809            int32_cast_dtype: self.int32_cast_dtype,
810            timestamp_cast_dtype: self.timestamp_cast_dtype,
811            timestamp_tz_cast_dtype: self.timestamp_tz_cast_dtype,
812            date32_cast_dtype: self.date32_cast_dtype,
813            supports_column_alias_in_table_alias: self
814                .supports_column_alias_in_table_alias,
815            requires_derived_table_alias: self.requires_derived_table_alias,
816            division_operator: self.division_operator,
817            window_func_support_window_frame: self.window_func_support_window_frame,
818            full_qualified_col: self.full_qualified_col,
819            unnest_as_table_factor: self.unnest_as_table_factor,
820        }
821    }
822
823    /// Customize the dialect with a specific identifier quote style, e.g. '`', '"'
824    pub fn with_identifier_quote_style(mut self, identifier_quote_style: char) -> Self {
825        self.identifier_quote_style = Some(identifier_quote_style);
826        self
827    }
828
829    /// Customize the dialect to support `NULLS FIRST` in `ORDER BY` clauses
830    pub fn with_supports_nulls_first_in_sort(
831        mut self,
832        supports_nulls_first_in_sort: bool,
833    ) -> Self {
834        self.supports_nulls_first_in_sort = supports_nulls_first_in_sort;
835        self
836    }
837
838    /// Customize the dialect to uses TIMESTAMP when casting Date64 rather than DATETIME
839    pub fn with_use_timestamp_for_date64(
840        mut self,
841        use_timestamp_for_date64: bool,
842    ) -> Self {
843        self.use_timestamp_for_date64 = use_timestamp_for_date64;
844        self
845    }
846
847    /// Customize the dialect with a specific interval style listed in `IntervalStyle`
848    pub fn with_interval_style(mut self, interval_style: IntervalStyle) -> Self {
849        self.interval_style = interval_style;
850        self
851    }
852
853    /// Customize the dialect with a specific character_length_style listed in `CharacterLengthStyle`
854    pub fn with_character_length_style(
855        mut self,
856        character_length_style: CharacterLengthStyle,
857    ) -> Self {
858        self.character_length_style = character_length_style;
859        self
860    }
861
862    /// Customize the dialect with a specific SQL type for Float64 casting: DOUBLE, DOUBLE PRECISION, etc.
863    pub fn with_float64_ast_dtype(mut self, float64_ast_dtype: ast::DataType) -> Self {
864        self.float64_ast_dtype = float64_ast_dtype;
865        self
866    }
867
868    /// Customize the dialect with a specific SQL type for Utf8 casting: VARCHAR, CHAR, etc.
869    pub fn with_utf8_cast_dtype(mut self, utf8_cast_dtype: ast::DataType) -> Self {
870        self.utf8_cast_dtype = utf8_cast_dtype;
871        self
872    }
873
874    /// Customize the dialect with a specific SQL type for LargeUtf8 casting: TEXT, CHAR, etc.
875    pub fn with_large_utf8_cast_dtype(
876        mut self,
877        large_utf8_cast_dtype: ast::DataType,
878    ) -> Self {
879        self.large_utf8_cast_dtype = large_utf8_cast_dtype;
880        self
881    }
882
883    /// Customize the dialect with a specific date field extract style listed in `DateFieldExtractStyle`
884    pub fn with_date_field_extract_style(
885        mut self,
886        date_field_extract_style: DateFieldExtractStyle,
887    ) -> Self {
888        self.date_field_extract_style = date_field_extract_style;
889        self
890    }
891
892    /// Customize the dialect with a specific SQL type for Int64 casting: BigInt, SIGNED, etc.
893    pub fn with_int64_cast_dtype(mut self, int64_cast_dtype: ast::DataType) -> Self {
894        self.int64_cast_dtype = int64_cast_dtype;
895        self
896    }
897
898    /// Customize the dialect with a specific SQL type for Int32 casting: Integer, SIGNED, etc.
899    pub fn with_int32_cast_dtype(mut self, int32_cast_dtype: ast::DataType) -> Self {
900        self.int32_cast_dtype = int32_cast_dtype;
901        self
902    }
903
904    /// Customize the dialect with a specific SQL type for Timestamp casting: Timestamp, Datetime, etc.
905    pub fn with_timestamp_cast_dtype(
906        mut self,
907        timestamp_cast_dtype: ast::DataType,
908        timestamp_tz_cast_dtype: ast::DataType,
909    ) -> Self {
910        self.timestamp_cast_dtype = timestamp_cast_dtype;
911        self.timestamp_tz_cast_dtype = timestamp_tz_cast_dtype;
912        self
913    }
914
915    pub fn with_date32_cast_dtype(mut self, date32_cast_dtype: ast::DataType) -> Self {
916        self.date32_cast_dtype = date32_cast_dtype;
917        self
918    }
919
920    /// Customize the dialect to support column aliases as part of alias table definition
921    pub fn with_supports_column_alias_in_table_alias(
922        mut self,
923        supports_column_alias_in_table_alias: bool,
924    ) -> Self {
925        self.supports_column_alias_in_table_alias = supports_column_alias_in_table_alias;
926        self
927    }
928
929    pub fn with_requires_derived_table_alias(
930        mut self,
931        requires_derived_table_alias: bool,
932    ) -> Self {
933        self.requires_derived_table_alias = requires_derived_table_alias;
934        self
935    }
936
937    pub fn with_division_operator(mut self, division_operator: BinaryOperator) -> Self {
938        self.division_operator = division_operator;
939        self
940    }
941
942    pub fn with_window_func_support_window_frame(
943        mut self,
944        window_func_support_window_frame: bool,
945    ) -> Self {
946        self.window_func_support_window_frame = window_func_support_window_frame;
947        self
948    }
949
950    /// Customize the dialect to allow full qualified column names
951    pub fn with_full_qualified_col(mut self, full_qualified_col: bool) -> Self {
952        self.full_qualified_col = full_qualified_col;
953        self
954    }
955
956    pub fn with_unnest_as_table_factor(mut self, unnest_as_table_factor: bool) -> Self {
957        self.unnest_as_table_factor = unnest_as_table_factor;
958        self
959    }
960}