Skip to main content

polyglot_sql/dialects/
datafusion.rs

1//! Apache DataFusion SQL Dialect
2//!
3//! DataFusion is an Arrow-based query engine with modern SQL extensions.
4//! Reference: https://datafusion.apache.org/user-guide/sql/
5//!
6//! Key characteristics:
7//! - Arrow-native type system (Int8, Int16, Int32, Int64, Float32, Float64, Utf8, etc.)
8//! - Double-quote identifiers
9//! - Lowercase function names by default
10//! - QUALIFY clause support
11//! - EXCEPT for column exclusion (SELECT * EXCEPT(col))
12//! - LEFT SEMI JOIN / LEFT ANTI JOIN syntax
13//! - TRY_CAST support
14//! - Pipe operator (|>) for query chaining
15//! - No UPDATE/DELETE support
16//! - arrow_cast() and arrow_typeof() functions
17//! - COPY ... TO syntax (no INTO keyword)
18//! - Nested comment support
19
20use super::{DialectImpl, DialectType};
21use crate::error::Result;
22use crate::expressions::{Expression, Function};
23use crate::generator::GeneratorConfig;
24use crate::tokens::TokenizerConfig;
25
/// Apache DataFusion dialect.
///
/// A zero-sized marker type: it holds no state, so it is cheap to copy and
/// can be created with `DataFusionDialect` or `DataFusionDialect::default()`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DataFusionDialect;
28
29impl DialectImpl for DataFusionDialect {
30    fn dialect_type(&self) -> DialectType {
31        DialectType::DataFusion
32    }
33
34    fn tokenizer_config(&self) -> TokenizerConfig {
35        let mut config = TokenizerConfig::default();
36        // DataFusion uses double quotes for identifiers
37        config.identifiers.insert('"', '"');
38        // DataFusion supports nested comments
39        config.nested_comments = true;
40        config
41    }
42
43    fn generator_config(&self) -> GeneratorConfig {
44        use crate::generator::{IdentifierQuoteStyle, LimitFetchStyle, NormalizeFunctions};
45        GeneratorConfig {
46            identifier_quote: '"',
47            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
48            dialect: Some(DialectType::DataFusion),
49            // DataFusion lowercases function names
50            normalize_functions: NormalizeFunctions::Lower,
51            // TRY_CAST is supported
52            try_supported: true,
53            // DataFusion uses EXCEPT for column exclusion: SELECT * EXCEPT(col)
54            star_except: "EXCEPT",
55            // No multi-arg DISTINCT: COUNT(DISTINCT a, b) not supported
56            multi_arg_distinct: false,
57            // Window EXCLUDE not supported
58            supports_window_exclude: false,
59            // Interval allows plural form (DAYS, HOURS, etc.)
60            interval_allows_plural_form: true,
61            // Normalize date parts in EXTRACT
62            normalize_extract_date_parts: true,
63            // LIMIT style (not FETCH)
64            limit_fetch_style: LimitFetchStyle::Limit,
65            // No hints
66            join_hints: false,
67            table_hints: false,
68            query_hints: false,
69            // LEFT SEMI JOIN / LEFT ANTI JOIN syntax
70            semi_anti_join_with_side: true,
71            // COPY does not use INTO keyword
72            copy_has_into_keyword: false,
73            // NVL2 is supported (via coalesce-like behavior)
74            nvl2_supported: true,
75            // MEDIAN is supported
76            supports_median: true,
77            // Can implement array_any
78            can_implement_array_any: true,
79            // LIKE quantifiers not supported
80            supports_like_quantifiers: false,
81            // Aggregate FILTER is supported
82            aggregate_filter_supported: true,
83            // BETWEEN flags not supported
84            supports_between_flags: false,
85            ..Default::default()
86        }
87    }
88
89    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
90        match expr {
91            // Function transformations
92            Expression::Function(f) => self.transform_function(*f),
93
94            // Aggregate function transformations
95            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
96
97            // Pass through everything else
98            _ => Ok(expr),
99        }
100    }
101}
102
103impl DataFusionDialect {
104    fn transform_function(&self, f: Function) -> Result<Expression> {
105        let name_upper = f.name.to_uppercase();
106        match name_upper.as_str() {
107            // IFNULL → COALESCE (DataFusion uses COALESCE)
108            "IFNULL" => Ok(Expression::Function(Box::new(Function::new(
109                "coalesce".to_string(),
110                f.args,
111            )))),
112
113            // SQUARE(x) → POWER(x, 2)
114            "SQUARE" => {
115                let mut args = f.args;
116                args.push(Expression::Literal(crate::expressions::Literal::Number(
117                    "2".to_string(),
118                )));
119                Ok(Expression::Function(Box::new(Function::new(
120                    "power".to_string(),
121                    args,
122                ))))
123            }
124
125            // REGEXP_MATCHES → REGEXP_MATCH
126            "REGEXP_MATCHES" => Ok(Expression::Function(Box::new(Function::new(
127                "regexp_match".to_string(),
128                f.args,
129            )))),
130
131            // DATE_FORMAT / TIME_TO_STR / STRFTIME → TO_CHAR
132            "DATE_FORMAT" | "TIME_TO_STR" => Ok(Expression::Function(Box::new(Function::new(
133                "to_char".to_string(),
134                f.args,
135            )))),
136
137            // Pass through everything else
138            _ => Ok(Expression::Function(Box::new(f))),
139        }
140    }
141
142    fn transform_aggregate_function(
143        &self,
144        f: Box<crate::expressions::AggregateFunction>,
145    ) -> Result<Expression> {
146        let name_upper = f.name.to_uppercase();
147        match name_upper.as_str() {
148            // GROUP_CONCAT → STRING_AGG
149            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(Function::new(
150                "string_agg".to_string(),
151                f.args,
152            )))),
153
154            // LISTAGG → STRING_AGG
155            "LISTAGG" => Ok(Expression::Function(Box::new(Function::new(
156                "string_agg".to_string(),
157                f.args,
158            )))),
159
160            // Pass through everything else
161            _ => Ok(Expression::AggregateFunction(f)),
162        }
163    }
164}