Skip to main content

polyglot_sql/dialects/
datafusion.rs

1//! Apache DataFusion SQL Dialect
2//!
3//! DataFusion is an Arrow-based query engine with modern SQL extensions.
4//! Reference: https://datafusion.apache.org/user-guide/sql/
5//!
6//! Key characteristics:
7//! - Arrow-native type system (Int8, Int16, Int32, Int64, Float32, Float64, Utf8, etc.)
8//! - Double-quote identifiers
9//! - Lowercase function names by default
10//! - QUALIFY clause support
11//! - EXCEPT for column exclusion (SELECT * EXCEPT(col))
12//! - LEFT SEMI JOIN / LEFT ANTI JOIN syntax
13//! - TRY_CAST support
14//! - Pipe operator (|>) for query chaining
15//! - No UPDATE/DELETE support
16//! - arrow_cast() and arrow_typeof() functions
17//! - COPY ... TO syntax (no INTO keyword)
18//! - Nested comment support
19
20use super::{DialectImpl, DialectType};
21use crate::error::Result;
22use crate::expressions::{Expression, Function};
23#[cfg(feature = "generate")]
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
/// Apache DataFusion dialect.
///
/// Zero-sized marker type: it carries no state, and all dialect behaviour is
/// provided through its trait and inherent `impl` blocks. The common traits
/// are derived so the marker can be debug-printed, copied freely and built
/// via `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct DataFusionDialect;
29
30impl DialectImpl for DataFusionDialect {
31    fn dialect_type(&self) -> DialectType {
32        DialectType::DataFusion
33    }
34
35    fn tokenizer_config(&self) -> TokenizerConfig {
36        let mut config = TokenizerConfig::default();
37        // DataFusion uses double quotes for identifiers
38        config.identifiers.insert('"', '"');
39        // DataFusion supports nested comments
40        config.nested_comments = true;
41        config
42    }
43
44    #[cfg(feature = "generate")]
45
46    fn generator_config(&self) -> GeneratorConfig {
47        use crate::generator::{IdentifierQuoteStyle, LimitFetchStyle, NormalizeFunctions};
48        GeneratorConfig {
49            identifier_quote: '"',
50            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
51            dialect: Some(DialectType::DataFusion),
52            // DataFusion lowercases function names
53            normalize_functions: NormalizeFunctions::Lower,
54            // TRY_CAST is supported
55            try_supported: true,
56            // DataFusion uses EXCEPT for column exclusion: SELECT * EXCEPT(col)
57            star_except: "EXCEPT",
58            // No multi-arg DISTINCT: COUNT(DISTINCT a, b) not supported
59            multi_arg_distinct: false,
60            // Window EXCLUDE not supported
61            supports_window_exclude: false,
62            // Interval allows plural form (DAYS, HOURS, etc.)
63            interval_allows_plural_form: true,
64            // Normalize date parts in EXTRACT
65            normalize_extract_date_parts: true,
66            // LIMIT style (not FETCH)
67            limit_fetch_style: LimitFetchStyle::Limit,
68            // No hints
69            join_hints: false,
70            table_hints: false,
71            query_hints: false,
72            // LEFT SEMI JOIN / LEFT ANTI JOIN syntax
73            semi_anti_join_with_side: true,
74            // COPY does not use INTO keyword
75            copy_has_into_keyword: false,
76            // NVL2 is supported (via coalesce-like behavior)
77            nvl2_supported: true,
78            // MEDIAN is supported
79            supports_median: true,
80            // Can implement array_any
81            can_implement_array_any: true,
82            // LIKE quantifiers not supported
83            supports_like_quantifiers: false,
84            // Aggregate FILTER is supported
85            aggregate_filter_supported: true,
86            // BETWEEN flags not supported
87            supports_between_flags: false,
88            ..Default::default()
89        }
90    }
91
92    #[cfg(feature = "transpile")]
93
94    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
95        match expr {
96            // Function transformations
97            Expression::Function(f) => self.transform_function(*f),
98
99            // Aggregate function transformations
100            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
101
102            // Pass through everything else
103            _ => Ok(expr),
104        }
105    }
106}
107
#[cfg(feature = "transpile")]
impl DataFusionDialect {
    /// Rewrites a scalar function call into its DataFusion spelling.
    ///
    /// The function name is compared case-insensitively (via `to_uppercase`).
    /// Recognised rewrites:
    ///
    /// - `IFNULL(..)` → `coalesce(..)`
    /// - `SQUARE(x)` → `power(x, 2)` (the literal `2` is appended to the
    ///   existing argument list)
    /// - `REGEXP_MATCHES(..)` → `regexp_match(..)`
    /// - `DATE_FORMAT(..)` / `TIME_TO_STR(..)` → `to_char(..)`
    ///
    /// Note: only the names above are matched; anything else, including
    /// `STRFTIME`, is returned unchanged. Arguments are forwarded in their
    /// original order.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        let upper = f.name.to_uppercase();

        // Pick the replacement name and the argument list to call it with.
        // Unrecognised functions leave early, untouched.
        let (target, args) = match upper.as_str() {
            "IFNULL" => ("coalesce", f.args),
            "REGEXP_MATCHES" => ("regexp_match", f.args),
            "DATE_FORMAT" | "TIME_TO_STR" => ("to_char", f.args),
            "SQUARE" => {
                // Squaring is expressed as raising to the power of two.
                let two = Expression::Literal(Box::new(
                    crate::expressions::Literal::Number("2".to_string()),
                ));
                let mut power_args = f.args;
                power_args.push(two);
                ("power", power_args)
            }
            _ => return Ok(Expression::Function(Box::new(f))),
        };

        Ok(Expression::Function(Box::new(Function::new(
            target.to_string(),
            args,
        ))))
    }

    /// Rewrites an aggregate function call into its DataFusion spelling.
    ///
    /// `GROUP_CONCAT` and `LISTAGG` (matched case-insensitively) both become
    /// a plain `string_agg` function call; every other aggregate is returned
    /// unchanged.
    ///
    /// NOTE(review): the rewritten node is built from `f.args` only, so any
    /// other state carried by the aggregate node is not forwarded — confirm
    /// this is intended.
    fn transform_aggregate_function(
        &self,
        f: Box<crate::expressions::AggregateFunction>,
    ) -> Result<Expression> {
        let upper = f.name.to_uppercase();
        match upper.as_str() {
            "GROUP_CONCAT" | "LISTAGG" => {
                let rewritten = Function::new("string_agg".to_string(), f.args);
                Ok(Expression::Function(Box::new(rewritten)))
            }
            _ => Ok(Expression::AggregateFunction(f)),
        }
    }
}