polyglot_sql/dialects/datafusion.rs
1//! Apache DataFusion SQL Dialect
2//!
3//! DataFusion is an Arrow-based query engine with modern SQL extensions.
4//! Reference: https://datafusion.apache.org/user-guide/sql/
5//!
6//! Key characteristics:
7//! - Arrow-native type system (Int8, Int16, Int32, Int64, Float32, Float64, Utf8, etc.)
8//! - Double-quote identifiers
9//! - Lowercase function names by default
10//! - QUALIFY clause support
11//! - EXCEPT for column exclusion (SELECT * EXCEPT(col))
12//! - LEFT SEMI JOIN / LEFT ANTI JOIN syntax
13//! - TRY_CAST support
14//! - Pipe operator (|>) for query chaining
15//! - No UPDATE/DELETE support
16//! - arrow_cast() and arrow_typeof() functions
17//! - COPY ... TO syntax (no INTO keyword)
18//! - Nested comment support
19
20use super::{DialectImpl, DialectType};
21use crate::error::Result;
22use crate::expressions::{Expression, Function};
23use crate::generator::GeneratorConfig;
24use crate::tokens::TokenizerConfig;
25
/// Apache DataFusion dialect.
///
/// A stateless unit type: all behaviour comes from its [`DialectImpl`]
/// implementation (tokenizer and generator configuration, expression
/// transformation) and from the private function-rewriting helpers in its
/// inherent `impl` block.
pub struct DataFusionDialect;
28
29impl DialectImpl for DataFusionDialect {
30 fn dialect_type(&self) -> DialectType {
31 DialectType::DataFusion
32 }
33
34 fn tokenizer_config(&self) -> TokenizerConfig {
35 let mut config = TokenizerConfig::default();
36 // DataFusion uses double quotes for identifiers
37 config.identifiers.insert('"', '"');
38 // DataFusion supports nested comments
39 config.nested_comments = true;
40 config
41 }
42
43 fn generator_config(&self) -> GeneratorConfig {
44 use crate::generator::{IdentifierQuoteStyle, LimitFetchStyle, NormalizeFunctions};
45 GeneratorConfig {
46 identifier_quote: '"',
47 identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
48 dialect: Some(DialectType::DataFusion),
49 // DataFusion lowercases function names
50 normalize_functions: NormalizeFunctions::Lower,
51 // TRY_CAST is supported
52 try_supported: true,
53 // DataFusion uses EXCEPT for column exclusion: SELECT * EXCEPT(col)
54 star_except: "EXCEPT",
55 // No multi-arg DISTINCT: COUNT(DISTINCT a, b) not supported
56 multi_arg_distinct: false,
57 // Window EXCLUDE not supported
58 supports_window_exclude: false,
59 // Interval allows plural form (DAYS, HOURS, etc.)
60 interval_allows_plural_form: true,
61 // Normalize date parts in EXTRACT
62 normalize_extract_date_parts: true,
63 // LIMIT style (not FETCH)
64 limit_fetch_style: LimitFetchStyle::Limit,
65 // No hints
66 join_hints: false,
67 table_hints: false,
68 query_hints: false,
69 // LEFT SEMI JOIN / LEFT ANTI JOIN syntax
70 semi_anti_join_with_side: true,
71 // COPY does not use INTO keyword
72 copy_has_into_keyword: false,
73 // NVL2 is supported (via coalesce-like behavior)
74 nvl2_supported: true,
75 // MEDIAN is supported
76 supports_median: true,
77 // Can implement array_any
78 can_implement_array_any: true,
79 // LIKE quantifiers not supported
80 supports_like_quantifiers: false,
81 // Aggregate FILTER is supported
82 aggregate_filter_supported: true,
83 // BETWEEN flags not supported
84 supports_between_flags: false,
85 ..Default::default()
86 }
87 }
88
89 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
90 match expr {
91 // Function transformations
92 Expression::Function(f) => self.transform_function(*f),
93
94 // Aggregate function transformations
95 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
96
97 // Pass through everything else
98 _ => Ok(expr),
99 }
100 }
101}
102
103impl DataFusionDialect {
104 fn transform_function(&self, f: Function) -> Result<Expression> {
105 let name_upper = f.name.to_uppercase();
106 match name_upper.as_str() {
107 // IFNULL → COALESCE (DataFusion uses COALESCE)
108 "IFNULL" => Ok(Expression::Function(Box::new(Function::new(
109 "coalesce".to_string(),
110 f.args,
111 )))),
112
113 // SQUARE(x) → POWER(x, 2)
114 "SQUARE" => {
115 let mut args = f.args;
116 args.push(Expression::Literal(crate::expressions::Literal::Number(
117 "2".to_string(),
118 )));
119 Ok(Expression::Function(Box::new(Function::new(
120 "power".to_string(),
121 args,
122 ))))
123 }
124
125 // REGEXP_MATCHES → REGEXP_MATCH
126 "REGEXP_MATCHES" => Ok(Expression::Function(Box::new(Function::new(
127 "regexp_match".to_string(),
128 f.args,
129 )))),
130
131 // DATE_FORMAT / TIME_TO_STR / STRFTIME → TO_CHAR
132 "DATE_FORMAT" | "TIME_TO_STR" => Ok(Expression::Function(Box::new(Function::new(
133 "to_char".to_string(),
134 f.args,
135 )))),
136
137 // Pass through everything else
138 _ => Ok(Expression::Function(Box::new(f))),
139 }
140 }
141
142 fn transform_aggregate_function(
143 &self,
144 f: Box<crate::expressions::AggregateFunction>,
145 ) -> Result<Expression> {
146 let name_upper = f.name.to_uppercase();
147 match name_upper.as_str() {
148 // GROUP_CONCAT → STRING_AGG
149 "GROUP_CONCAT" => Ok(Expression::Function(Box::new(Function::new(
150 "string_agg".to_string(),
151 f.args,
152 )))),
153
154 // LISTAGG → STRING_AGG
155 "LISTAGG" => Ok(Expression::Function(Box::new(Function::new(
156 "string_agg".to_string(),
157 f.args,
158 )))),
159
160 // Pass through everything else
161 _ => Ok(Expression::AggregateFunction(f)),
162 }
163 }
164}