Skip to main content

polyglot_sql/dialects/
exasol.rs

1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31    fn dialect_type(&self) -> DialectType {
32        DialectType::Exasol
33    }
34
35    fn tokenizer_config(&self) -> TokenizerConfig {
36        let mut config = TokenizerConfig::default();
37        // Exasol uses double quotes for identifiers
38        config.identifiers.insert('"', '"');
39        // Also supports square brackets
40        config.identifiers.insert('[', ']');
41        config
42    }
43
44    fn generator_config(&self) -> GeneratorConfig {
45        use crate::generator::IdentifierQuoteStyle;
46        GeneratorConfig {
47            identifier_quote: '"',
48            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49            dialect: Some(DialectType::Exasol),
50            supports_column_join_marks: true,
51            // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52            lowercase_window_frame_keywords: true,
53            ..Default::default()
54        }
55    }
56
57    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58        match expr {
59            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60            Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61                "SYSTIMESTAMP".to_string(),
62                vec![],
63            )))),
64
65            // WeekOfYear -> WEEK
66            Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67                "WEEK".to_string(),
68                vec![f.this],
69            )))),
70
71            // COALESCE is native, but also support transformations from other forms
72            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
73                original_name: None,
74                expressions: vec![f.this, f.expression],
75            }))),
76
77            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
78                original_name: None,
79                expressions: vec![f.this, f.expression],
80            }))),
81
82            // Bitwise operations → BIT_* functions
83            Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
84                "BIT_AND".to_string(),
85                vec![op.left, op.right],
86            )))),
87
88            Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
89                "BIT_OR".to_string(),
90                vec![op.left, op.right],
91            )))),
92
93            Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
94                "BIT_XOR".to_string(),
95                vec![op.left, op.right],
96            )))),
97
98            Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
99                "BIT_NOT".to_string(),
100                vec![f.this],
101            )))),
102
103            Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
104                "BIT_LSHIFT".to_string(),
105                vec![op.left, op.right],
106            )))),
107
108            Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
109                "BIT_RSHIFT".to_string(),
110                vec![op.left, op.right],
111            )))),
112
113            // Modulo → MOD function
114            Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
115                "MOD".to_string(),
116                vec![op.left, op.right],
117            )))),
118
119            // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
120            Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
121                this: f.this,
122                separator: f.separator,
123                on_overflow: None,
124                order_by: f.order_by,
125                distinct: f.distinct,
126                filter: f.filter,
127            }))),
128
129            // Generic function transformations
130            Expression::Function(f) => self.transform_function(*f),
131
132            // Aggregate function transformations
133            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
134
135            // Pass through everything else
136            _ => Ok(expr),
137        }
138    }
139}
140
141impl ExasolDialect {
142    fn transform_function(&self, f: Function) -> Result<Expression> {
143        let name_upper = f.name.to_uppercase();
144        match name_upper.as_str() {
145            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
146            // Exasol requires parentheses even for no-arg functions
147            // Preserve any arguments (like precision)
148            "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
149                "SYSTIMESTAMP".to_string(),
150                f.args,
151            )))),
152
153            // ALL → EVERY
154            "ALL" => Ok(Expression::Function(Box::new(Function::new(
155                "EVERY".to_string(),
156                f.args,
157            )))),
158
159            // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
160            "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
161                Ok(Expression::Coalesce(Box::new(VarArgFunc {
162                    original_name: None,
163                    expressions: f.args,
164                })))
165            }
166
167            // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
168            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
169                "DAYS_BETWEEN".to_string(),
170                f.args,
171            )))),
172
173            // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
174            "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
175                "ADD_DAYS".to_string(),
176                f.args,
177            )))),
178
179            // DateSub → Negate and use ADD_DAYS
180            "DATESUB" | "DATE_SUB" => {
181                // Would need to negate the interval, for now just use ADD_DAYS
182                Ok(Expression::Function(Box::new(Function::new(
183                    "ADD_DAYS".to_string(),
184                    f.args,
185                ))))
186            }
187
188            // DATE_TRUNC is native
189            "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
190
191            // LEVENSHTEIN → EDIT_DISTANCE
192            "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
193                Function::new("EDIT_DISTANCE".to_string(), f.args),
194            ))),
195
196            // REGEXP_EXTRACT → REGEXP_SUBSTR
197            "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
198                "REGEXP_SUBSTR".to_string(),
199                f.args,
200            )))),
201
202            // SHA/SHA1 → HASH_SHA
203            "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
204                "HASH_SHA".to_string(),
205                f.args,
206            )))),
207
208            // MD5 → HASH_MD5
209            "MD5" => Ok(Expression::Function(Box::new(Function::new(
210                "HASH_MD5".to_string(),
211                f.args,
212            )))),
213
214            // SHA256 → HASH_SHA256
215            "SHA256" | "SHA2" => {
216                // SHA2 in some dialects takes a length parameter
217                // HASH_SHA256 in Exasol just takes the value
218                let arg = f
219                    .args
220                    .into_iter()
221                    .next()
222                    .unwrap_or(Expression::Null(crate::expressions::Null));
223                Ok(Expression::Function(Box::new(Function::new(
224                    "HASH_SHA256".to_string(),
225                    vec![arg],
226                ))))
227            }
228
229            // SHA512 → HASH_SHA512
230            "SHA512" => Ok(Expression::Function(Box::new(Function::new(
231                "HASH_SHA512".to_string(),
232                f.args,
233            )))),
234
235            // VAR_POP is native
236            "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
237                "VAR_POP".to_string(),
238                f.args,
239            )))),
240
241            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
242            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
243                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
244            ))),
245
246            // TO_CHAR is native for date formatting
247            "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => Ok(Expression::Function(Box::new(
248                Function::new("TO_CHAR".to_string(), f.args),
249            ))),
250
251            // TO_DATE is native but format specifiers need uppercasing
252            "TO_DATE" => {
253                if f.args.len() >= 2 {
254                    // Uppercase format string if present
255                    let mut new_args = f.args.clone();
256                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
257                        new_args[1] = Expression::Literal(Literal::String(
258                            Self::uppercase_exasol_format(fmt),
259                        ));
260                    }
261                    Ok(Expression::Function(Box::new(Function::new(
262                        "TO_DATE".to_string(),
263                        new_args,
264                    ))))
265                } else {
266                    Ok(Expression::Function(Box::new(f)))
267                }
268            }
269
270            // TIME_TO_STR -> TO_CHAR with format conversion
271            "TIME_TO_STR" => {
272                if f.args.len() >= 2 {
273                    let mut new_args = vec![f.args[0].clone()];
274                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
275                        new_args.push(Expression::Literal(Literal::String(
276                            Self::convert_strptime_to_exasol_format(fmt),
277                        )));
278                    } else {
279                        new_args.push(f.args[1].clone());
280                    }
281                    Ok(Expression::Function(Box::new(Function::new(
282                        "TO_CHAR".to_string(),
283                        new_args,
284                    ))))
285                } else {
286                    Ok(Expression::Function(Box::new(Function::new(
287                        "TO_CHAR".to_string(),
288                        f.args,
289                    ))))
290                }
291            }
292
293            // STR_TO_TIME -> TO_DATE with format conversion
294            "STR_TO_TIME" => {
295                if f.args.len() >= 2 {
296                    let mut new_args = vec![f.args[0].clone()];
297                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
298                        new_args.push(Expression::Literal(Literal::String(
299                            Self::convert_strptime_to_exasol_format(fmt),
300                        )));
301                    } else {
302                        new_args.push(f.args[1].clone());
303                    }
304                    Ok(Expression::Function(Box::new(Function::new(
305                        "TO_DATE".to_string(),
306                        new_args,
307                    ))))
308                } else {
309                    Ok(Expression::Function(Box::new(Function::new(
310                        "TO_DATE".to_string(),
311                        f.args,
312                    ))))
313                }
314            }
315
316            // TO_TIMESTAMP is native
317            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
318
319            // CONVERT_TZ for timezone conversion
320            "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
321                Function::new("CONVERT_TZ".to_string(), f.args),
322            ))),
323
324            // STRPOS/POSITION → INSTR
325            "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
326                Function::new("INSTR".to_string(), f.args),
327            ))),
328
329            // WEEK_OF_YEAR → WEEK
330            "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
331                "WEEK".to_string(),
332                f.args,
333            )))),
334
335            // LAST_DAY is not native, would need complex transformation
336            "LAST_DAY" => {
337                // Exasol doesn't have LAST_DAY, but we can compute it
338                // For now, pass through
339                Ok(Expression::Function(Box::new(f)))
340            }
341
342            // Pass through everything else
343            _ => Ok(Expression::Function(Box::new(f))),
344        }
345    }
346
347    fn transform_aggregate_function(
348        &self,
349        f: Box<crate::expressions::AggregateFunction>,
350    ) -> Result<Expression> {
351        let name_upper = f.name.to_uppercase();
352        match name_upper.as_str() {
353            // ALL → EVERY
354            "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
355                "EVERY".to_string(),
356                f.args,
357            )))),
358
359            // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
360            "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
361                "LISTAGG".to_string(),
362                f.args,
363            )))),
364
365            // LISTAGG is native
366            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
367
368            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
369            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
370                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
371            ))),
372
373            // Pass through everything else
374            _ => Ok(Expression::AggregateFunction(f)),
375        }
376    }
377
378    /// Convert strptime format string to Exasol format string
379    /// Exasol TIME_MAPPING (reverse of Python sqlglot):
380    /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
381    fn convert_strptime_to_exasol_format(format: &str) -> String {
382        let mut result = String::new();
383        let chars: Vec<char> = format.chars().collect();
384        let mut i = 0;
385        while i < chars.len() {
386            if chars[i] == '%' && i + 1 < chars.len() {
387                let spec = chars[i + 1];
388                let exasol_spec = match spec {
389                    'Y' => "YYYY",
390                    'y' => "YY",
391                    'm' => "MM",
392                    'd' => "DD",
393                    'H' => "HH",
394                    'M' => "MI",
395                    'S' => "SS",
396                    'a' => "DY",    // abbreviated weekday name
397                    'A' => "DAY",   // full weekday name
398                    'b' => "MON",   // abbreviated month name
399                    'B' => "MONTH", // full month name
400                    'I' => "H12",   // 12-hour format
401                    'u' => "ID",    // ISO weekday (1-7)
402                    'V' => "IW",    // ISO week number
403                    'G' => "IYYY",  // ISO year
404                    'W' => "UW",    // Week number (Monday as first day)
405                    'U' => "UW",    // Week number (Sunday as first day)
406                    'z' => "Z",     // timezone offset
407                    _ => {
408                        // Unknown specifier, keep as-is
409                        result.push('%');
410                        result.push(spec);
411                        i += 2;
412                        continue;
413                    }
414                };
415                result.push_str(exasol_spec);
416                i += 2;
417            } else {
418                result.push(chars[i]);
419                i += 1;
420            }
421        }
422        result
423    }
424
425    /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
426    /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
427    fn uppercase_exasol_format(format: &str) -> String {
428        // Exasol format specifiers are always uppercase
429        format.to_uppercase()
430    }
431}
432
433// Note: Exasol type mappings (handled in generator if needed):
434// - BLOB, LONGBLOB, etc. → VARCHAR
435// - TEXT → LONG VARCHAR
436// - VARBINARY → VARCHAR
437// - TINYINT → SMALLINT
438// - MEDIUMINT → INT
439// - DECIMAL32/64/128/256 → DECIMAL
440// - DATETIME → TIMESTAMP
441// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
442//
443// Exasol also supports:
444// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
445// - IF...THEN...ELSE...ENDIF syntax for conditionals