Skip to main content

polyglot_sql/dialects/
exasol.rs

1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31    fn dialect_type(&self) -> DialectType {
32        DialectType::Exasol
33    }
34
35    fn tokenizer_config(&self) -> TokenizerConfig {
36        let mut config = TokenizerConfig::default();
37        // Exasol uses double quotes for identifiers
38        config.identifiers.insert('"', '"');
39        // Also supports square brackets
40        config.identifiers.insert('[', ']');
41        config
42    }
43
44    fn generator_config(&self) -> GeneratorConfig {
45        use crate::generator::IdentifierQuoteStyle;
46        GeneratorConfig {
47            identifier_quote: '"',
48            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49            dialect: Some(DialectType::Exasol),
50            supports_column_join_marks: true,
51            // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52            lowercase_window_frame_keywords: true,
53            ..Default::default()
54        }
55    }
56
57    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58        match expr {
59            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60            Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61                "SYSTIMESTAMP".to_string(),
62                vec![],
63            )))),
64
65            // WeekOfYear -> WEEK
66            Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67                "WEEK".to_string(),
68                vec![f.this],
69            )))),
70
71            // COALESCE is native, but also support transformations from other forms
72            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
73                original_name: None,
74                expressions: vec![f.this, f.expression],
75                inferred_type: None,
76            }))),
77
78            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
79                original_name: None,
80                expressions: vec![f.this, f.expression],
81                inferred_type: None,
82            }))),
83
84            // Bitwise operations → BIT_* functions
85            Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
86                "BIT_AND".to_string(),
87                vec![op.left, op.right],
88            )))),
89
90            Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
91                "BIT_OR".to_string(),
92                vec![op.left, op.right],
93            )))),
94
95            Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
96                "BIT_XOR".to_string(),
97                vec![op.left, op.right],
98            )))),
99
100            Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
101                "BIT_NOT".to_string(),
102                vec![f.this],
103            )))),
104
105            Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
106                "BIT_LSHIFT".to_string(),
107                vec![op.left, op.right],
108            )))),
109
110            Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
111                "BIT_RSHIFT".to_string(),
112                vec![op.left, op.right],
113            )))),
114
115            // Modulo → MOD function
116            Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
117                "MOD".to_string(),
118                vec![op.left, op.right],
119            )))),
120
121            // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
122            Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
123                this: f.this,
124                separator: f.separator,
125                on_overflow: None,
126                order_by: f.order_by,
127                distinct: f.distinct,
128                filter: f.filter,
129                inferred_type: None,
130            }))),
131
132            // Generic function transformations
133            Expression::Function(f) => self.transform_function(*f),
134
135            // Aggregate function transformations
136            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
137
138            // Pass through everything else
139            _ => Ok(expr),
140        }
141    }
142}
143
144impl ExasolDialect {
145    fn transform_function(&self, f: Function) -> Result<Expression> {
146        let name_upper = f.name.to_uppercase();
147        match name_upper.as_str() {
148            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
149            // Exasol requires parentheses even for no-arg functions
150            // Preserve any arguments (like precision)
151            "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
152                "SYSTIMESTAMP".to_string(),
153                f.args,
154            )))),
155
156            // ALL → EVERY
157            "ALL" => Ok(Expression::Function(Box::new(Function::new(
158                "EVERY".to_string(),
159                f.args,
160            )))),
161
162            // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
163            "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
164                Ok(Expression::Coalesce(Box::new(VarArgFunc {
165                    original_name: None,
166                    expressions: f.args,
167                    inferred_type: None,
168                })))
169            }
170
171            // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
172            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
173                "DAYS_BETWEEN".to_string(),
174                f.args,
175            )))),
176
177            // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
178            "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
179                "ADD_DAYS".to_string(),
180                f.args,
181            )))),
182
183            // DateSub → Negate and use ADD_DAYS
184            "DATESUB" | "DATE_SUB" => {
185                // Would need to negate the interval, for now just use ADD_DAYS
186                Ok(Expression::Function(Box::new(Function::new(
187                    "ADD_DAYS".to_string(),
188                    f.args,
189                ))))
190            }
191
192            // DATE_TRUNC is native
193            "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
194
195            // LEVENSHTEIN → EDIT_DISTANCE
196            "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
197                Function::new("EDIT_DISTANCE".to_string(), f.args),
198            ))),
199
200            // REGEXP_EXTRACT → REGEXP_SUBSTR
201            "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
202                "REGEXP_SUBSTR".to_string(),
203                f.args,
204            )))),
205
206            // SHA/SHA1 → HASH_SHA
207            "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
208                "HASH_SHA".to_string(),
209                f.args,
210            )))),
211
212            // MD5 → HASH_MD5
213            "MD5" => Ok(Expression::Function(Box::new(Function::new(
214                "HASH_MD5".to_string(),
215                f.args,
216            )))),
217
218            // SHA256 → HASH_SHA256
219            "SHA256" | "SHA2" => {
220                // SHA2 in some dialects takes a length parameter
221                // HASH_SHA256 in Exasol just takes the value
222                let arg = f
223                    .args
224                    .into_iter()
225                    .next()
226                    .unwrap_or(Expression::Null(crate::expressions::Null));
227                Ok(Expression::Function(Box::new(Function::new(
228                    "HASH_SHA256".to_string(),
229                    vec![arg],
230                ))))
231            }
232
233            // SHA512 → HASH_SHA512
234            "SHA512" => Ok(Expression::Function(Box::new(Function::new(
235                "HASH_SHA512".to_string(),
236                f.args,
237            )))),
238
239            // VAR_POP is native
240            "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
241                "VAR_POP".to_string(),
242                f.args,
243            )))),
244
245            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
246            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
247                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
248            ))),
249
250            // TO_CHAR is native for date formatting
251            "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => Ok(Expression::Function(Box::new(
252                Function::new("TO_CHAR".to_string(), f.args),
253            ))),
254
255            // TO_DATE is native but format specifiers need uppercasing
256            "TO_DATE" => {
257                if f.args.len() >= 2 {
258                    // Uppercase format string if present
259                    let mut new_args = f.args.clone();
260                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
261                        new_args[1] = Expression::Literal(Literal::String(
262                            Self::uppercase_exasol_format(fmt),
263                        ));
264                    }
265                    Ok(Expression::Function(Box::new(Function::new(
266                        "TO_DATE".to_string(),
267                        new_args,
268                    ))))
269                } else {
270                    Ok(Expression::Function(Box::new(f)))
271                }
272            }
273
274            // TIME_TO_STR -> TO_CHAR with format conversion
275            "TIME_TO_STR" => {
276                if f.args.len() >= 2 {
277                    let mut new_args = vec![f.args[0].clone()];
278                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
279                        new_args.push(Expression::Literal(Literal::String(
280                            Self::convert_strptime_to_exasol_format(fmt),
281                        )));
282                    } else {
283                        new_args.push(f.args[1].clone());
284                    }
285                    Ok(Expression::Function(Box::new(Function::new(
286                        "TO_CHAR".to_string(),
287                        new_args,
288                    ))))
289                } else {
290                    Ok(Expression::Function(Box::new(Function::new(
291                        "TO_CHAR".to_string(),
292                        f.args,
293                    ))))
294                }
295            }
296
297            // STR_TO_TIME -> TO_DATE with format conversion
298            "STR_TO_TIME" => {
299                if f.args.len() >= 2 {
300                    let mut new_args = vec![f.args[0].clone()];
301                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
302                        new_args.push(Expression::Literal(Literal::String(
303                            Self::convert_strptime_to_exasol_format(fmt),
304                        )));
305                    } else {
306                        new_args.push(f.args[1].clone());
307                    }
308                    Ok(Expression::Function(Box::new(Function::new(
309                        "TO_DATE".to_string(),
310                        new_args,
311                    ))))
312                } else {
313                    Ok(Expression::Function(Box::new(Function::new(
314                        "TO_DATE".to_string(),
315                        f.args,
316                    ))))
317                }
318            }
319
320            // TO_TIMESTAMP is native
321            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
322
323            // CONVERT_TZ for timezone conversion
324            "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
325                Function::new("CONVERT_TZ".to_string(), f.args),
326            ))),
327
328            // STRPOS/POSITION → INSTR
329            "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
330                Function::new("INSTR".to_string(), f.args),
331            ))),
332
333            // WEEK_OF_YEAR → WEEK
334            "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
335                "WEEK".to_string(),
336                f.args,
337            )))),
338
339            // LAST_DAY is not native, would need complex transformation
340            "LAST_DAY" => {
341                // Exasol doesn't have LAST_DAY, but we can compute it
342                // For now, pass through
343                Ok(Expression::Function(Box::new(f)))
344            }
345
346            // Pass through everything else
347            _ => Ok(Expression::Function(Box::new(f))),
348        }
349    }
350
351    fn transform_aggregate_function(
352        &self,
353        f: Box<crate::expressions::AggregateFunction>,
354    ) -> Result<Expression> {
355        let name_upper = f.name.to_uppercase();
356        match name_upper.as_str() {
357            // ALL → EVERY
358            "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
359                "EVERY".to_string(),
360                f.args,
361            )))),
362
363            // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
364            "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
365                "LISTAGG".to_string(),
366                f.args,
367            )))),
368
369            // LISTAGG is native
370            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
371
372            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
373            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
374                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
375            ))),
376
377            // Pass through everything else
378            _ => Ok(Expression::AggregateFunction(f)),
379        }
380    }
381
382    /// Convert strptime format string to Exasol format string
383    /// Exasol TIME_MAPPING (reverse of Python sqlglot):
384    /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
385    fn convert_strptime_to_exasol_format(format: &str) -> String {
386        let mut result = String::new();
387        let chars: Vec<char> = format.chars().collect();
388        let mut i = 0;
389        while i < chars.len() {
390            if chars[i] == '%' && i + 1 < chars.len() {
391                let spec = chars[i + 1];
392                let exasol_spec = match spec {
393                    'Y' => "YYYY",
394                    'y' => "YY",
395                    'm' => "MM",
396                    'd' => "DD",
397                    'H' => "HH",
398                    'M' => "MI",
399                    'S' => "SS",
400                    'a' => "DY",    // abbreviated weekday name
401                    'A' => "DAY",   // full weekday name
402                    'b' => "MON",   // abbreviated month name
403                    'B' => "MONTH", // full month name
404                    'I' => "H12",   // 12-hour format
405                    'u' => "ID",    // ISO weekday (1-7)
406                    'V' => "IW",    // ISO week number
407                    'G' => "IYYY",  // ISO year
408                    'W' => "UW",    // Week number (Monday as first day)
409                    'U' => "UW",    // Week number (Sunday as first day)
410                    'z' => "Z",     // timezone offset
411                    _ => {
412                        // Unknown specifier, keep as-is
413                        result.push('%');
414                        result.push(spec);
415                        i += 2;
416                        continue;
417                    }
418                };
419                result.push_str(exasol_spec);
420                i += 2;
421            } else {
422                result.push(chars[i]);
423                i += 1;
424            }
425        }
426        result
427    }
428
429    /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
430    /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
431    fn uppercase_exasol_format(format: &str) -> String {
432        // Exasol format specifiers are always uppercase
433        format.to_uppercase()
434    }
435}
436
437// Note: Exasol type mappings (handled in generator if needed):
438// - BLOB, LONGBLOB, etc. → VARCHAR
439// - TEXT → LONG VARCHAR
440// - VARBINARY → VARCHAR
441// - TINYINT → SMALLINT
442// - MEDIUMINT → INT
443// - DECIMAL32/64/128/256 → DECIMAL
444// - DATETIME → TIMESTAMP
445// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
446//
447// Exasol also supports:
448// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
449// - IF...THEN...ELSE...ENDIF syntax for conditionals