Skip to main content

polyglot_sql/dialects/
exasol.rs

1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31    fn dialect_type(&self) -> DialectType {
32        DialectType::Exasol
33    }
34
35    fn tokenizer_config(&self) -> TokenizerConfig {
36        let mut config = TokenizerConfig::default();
37        // Exasol uses double quotes for identifiers
38        config.identifiers.insert('"', '"');
39        // Also supports square brackets
40        config.identifiers.insert('[', ']');
41        config
42    }
43
44    fn generator_config(&self) -> GeneratorConfig {
45        use crate::generator::IdentifierQuoteStyle;
46        GeneratorConfig {
47            identifier_quote: '"',
48            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49            dialect: Some(DialectType::Exasol),
50            supports_column_join_marks: true,
51            // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52            lowercase_window_frame_keywords: true,
53            ..Default::default()
54        }
55    }
56
57    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58        match expr {
59            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60            Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61                "SYSTIMESTAMP".to_string(),
62                vec![],
63            )))),
64
65            // WeekOfYear -> WEEK
66            Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67                "WEEK".to_string(),
68                vec![f.this],
69            )))),
70
71            // COALESCE is native, but also support transformations from other forms
72            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
73                expressions: vec![f.this, f.expression],
74            }))),
75
76            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
77                expressions: vec![f.this, f.expression],
78            }))),
79
80            // Bitwise operations → BIT_* functions
81            Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
82                "BIT_AND".to_string(),
83                vec![op.left, op.right],
84            )))),
85
86            Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
87                "BIT_OR".to_string(),
88                vec![op.left, op.right],
89            )))),
90
91            Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
92                "BIT_XOR".to_string(),
93                vec![op.left, op.right],
94            )))),
95
96            Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
97                "BIT_NOT".to_string(),
98                vec![f.this],
99            )))),
100
101            Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
102                "BIT_LSHIFT".to_string(),
103                vec![op.left, op.right],
104            )))),
105
106            Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
107                "BIT_RSHIFT".to_string(),
108                vec![op.left, op.right],
109            )))),
110
111            // Modulo → MOD function
112            Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
113                "MOD".to_string(),
114                vec![op.left, op.right],
115            )))),
116
117            // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
118            Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
119                this: f.this,
120                separator: f.separator,
121                on_overflow: None,
122                order_by: f.order_by,
123                distinct: f.distinct,
124                filter: f.filter,
125            }))),
126
127            // Generic function transformations
128            Expression::Function(f) => self.transform_function(*f),
129
130            // Aggregate function transformations
131            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
132
133            // Pass through everything else
134            _ => Ok(expr),
135        }
136    }
137}
138
139impl ExasolDialect {
140    fn transform_function(&self, f: Function) -> Result<Expression> {
141        let name_upper = f.name.to_uppercase();
142        match name_upper.as_str() {
143            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
144            // Exasol requires parentheses even for no-arg functions
145            // Preserve any arguments (like precision)
146            "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
147                "SYSTIMESTAMP".to_string(),
148                f.args,
149            )))),
150
151            // ALL → EVERY
152            "ALL" => Ok(Expression::Function(Box::new(Function::new(
153                "EVERY".to_string(),
154                f.args,
155            )))),
156
157            // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
158            "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
159                Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
160                    expressions: f.args,
161                })))
162            }
163
164            // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
165            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
166                "DAYS_BETWEEN".to_string(),
167                f.args,
168            )))),
169
170            // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
171            "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
172                "ADD_DAYS".to_string(),
173                f.args,
174            )))),
175
176            // DateSub → Negate and use ADD_DAYS
177            "DATESUB" | "DATE_SUB" => {
178                // Would need to negate the interval, for now just use ADD_DAYS
179                Ok(Expression::Function(Box::new(Function::new(
180                    "ADD_DAYS".to_string(),
181                    f.args,
182                ))))
183            }
184
185            // DATE_TRUNC is native
186            "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
187
188            // LEVENSHTEIN → EDIT_DISTANCE
189            "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => {
190                Ok(Expression::Function(Box::new(Function::new(
191                    "EDIT_DISTANCE".to_string(),
192                    f.args,
193                ))))
194            }
195
196            // REGEXP_EXTRACT → REGEXP_SUBSTR
197            "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
198                "REGEXP_SUBSTR".to_string(),
199                f.args,
200            )))),
201
202            // SHA/SHA1 → HASH_SHA
203            "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
204                "HASH_SHA".to_string(),
205                f.args,
206            )))),
207
208            // MD5 → HASH_MD5
209            "MD5" => Ok(Expression::Function(Box::new(Function::new(
210                "HASH_MD5".to_string(),
211                f.args,
212            )))),
213
214            // SHA256 → HASH_SHA256
215            "SHA256" | "SHA2" => {
216                // SHA2 in some dialects takes a length parameter
217                // HASH_SHA256 in Exasol just takes the value
218                let arg = f.args.into_iter().next().unwrap_or(Expression::Null(crate::expressions::Null));
219                Ok(Expression::Function(Box::new(Function::new(
220                    "HASH_SHA256".to_string(),
221                    vec![arg],
222                ))))
223            }
224
225            // SHA512 → HASH_SHA512
226            "SHA512" => Ok(Expression::Function(Box::new(Function::new(
227                "HASH_SHA512".to_string(),
228                f.args,
229            )))),
230
231            // VAR_POP is native
232            "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
233                "VAR_POP".to_string(),
234                f.args,
235            )))),
236
237            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
238            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => {
239                Ok(Expression::Function(Box::new(Function::new(
240                    "APPROXIMATE_COUNT_DISTINCT".to_string(),
241                    f.args,
242                ))))
243            }
244
245            // TO_CHAR is native for date formatting
246            "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
247                Ok(Expression::Function(Box::new(Function::new(
248                    "TO_CHAR".to_string(),
249                    f.args,
250                ))))
251            }
252
253            // TO_DATE is native but format specifiers need uppercasing
254            "TO_DATE" => {
255                if f.args.len() >= 2 {
256                    // Uppercase format string if present
257                    let mut new_args = f.args.clone();
258                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
259                        new_args[1] = Expression::Literal(Literal::String(
260                            Self::uppercase_exasol_format(fmt),
261                        ));
262                    }
263                    Ok(Expression::Function(Box::new(Function::new(
264                        "TO_DATE".to_string(),
265                        new_args,
266                    ))))
267                } else {
268                    Ok(Expression::Function(Box::new(f)))
269                }
270            }
271
272            // TIME_TO_STR -> TO_CHAR with format conversion
273            "TIME_TO_STR" => {
274                if f.args.len() >= 2 {
275                    let mut new_args = vec![f.args[0].clone()];
276                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
277                        new_args.push(Expression::Literal(Literal::String(
278                            Self::convert_strptime_to_exasol_format(fmt),
279                        )));
280                    } else {
281                        new_args.push(f.args[1].clone());
282                    }
283                    Ok(Expression::Function(Box::new(Function::new(
284                        "TO_CHAR".to_string(),
285                        new_args,
286                    ))))
287                } else {
288                    Ok(Expression::Function(Box::new(Function::new(
289                        "TO_CHAR".to_string(),
290                        f.args,
291                    ))))
292                }
293            }
294
295            // STR_TO_TIME -> TO_DATE with format conversion
296            "STR_TO_TIME" => {
297                if f.args.len() >= 2 {
298                    let mut new_args = vec![f.args[0].clone()];
299                    if let Expression::Literal(Literal::String(fmt)) = &f.args[1] {
300                        new_args.push(Expression::Literal(Literal::String(
301                            Self::convert_strptime_to_exasol_format(fmt),
302                        )));
303                    } else {
304                        new_args.push(f.args[1].clone());
305                    }
306                    Ok(Expression::Function(Box::new(Function::new(
307                        "TO_DATE".to_string(),
308                        new_args,
309                    ))))
310                } else {
311                    Ok(Expression::Function(Box::new(Function::new(
312                        "TO_DATE".to_string(),
313                        f.args,
314                    ))))
315                }
316            }
317
318            // TO_TIMESTAMP is native
319            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
320
321            // CONVERT_TZ for timezone conversion
322            "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => {
323                Ok(Expression::Function(Box::new(Function::new(
324                    "CONVERT_TZ".to_string(),
325                    f.args,
326                ))))
327            }
328
329            // STRPOS/POSITION → INSTR
330            "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => {
331                Ok(Expression::Function(Box::new(Function::new(
332                    "INSTR".to_string(),
333                    f.args,
334                ))))
335            }
336
337            // WEEK_OF_YEAR → WEEK
338            "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
339                "WEEK".to_string(),
340                f.args,
341            )))),
342
343            // LAST_DAY is not native, would need complex transformation
344            "LAST_DAY" => {
345                // Exasol doesn't have LAST_DAY, but we can compute it
346                // For now, pass through
347                Ok(Expression::Function(Box::new(f)))
348            }
349
350            // Pass through everything else
351            _ => Ok(Expression::Function(Box::new(f))),
352        }
353    }
354
355    fn transform_aggregate_function(
356        &self,
357        f: Box<crate::expressions::AggregateFunction>,
358    ) -> Result<Expression> {
359        let name_upper = f.name.to_uppercase();
360        match name_upper.as_str() {
361            // ALL → EVERY
362            "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
363                "EVERY".to_string(),
364                f.args,
365            )))),
366
367            // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
368            "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
369                "LISTAGG".to_string(),
370                f.args,
371            )))),
372
373            // LISTAGG is native
374            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
375
376            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
377            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => {
378                Ok(Expression::Function(Box::new(Function::new(
379                    "APPROXIMATE_COUNT_DISTINCT".to_string(),
380                    f.args,
381                ))))
382            }
383
384            // Pass through everything else
385            _ => Ok(Expression::AggregateFunction(f)),
386        }
387    }
388
389    /// Convert strptime format string to Exasol format string
390    /// Exasol TIME_MAPPING (reverse of Python sqlglot):
391    /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
392    fn convert_strptime_to_exasol_format(format: &str) -> String {
393        let mut result = String::new();
394        let chars: Vec<char> = format.chars().collect();
395        let mut i = 0;
396        while i < chars.len() {
397            if chars[i] == '%' && i + 1 < chars.len() {
398                let spec = chars[i + 1];
399                let exasol_spec = match spec {
400                    'Y' => "YYYY",
401                    'y' => "YY",
402                    'm' => "MM",
403                    'd' => "DD",
404                    'H' => "HH",
405                    'M' => "MI",
406                    'S' => "SS",
407                    'a' => "DY",    // abbreviated weekday name
408                    'A' => "DAY",   // full weekday name
409                    'b' => "MON",   // abbreviated month name
410                    'B' => "MONTH", // full month name
411                    'I' => "H12",   // 12-hour format
412                    'u' => "ID",    // ISO weekday (1-7)
413                    'V' => "IW",    // ISO week number
414                    'G' => "IYYY",  // ISO year
415                    'W' => "UW",    // Week number (Monday as first day)
416                    'U' => "UW",    // Week number (Sunday as first day)
417                    'z' => "Z",     // timezone offset
418                    _ => {
419                        // Unknown specifier, keep as-is
420                        result.push('%');
421                        result.push(spec);
422                        i += 2;
423                        continue;
424                    }
425                };
426                result.push_str(exasol_spec);
427                i += 2;
428            } else {
429                result.push(chars[i]);
430                i += 1;
431            }
432        }
433        result
434    }
435
436    /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
437    /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
438    fn uppercase_exasol_format(format: &str) -> String {
439        // Exasol format specifiers are always uppercase
440        format.to_uppercase()
441    }
442}
443
444// Note: Exasol type mappings (handled in generator if needed):
445// - BLOB, LONGBLOB, etc. → VARCHAR
446// - TEXT → LONG VARCHAR
447// - VARBINARY → VARCHAR
448// - TINYINT → SMALLINT
449// - MEDIUMINT → INT
450// - DECIMAL32/64/128/256 → DECIMAL
451// - DATETIME → TIMESTAMP
452// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
453//
454// Exasol also supports:
455// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
456// - IF...THEN...ELSE...ENDIF syntax for conditionals