Skip to main content

polyglot_sql/dialects/
exasol.rs

1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24#[cfg(feature = "generate")]
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Exasol dialect
29pub struct ExasolDialect;
30
31impl DialectImpl for ExasolDialect {
32    fn dialect_type(&self) -> DialectType {
33        DialectType::Exasol
34    }
35
36    fn tokenizer_config(&self) -> TokenizerConfig {
37        let mut config = TokenizerConfig::default();
38        // Exasol uses double quotes for identifiers
39        config.identifiers.insert('"', '"');
40        // Also supports square brackets
41        config.identifiers.insert('[', ']');
42        config
43    }
44
45    #[cfg(feature = "generate")]
46
47    fn generator_config(&self) -> GeneratorConfig {
48        use crate::generator::IdentifierQuoteStyle;
49        GeneratorConfig {
50            identifier_quote: '"',
51            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
52            dialect: Some(DialectType::Exasol),
53            supports_column_join_marks: true,
54            // Exasol uses lowercase for window frame keywords (rows, preceding, following)
55            lowercase_window_frame_keywords: true,
56            ..Default::default()
57        }
58    }
59
60    #[cfg(feature = "transpile")]
61
62    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
63        match expr {
64            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
65            Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
66                "SYSTIMESTAMP".to_string(),
67                vec![],
68            )))),
69
70            // WeekOfYear -> WEEK
71            Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
72                "WEEK".to_string(),
73                vec![f.this],
74            )))),
75
76            // COALESCE is native, but also support transformations from other forms
77            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
78                original_name: None,
79                expressions: vec![f.this, f.expression],
80                inferred_type: None,
81            }))),
82
83            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
84                original_name: None,
85                expressions: vec![f.this, f.expression],
86                inferred_type: None,
87            }))),
88
89            // Bitwise operations → BIT_* functions
90            Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
91                "BIT_AND".to_string(),
92                vec![op.left, op.right],
93            )))),
94
95            Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
96                "BIT_OR".to_string(),
97                vec![op.left, op.right],
98            )))),
99
100            Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
101                "BIT_XOR".to_string(),
102                vec![op.left, op.right],
103            )))),
104
105            Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
106                "BIT_NOT".to_string(),
107                vec![f.this],
108            )))),
109
110            Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
111                "BIT_LSHIFT".to_string(),
112                vec![op.left, op.right],
113            )))),
114
115            Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
116                "BIT_RSHIFT".to_string(),
117                vec![op.left, op.right],
118            )))),
119
120            // Modulo → MOD function
121            Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
122                "MOD".to_string(),
123                vec![op.left, op.right],
124            )))),
125
126            // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
127            Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
128                this: f.this,
129                separator: f.separator,
130                on_overflow: None,
131                order_by: f.order_by,
132                distinct: f.distinct,
133                filter: f.filter,
134                inferred_type: None,
135            }))),
136
137            // USER (no parens) -> CURRENT_USER
138            Expression::Column(col)
139                if col.table.is_none() && col.name.name.eq_ignore_ascii_case("USER") =>
140            {
141                Ok(Expression::CurrentUser(Box::new(
142                    crate::expressions::CurrentUser { this: None },
143                )))
144            }
145
146            // Generic function transformations
147            Expression::Function(f) => self.transform_function(*f),
148
149            // Aggregate function transformations
150            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
151
152            // Pass through everything else
153            _ => Ok(expr),
154        }
155    }
156}
157
158#[cfg(feature = "transpile")]
159impl ExasolDialect {
160    fn transform_function(&self, f: Function) -> Result<Expression> {
161        let name_upper = f.name.to_uppercase();
162        match name_upper.as_str() {
163            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
164            // Exasol requires parentheses even for no-arg functions
165            // Preserve any arguments (like precision)
166            "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
167                "SYSTIMESTAMP".to_string(),
168                f.args,
169            )))),
170
171            // ALL → EVERY
172            "ALL" => Ok(Expression::Function(Box::new(Function::new(
173                "EVERY".to_string(),
174                f.args,
175            )))),
176
177            // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
178            "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
179                Ok(Expression::Coalesce(Box::new(VarArgFunc {
180                    original_name: None,
181                    expressions: f.args,
182                    inferred_type: None,
183                })))
184            }
185
186            // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
187            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
188                "DAYS_BETWEEN".to_string(),
189                f.args,
190            )))),
191
192            // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
193            "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
194                "ADD_DAYS".to_string(),
195                f.args,
196            )))),
197
198            // DateSub → Negate and use ADD_DAYS
199            "DATESUB" | "DATE_SUB" => {
200                // Would need to negate the interval, for now just use ADD_DAYS
201                Ok(Expression::Function(Box::new(Function::new(
202                    "ADD_DAYS".to_string(),
203                    f.args,
204                ))))
205            }
206
207            // DATE_TRUNC is native
208            "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
209
210            // LEVENSHTEIN → EDIT_DISTANCE
211            "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
212                Function::new("EDIT_DISTANCE".to_string(), f.args),
213            ))),
214
215            // REGEXP_EXTRACT → REGEXP_SUBSTR
216            "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
217                "REGEXP_SUBSTR".to_string(),
218                f.args,
219            )))),
220
221            // SHA/SHA1 → HASH_SHA
222            "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
223                "HASH_SHA".to_string(),
224                f.args,
225            )))),
226
227            // MD5 → HASH_MD5
228            "MD5" => Ok(Expression::Function(Box::new(Function::new(
229                "HASH_MD5".to_string(),
230                f.args,
231            )))),
232
233            // SHA256 → HASH_SHA256
234            "SHA256" | "SHA2" => {
235                // SHA2 in some dialects takes a length parameter
236                // HASH_SHA256 in Exasol just takes the value
237                let arg = f
238                    .args
239                    .into_iter()
240                    .next()
241                    .unwrap_or(Expression::Null(crate::expressions::Null));
242                Ok(Expression::Function(Box::new(Function::new(
243                    "HASH_SHA256".to_string(),
244                    vec![arg],
245                ))))
246            }
247
248            // SHA512 → HASH_SHA512
249            "SHA512" => Ok(Expression::Function(Box::new(Function::new(
250                "HASH_SHA512".to_string(),
251                f.args,
252            )))),
253
254            // VAR_POP is native
255            "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
256                "VAR_POP".to_string(),
257                f.args,
258            )))),
259
260            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
261            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
262                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
263            ))),
264
265            // TO_CHAR is native for date formatting
266            // DATE_FORMAT/STRFTIME: convert format codes and CAST value to TIMESTAMP
267            "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
268                let mut args = f.args;
269                if args.len() >= 2 {
270                    // Convert format codes from C-style (%Y, %m, etc.) to Exasol format
271                    if let Expression::Literal(lit) = &args[1] {
272                        if let Literal::String(fmt) = lit.as_ref() {
273                            let exasol_fmt = Self::convert_c_format_to_exasol(fmt);
274                            args[1] = Expression::Literal(Box::new(Literal::String(exasol_fmt)));
275                        }
276                    }
277                    // CAST string literal values to TIMESTAMP for date formatting functions
278                    if matches!(&args[0], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
279                        && (f.name.eq_ignore_ascii_case("DATE_FORMAT")
280                            || f.name.eq_ignore_ascii_case("STRFTIME"))
281                    {
282                        args[0] = Expression::Cast(Box::new(crate::expressions::Cast {
283                            this: args[0].clone(),
284                            to: crate::expressions::DataType::Timestamp {
285                                timezone: false,
286                                precision: None,
287                            },
288                            trailing_comments: vec![],
289                            double_colon_syntax: false,
290                            format: None,
291                            default: None,
292                            inferred_type: None,
293                        }));
294                    }
295                }
296                Ok(Expression::Function(Box::new(Function::new(
297                    "TO_CHAR".to_string(),
298                    args,
299                ))))
300            }
301
302            // TO_DATE is native but format specifiers need uppercasing
303            "TO_DATE" => {
304                if f.args.len() >= 2 {
305                    // Uppercase format string if present
306                    let mut new_args = f.args.clone();
307                    if let Expression::Literal(lit) = &f.args[1] {
308                        if let Literal::String(fmt) = lit.as_ref() {
309                            new_args[1] = Expression::Literal(Box::new(Literal::String(
310                                Self::uppercase_exasol_format(fmt),
311                            )));
312                        }
313                    }
314                    Ok(Expression::Function(Box::new(Function::new(
315                        "TO_DATE".to_string(),
316                        new_args,
317                    ))))
318                } else {
319                    Ok(Expression::Function(Box::new(f)))
320                }
321            }
322
323            // TIME_TO_STR -> TO_CHAR with format conversion
324            "TIME_TO_STR" => {
325                if f.args.len() >= 2 {
326                    let mut new_args = vec![f.args[0].clone()];
327                    if let Expression::Literal(lit) = &f.args[1] {
328                        if let Literal::String(fmt) = lit.as_ref() {
329                            new_args.push(Expression::Literal(Box::new(Literal::String(
330                                Self::convert_strptime_to_exasol_format(fmt),
331                            ))));
332                        }
333                    } else {
334                        new_args.push(f.args[1].clone());
335                    }
336                    Ok(Expression::Function(Box::new(Function::new(
337                        "TO_CHAR".to_string(),
338                        new_args,
339                    ))))
340                } else {
341                    Ok(Expression::Function(Box::new(Function::new(
342                        "TO_CHAR".to_string(),
343                        f.args,
344                    ))))
345                }
346            }
347
348            // STR_TO_TIME -> TO_DATE with format conversion
349            "STR_TO_TIME" => {
350                if f.args.len() >= 2 {
351                    let mut new_args = vec![f.args[0].clone()];
352                    if let Expression::Literal(lit) = &f.args[1] {
353                        if let Literal::String(fmt) = lit.as_ref() {
354                            new_args.push(Expression::Literal(Box::new(Literal::String(
355                                Self::convert_strptime_to_exasol_format(fmt),
356                            ))));
357                        }
358                    } else {
359                        new_args.push(f.args[1].clone());
360                    }
361                    Ok(Expression::Function(Box::new(Function::new(
362                        "TO_DATE".to_string(),
363                        new_args,
364                    ))))
365                } else {
366                    Ok(Expression::Function(Box::new(Function::new(
367                        "TO_DATE".to_string(),
368                        f.args,
369                    ))))
370                }
371            }
372
373            // TO_TIMESTAMP is native
374            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
375
376            // CONVERT_TZ for timezone conversion
377            "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
378                Function::new("CONVERT_TZ".to_string(), f.args),
379            ))),
380
381            // STRPOS/POSITION → INSTR
382            "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
383                Function::new("INSTR".to_string(), f.args),
384            ))),
385
386            // WEEK_OF_YEAR → WEEK
387            "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
388                "WEEK".to_string(),
389                f.args,
390            )))),
391
392            // LAST_DAY is not native, would need complex transformation
393            "LAST_DAY" => {
394                // Exasol doesn't have LAST_DAY, but we can compute it
395                // For now, pass through
396                Ok(Expression::Function(Box::new(f)))
397            }
398
399            // CURDATE -> CURRENT_DATE
400            "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
401
402            // USER / USER() -> CURRENT_USER
403            "USER" if f.args.is_empty() => Ok(Expression::CurrentUser(Box::new(
404                crate::expressions::CurrentUser { this: None },
405            ))),
406
407            // NOW -> CURRENT_TIMESTAMP
408            "NOW" => Ok(Expression::CurrentTimestamp(
409                crate::expressions::CurrentTimestamp {
410                    precision: None,
411                    sysdate: false,
412                },
413            )),
414
415            // Pass through everything else
416            _ => Ok(Expression::Function(Box::new(f))),
417        }
418    }
419
420    fn transform_aggregate_function(
421        &self,
422        f: Box<crate::expressions::AggregateFunction>,
423    ) -> Result<Expression> {
424        let name_upper = f.name.to_uppercase();
425        match name_upper.as_str() {
426            // ALL → EVERY
427            "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
428                "EVERY".to_string(),
429                f.args,
430            )))),
431
432            // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
433            "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
434                "LISTAGG".to_string(),
435                f.args,
436            )))),
437
438            // LISTAGG is native
439            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
440
441            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
442            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
443                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
444            ))),
445
446            // Pass through everything else
447            _ => Ok(Expression::AggregateFunction(f)),
448        }
449    }
450
451    /// Convert strptime format string to Exasol format string
452    /// Exasol TIME_MAPPING (reverse of Python sqlglot):
453    /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
454    fn convert_strptime_to_exasol_format(format: &str) -> String {
455        let mut result = String::new();
456        let chars: Vec<char> = format.chars().collect();
457        let mut i = 0;
458        while i < chars.len() {
459            if chars[i] == '%' && i + 1 < chars.len() {
460                let spec = chars[i + 1];
461                let exasol_spec = match spec {
462                    'Y' => "YYYY",
463                    'y' => "YY",
464                    'm' => "MM",
465                    'd' => "DD",
466                    'H' => "HH",
467                    'M' => "MI",
468                    'S' => "SS",
469                    'a' => "DY",    // abbreviated weekday name
470                    'A' => "DAY",   // full weekday name
471                    'b' => "MON",   // abbreviated month name
472                    'B' => "MONTH", // full month name
473                    'I' => "H12",   // 12-hour format
474                    'u' => "ID",    // ISO weekday (1-7)
475                    'V' => "IW",    // ISO week number
476                    'G' => "IYYY",  // ISO year
477                    'W' => "UW",    // Week number (Monday as first day)
478                    'U' => "UW",    // Week number (Sunday as first day)
479                    'z' => "Z",     // timezone offset
480                    _ => {
481                        // Unknown specifier, keep as-is
482                        result.push('%');
483                        result.push(spec);
484                        i += 2;
485                        continue;
486                    }
487                };
488                result.push_str(exasol_spec);
489                i += 2;
490            } else {
491                result.push(chars[i]);
492                i += 1;
493            }
494        }
495        result
496    }
497
498    /// Convert C-style / MySQL format codes to Exasol format codes.
499    /// Handles both standard strptime codes and MySQL-specific codes (%i, %T).
500    fn convert_c_format_to_exasol(format: &str) -> String {
501        let mut result = String::new();
502        let chars: Vec<char> = format.chars().collect();
503        let mut i = 0;
504        while i < chars.len() {
505            if chars[i] == '%' && i + 1 < chars.len() {
506                let spec = chars[i + 1];
507                let exasol_spec = match spec {
508                    'Y' => "YYYY",
509                    'y' => "YY",
510                    'm' => "MM",
511                    'd' => "DD",
512                    'H' => "HH",
513                    'M' => "MI", // strptime minutes
514                    'i' => "MI", // MySQL minutes
515                    'S' | 's' => "SS",
516                    'T' => "HH:MI:SS", // MySQL %T = time (HH:MM:SS)
517                    'a' => "DY",       // abbreviated weekday name
518                    'A' => "DAY",      // full weekday name
519                    'b' => "MON",      // abbreviated month name
520                    'B' => "MONTH",    // full month name
521                    'I' => "H12",      // 12-hour format
522                    'u' => "ID",       // ISO weekday (1-7)
523                    'V' => "IW",       // ISO week number
524                    'G' => "IYYY",     // ISO year
525                    'W' => "UW",       // Week number
526                    'U' => "UW",       // Week number
527                    'z' => "Z",        // timezone offset
528                    _ => {
529                        // Unknown specifier, keep as-is
530                        result.push('%');
531                        result.push(spec);
532                        i += 2;
533                        continue;
534                    }
535                };
536                result.push_str(exasol_spec);
537                i += 2;
538            } else {
539                result.push(chars[i]);
540                i += 1;
541            }
542        }
543        result
544    }
545
546    /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
547    /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
548    fn uppercase_exasol_format(format: &str) -> String {
549        // Exasol format specifiers are always uppercase
550        format.to_uppercase()
551    }
552}
553
554// Note: Exasol type mappings (handled in generator if needed):
555// - BLOB, LONGBLOB, etc. → VARCHAR
556// - TEXT → LONG VARCHAR
557// - VARBINARY → VARCHAR
558// - TINYINT → SMALLINT
559// - MEDIUMINT → INT
560// - DECIMAL32/64/128/256 → DECIMAL
561// - DATETIME → TIMESTAMP
562// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
563//
564// Exasol also supports:
565// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
566// - IF...THEN...ELSE...ENDIF syntax for conditionals