Skip to main content

polyglot_sql/dialects/
exasol.rs

1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31    fn dialect_type(&self) -> DialectType {
32        DialectType::Exasol
33    }
34
35    fn tokenizer_config(&self) -> TokenizerConfig {
36        let mut config = TokenizerConfig::default();
37        // Exasol uses double quotes for identifiers
38        config.identifiers.insert('"', '"');
39        // Also supports square brackets
40        config.identifiers.insert('[', ']');
41        config
42    }
43
44    fn generator_config(&self) -> GeneratorConfig {
45        use crate::generator::IdentifierQuoteStyle;
46        GeneratorConfig {
47            identifier_quote: '"',
48            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49            dialect: Some(DialectType::Exasol),
50            supports_column_join_marks: true,
51            // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52            lowercase_window_frame_keywords: true,
53            ..Default::default()
54        }
55    }
56
57    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58        match expr {
59            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60            Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61                "SYSTIMESTAMP".to_string(),
62                vec![],
63            )))),
64
65            // WeekOfYear -> WEEK
66            Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67                "WEEK".to_string(),
68                vec![f.this],
69            )))),
70
71            // COALESCE is native, but also support transformations from other forms
72            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
73                original_name: None,
74                expressions: vec![f.this, f.expression],
75                inferred_type: None,
76            }))),
77
78            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
79                original_name: None,
80                expressions: vec![f.this, f.expression],
81                inferred_type: None,
82            }))),
83
84            // Bitwise operations → BIT_* functions
85            Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
86                "BIT_AND".to_string(),
87                vec![op.left, op.right],
88            )))),
89
90            Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
91                "BIT_OR".to_string(),
92                vec![op.left, op.right],
93            )))),
94
95            Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
96                "BIT_XOR".to_string(),
97                vec![op.left, op.right],
98            )))),
99
100            Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
101                "BIT_NOT".to_string(),
102                vec![f.this],
103            )))),
104
105            Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
106                "BIT_LSHIFT".to_string(),
107                vec![op.left, op.right],
108            )))),
109
110            Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
111                "BIT_RSHIFT".to_string(),
112                vec![op.left, op.right],
113            )))),
114
115            // Modulo → MOD function
116            Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
117                "MOD".to_string(),
118                vec![op.left, op.right],
119            )))),
120
121            // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
122            Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
123                this: f.this,
124                separator: f.separator,
125                on_overflow: None,
126                order_by: f.order_by,
127                distinct: f.distinct,
128                filter: f.filter,
129                inferred_type: None,
130            }))),
131
132            // USER (no parens) -> CURRENT_USER
133            Expression::Column(col)
134                if col.table.is_none() && col.name.name.eq_ignore_ascii_case("USER") =>
135            {
136                Ok(Expression::CurrentUser(Box::new(
137                    crate::expressions::CurrentUser { this: None },
138                )))
139            }
140
141            // Generic function transformations
142            Expression::Function(f) => self.transform_function(*f),
143
144            // Aggregate function transformations
145            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
146
147            // Pass through everything else
148            _ => Ok(expr),
149        }
150    }
151}
152
153impl ExasolDialect {
154    fn transform_function(&self, f: Function) -> Result<Expression> {
155        let name_upper = f.name.to_uppercase();
156        match name_upper.as_str() {
157            // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
158            // Exasol requires parentheses even for no-arg functions
159            // Preserve any arguments (like precision)
160            "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
161                "SYSTIMESTAMP".to_string(),
162                f.args,
163            )))),
164
165            // ALL → EVERY
166            "ALL" => Ok(Expression::Function(Box::new(Function::new(
167                "EVERY".to_string(),
168                f.args,
169            )))),
170
171            // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
172            "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
173                Ok(Expression::Coalesce(Box::new(VarArgFunc {
174                    original_name: None,
175                    expressions: f.args,
176                    inferred_type: None,
177                })))
178            }
179
180            // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
181            "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
182                "DAYS_BETWEEN".to_string(),
183                f.args,
184            )))),
185
186            // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
187            "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
188                "ADD_DAYS".to_string(),
189                f.args,
190            )))),
191
192            // DateSub → Negate and use ADD_DAYS
193            "DATESUB" | "DATE_SUB" => {
194                // Would need to negate the interval, for now just use ADD_DAYS
195                Ok(Expression::Function(Box::new(Function::new(
196                    "ADD_DAYS".to_string(),
197                    f.args,
198                ))))
199            }
200
201            // DATE_TRUNC is native
202            "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
203
204            // LEVENSHTEIN → EDIT_DISTANCE
205            "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
206                Function::new("EDIT_DISTANCE".to_string(), f.args),
207            ))),
208
209            // REGEXP_EXTRACT → REGEXP_SUBSTR
210            "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
211                "REGEXP_SUBSTR".to_string(),
212                f.args,
213            )))),
214
215            // SHA/SHA1 → HASH_SHA
216            "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
217                "HASH_SHA".to_string(),
218                f.args,
219            )))),
220
221            // MD5 → HASH_MD5
222            "MD5" => Ok(Expression::Function(Box::new(Function::new(
223                "HASH_MD5".to_string(),
224                f.args,
225            )))),
226
227            // SHA256 → HASH_SHA256
228            "SHA256" | "SHA2" => {
229                // SHA2 in some dialects takes a length parameter
230                // HASH_SHA256 in Exasol just takes the value
231                let arg = f
232                    .args
233                    .into_iter()
234                    .next()
235                    .unwrap_or(Expression::Null(crate::expressions::Null));
236                Ok(Expression::Function(Box::new(Function::new(
237                    "HASH_SHA256".to_string(),
238                    vec![arg],
239                ))))
240            }
241
242            // SHA512 → HASH_SHA512
243            "SHA512" => Ok(Expression::Function(Box::new(Function::new(
244                "HASH_SHA512".to_string(),
245                f.args,
246            )))),
247
248            // VAR_POP is native
249            "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
250                "VAR_POP".to_string(),
251                f.args,
252            )))),
253
254            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
255            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
256                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
257            ))),
258
259            // TO_CHAR is native for date formatting
260            // DATE_FORMAT/STRFTIME: convert format codes and CAST value to TIMESTAMP
261            "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
262                let mut args = f.args;
263                if args.len() >= 2 {
264                    // Convert format codes from C-style (%Y, %m, etc.) to Exasol format
265                    if let Expression::Literal(lit) = &args[1] {
266                        if let Literal::String(fmt) = lit.as_ref() {
267                            let exasol_fmt = Self::convert_c_format_to_exasol(fmt);
268                            args[1] = Expression::Literal(Box::new(Literal::String(exasol_fmt)));
269                        }
270                    }
271                    // CAST string literal values to TIMESTAMP for date formatting functions
272                    if matches!(&args[0], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
273                        && (f.name.eq_ignore_ascii_case("DATE_FORMAT")
274                            || f.name.eq_ignore_ascii_case("STRFTIME"))
275                    {
276                        args[0] = Expression::Cast(Box::new(crate::expressions::Cast {
277                            this: args[0].clone(),
278                            to: crate::expressions::DataType::Timestamp {
279                                timezone: false,
280                                precision: None,
281                            },
282                            trailing_comments: vec![],
283                            double_colon_syntax: false,
284                            format: None,
285                            default: None,
286                            inferred_type: None,
287                        }));
288                    }
289                }
290                Ok(Expression::Function(Box::new(Function::new(
291                    "TO_CHAR".to_string(),
292                    args,
293                ))))
294            }
295
296            // TO_DATE is native but format specifiers need uppercasing
297            "TO_DATE" => {
298                if f.args.len() >= 2 {
299                    // Uppercase format string if present
300                    let mut new_args = f.args.clone();
301                    if let Expression::Literal(lit) = &f.args[1] {
302                        if let Literal::String(fmt) = lit.as_ref() {
303                            new_args[1] = Expression::Literal(Box::new(Literal::String(
304                                Self::uppercase_exasol_format(fmt),
305                            )));
306                        }
307                    }
308                    Ok(Expression::Function(Box::new(Function::new(
309                        "TO_DATE".to_string(),
310                        new_args,
311                    ))))
312                } else {
313                    Ok(Expression::Function(Box::new(f)))
314                }
315            }
316
317            // TIME_TO_STR -> TO_CHAR with format conversion
318            "TIME_TO_STR" => {
319                if f.args.len() >= 2 {
320                    let mut new_args = vec![f.args[0].clone()];
321                    if let Expression::Literal(lit) = &f.args[1] {
322                        if let Literal::String(fmt) = lit.as_ref() {
323                            new_args.push(Expression::Literal(Box::new(Literal::String(
324                                Self::convert_strptime_to_exasol_format(fmt),
325                            ))));
326                        }
327                    } else {
328                        new_args.push(f.args[1].clone());
329                    }
330                    Ok(Expression::Function(Box::new(Function::new(
331                        "TO_CHAR".to_string(),
332                        new_args,
333                    ))))
334                } else {
335                    Ok(Expression::Function(Box::new(Function::new(
336                        "TO_CHAR".to_string(),
337                        f.args,
338                    ))))
339                }
340            }
341
342            // STR_TO_TIME -> TO_DATE with format conversion
343            "STR_TO_TIME" => {
344                if f.args.len() >= 2 {
345                    let mut new_args = vec![f.args[0].clone()];
346                    if let Expression::Literal(lit) = &f.args[1] {
347                        if let Literal::String(fmt) = lit.as_ref() {
348                            new_args.push(Expression::Literal(Box::new(Literal::String(
349                                Self::convert_strptime_to_exasol_format(fmt),
350                            ))));
351                        }
352                    } else {
353                        new_args.push(f.args[1].clone());
354                    }
355                    Ok(Expression::Function(Box::new(Function::new(
356                        "TO_DATE".to_string(),
357                        new_args,
358                    ))))
359                } else {
360                    Ok(Expression::Function(Box::new(Function::new(
361                        "TO_DATE".to_string(),
362                        f.args,
363                    ))))
364                }
365            }
366
367            // TO_TIMESTAMP is native
368            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
369
370            // CONVERT_TZ for timezone conversion
371            "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
372                Function::new("CONVERT_TZ".to_string(), f.args),
373            ))),
374
375            // STRPOS/POSITION → INSTR
376            "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
377                Function::new("INSTR".to_string(), f.args),
378            ))),
379
380            // WEEK_OF_YEAR → WEEK
381            "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
382                "WEEK".to_string(),
383                f.args,
384            )))),
385
386            // LAST_DAY is not native, would need complex transformation
387            "LAST_DAY" => {
388                // Exasol doesn't have LAST_DAY, but we can compute it
389                // For now, pass through
390                Ok(Expression::Function(Box::new(f)))
391            }
392
393            // CURDATE -> CURRENT_DATE
394            "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
395
396            // USER / USER() -> CURRENT_USER
397            "USER" if f.args.is_empty() => Ok(Expression::CurrentUser(Box::new(
398                crate::expressions::CurrentUser { this: None },
399            ))),
400
401            // NOW -> CURRENT_TIMESTAMP
402            "NOW" => Ok(Expression::CurrentTimestamp(
403                crate::expressions::CurrentTimestamp {
404                    precision: None,
405                    sysdate: false,
406                },
407            )),
408
409            // Pass through everything else
410            _ => Ok(Expression::Function(Box::new(f))),
411        }
412    }
413
414    fn transform_aggregate_function(
415        &self,
416        f: Box<crate::expressions::AggregateFunction>,
417    ) -> Result<Expression> {
418        let name_upper = f.name.to_uppercase();
419        match name_upper.as_str() {
420            // ALL → EVERY
421            "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
422                "EVERY".to_string(),
423                f.args,
424            )))),
425
426            // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
427            "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
428                "LISTAGG".to_string(),
429                f.args,
430            )))),
431
432            // LISTAGG is native
433            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
434
435            // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
436            "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
437                Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
438            ))),
439
440            // Pass through everything else
441            _ => Ok(Expression::AggregateFunction(f)),
442        }
443    }
444
445    /// Convert strptime format string to Exasol format string
446    /// Exasol TIME_MAPPING (reverse of Python sqlglot):
447    /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
448    fn convert_strptime_to_exasol_format(format: &str) -> String {
449        let mut result = String::new();
450        let chars: Vec<char> = format.chars().collect();
451        let mut i = 0;
452        while i < chars.len() {
453            if chars[i] == '%' && i + 1 < chars.len() {
454                let spec = chars[i + 1];
455                let exasol_spec = match spec {
456                    'Y' => "YYYY",
457                    'y' => "YY",
458                    'm' => "MM",
459                    'd' => "DD",
460                    'H' => "HH",
461                    'M' => "MI",
462                    'S' => "SS",
463                    'a' => "DY",    // abbreviated weekday name
464                    'A' => "DAY",   // full weekday name
465                    'b' => "MON",   // abbreviated month name
466                    'B' => "MONTH", // full month name
467                    'I' => "H12",   // 12-hour format
468                    'u' => "ID",    // ISO weekday (1-7)
469                    'V' => "IW",    // ISO week number
470                    'G' => "IYYY",  // ISO year
471                    'W' => "UW",    // Week number (Monday as first day)
472                    'U' => "UW",    // Week number (Sunday as first day)
473                    'z' => "Z",     // timezone offset
474                    _ => {
475                        // Unknown specifier, keep as-is
476                        result.push('%');
477                        result.push(spec);
478                        i += 2;
479                        continue;
480                    }
481                };
482                result.push_str(exasol_spec);
483                i += 2;
484            } else {
485                result.push(chars[i]);
486                i += 1;
487            }
488        }
489        result
490    }
491
492    /// Convert C-style / MySQL format codes to Exasol format codes.
493    /// Handles both standard strptime codes and MySQL-specific codes (%i, %T).
494    fn convert_c_format_to_exasol(format: &str) -> String {
495        let mut result = String::new();
496        let chars: Vec<char> = format.chars().collect();
497        let mut i = 0;
498        while i < chars.len() {
499            if chars[i] == '%' && i + 1 < chars.len() {
500                let spec = chars[i + 1];
501                let exasol_spec = match spec {
502                    'Y' => "YYYY",
503                    'y' => "YY",
504                    'm' => "MM",
505                    'd' => "DD",
506                    'H' => "HH",
507                    'M' => "MI", // strptime minutes
508                    'i' => "MI", // MySQL minutes
509                    'S' | 's' => "SS",
510                    'T' => "HH:MI:SS", // MySQL %T = time (HH:MM:SS)
511                    'a' => "DY",       // abbreviated weekday name
512                    'A' => "DAY",      // full weekday name
513                    'b' => "MON",      // abbreviated month name
514                    'B' => "MONTH",    // full month name
515                    'I' => "H12",      // 12-hour format
516                    'u' => "ID",       // ISO weekday (1-7)
517                    'V' => "IW",       // ISO week number
518                    'G' => "IYYY",     // ISO year
519                    'W' => "UW",       // Week number
520                    'U' => "UW",       // Week number
521                    'z' => "Z",        // timezone offset
522                    _ => {
523                        // Unknown specifier, keep as-is
524                        result.push('%');
525                        result.push(spec);
526                        i += 2;
527                        continue;
528                    }
529                };
530                result.push_str(exasol_spec);
531                i += 2;
532            } else {
533                result.push(chars[i]);
534                i += 1;
535            }
536        }
537        result
538    }
539
540    /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
541    /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
542    fn uppercase_exasol_format(format: &str) -> String {
543        // Exasol format specifiers are always uppercase
544        format.to_uppercase()
545    }
546}
547
548// Note: Exasol type mappings (handled in generator if needed):
549// - BLOB, LONGBLOB, etc. → VARCHAR
550// - TEXT → LONG VARCHAR
551// - VARBINARY → VARCHAR
552// - TINYINT → SMALLINT
553// - MEDIUMINT → INT
554// - DECIMAL32/64/128/256 → DECIMAL
555// - DATETIME → TIMESTAMP
556// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
557//
558// Exasol also supports:
559// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
560// - IF...THEN...ELSE...ENDIF syntax for conditionals