polyglot_sql/dialects/exasol.rs
1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31 fn dialect_type(&self) -> DialectType {
32 DialectType::Exasol
33 }
34
35 fn tokenizer_config(&self) -> TokenizerConfig {
36 let mut config = TokenizerConfig::default();
37 // Exasol uses double quotes for identifiers
38 config.identifiers.insert('"', '"');
39 // Also supports square brackets
40 config.identifiers.insert('[', ']');
41 config
42 }
43
44 fn generator_config(&self) -> GeneratorConfig {
45 use crate::generator::IdentifierQuoteStyle;
46 GeneratorConfig {
47 identifier_quote: '"',
48 identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49 dialect: Some(DialectType::Exasol),
50 supports_column_join_marks: true,
51 // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52 lowercase_window_frame_keywords: true,
53 ..Default::default()
54 }
55 }
56
57 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58 match expr {
59 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60 Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61 "SYSTIMESTAMP".to_string(),
62 vec![],
63 )))),
64
65 // WeekOfYear -> WEEK
66 Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67 "WEEK".to_string(),
68 vec![f.this],
69 )))),
70
71 // COALESCE is native, but also support transformations from other forms
72 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
73 original_name: None,
74 expressions: vec![f.this, f.expression],
75 inferred_type: None,
76 }))),
77
78 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
79 original_name: None,
80 expressions: vec![f.this, f.expression],
81 inferred_type: None,
82 }))),
83
84 // Bitwise operations → BIT_* functions
85 Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
86 "BIT_AND".to_string(),
87 vec![op.left, op.right],
88 )))),
89
90 Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
91 "BIT_OR".to_string(),
92 vec![op.left, op.right],
93 )))),
94
95 Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
96 "BIT_XOR".to_string(),
97 vec![op.left, op.right],
98 )))),
99
100 Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
101 "BIT_NOT".to_string(),
102 vec![f.this],
103 )))),
104
105 Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
106 "BIT_LSHIFT".to_string(),
107 vec![op.left, op.right],
108 )))),
109
110 Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
111 "BIT_RSHIFT".to_string(),
112 vec![op.left, op.right],
113 )))),
114
115 // Modulo → MOD function
116 Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
117 "MOD".to_string(),
118 vec![op.left, op.right],
119 )))),
120
121 // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
122 Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
123 this: f.this,
124 separator: f.separator,
125 on_overflow: None,
126 order_by: f.order_by,
127 distinct: f.distinct,
128 filter: f.filter,
129 inferred_type: None,
130 }))),
131
132 // USER (no parens) -> CURRENT_USER
133 Expression::Column(col)
134 if col.table.is_none() && col.name.name.eq_ignore_ascii_case("USER") =>
135 {
136 Ok(Expression::CurrentUser(Box::new(crate::expressions::CurrentUser { this: None })))
137 }
138
139 // Generic function transformations
140 Expression::Function(f) => self.transform_function(*f),
141
142 // Aggregate function transformations
143 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
144
145 // Pass through everything else
146 _ => Ok(expr),
147 }
148 }
149}
150
151impl ExasolDialect {
152 fn transform_function(&self, f: Function) -> Result<Expression> {
153 let name_upper = f.name.to_uppercase();
154 match name_upper.as_str() {
155 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
156 // Exasol requires parentheses even for no-arg functions
157 // Preserve any arguments (like precision)
158 "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
159 "SYSTIMESTAMP".to_string(),
160 f.args,
161 )))),
162
163 // ALL → EVERY
164 "ALL" => Ok(Expression::Function(Box::new(Function::new(
165 "EVERY".to_string(),
166 f.args,
167 )))),
168
169 // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
170 "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
171 Ok(Expression::Coalesce(Box::new(VarArgFunc {
172 original_name: None,
173 expressions: f.args,
174 inferred_type: None,
175 })))
176 }
177
178 // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
179 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
180 "DAYS_BETWEEN".to_string(),
181 f.args,
182 )))),
183
184 // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
185 "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
186 "ADD_DAYS".to_string(),
187 f.args,
188 )))),
189
190 // DateSub → Negate and use ADD_DAYS
191 "DATESUB" | "DATE_SUB" => {
192 // Would need to negate the interval, for now just use ADD_DAYS
193 Ok(Expression::Function(Box::new(Function::new(
194 "ADD_DAYS".to_string(),
195 f.args,
196 ))))
197 }
198
199 // DATE_TRUNC is native
200 "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
201
202 // LEVENSHTEIN → EDIT_DISTANCE
203 "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
204 Function::new("EDIT_DISTANCE".to_string(), f.args),
205 ))),
206
207 // REGEXP_EXTRACT → REGEXP_SUBSTR
208 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
209 "REGEXP_SUBSTR".to_string(),
210 f.args,
211 )))),
212
213 // SHA/SHA1 → HASH_SHA
214 "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
215 "HASH_SHA".to_string(),
216 f.args,
217 )))),
218
219 // MD5 → HASH_MD5
220 "MD5" => Ok(Expression::Function(Box::new(Function::new(
221 "HASH_MD5".to_string(),
222 f.args,
223 )))),
224
225 // SHA256 → HASH_SHA256
226 "SHA256" | "SHA2" => {
227 // SHA2 in some dialects takes a length parameter
228 // HASH_SHA256 in Exasol just takes the value
229 let arg = f
230 .args
231 .into_iter()
232 .next()
233 .unwrap_or(Expression::Null(crate::expressions::Null));
234 Ok(Expression::Function(Box::new(Function::new(
235 "HASH_SHA256".to_string(),
236 vec![arg],
237 ))))
238 }
239
240 // SHA512 → HASH_SHA512
241 "SHA512" => Ok(Expression::Function(Box::new(Function::new(
242 "HASH_SHA512".to_string(),
243 f.args,
244 )))),
245
246 // VAR_POP is native
247 "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
248 "VAR_POP".to_string(),
249 f.args,
250 )))),
251
252 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
253 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
254 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
255 ))),
256
257 // TO_CHAR is native for date formatting
258 // DATE_FORMAT/STRFTIME: convert format codes and CAST value to TIMESTAMP
259 "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
260 let mut args = f.args;
261 if args.len() >= 2 {
262 // Convert format codes from C-style (%Y, %m, etc.) to Exasol format
263 if let Expression::Literal(lit) = &args[1] {
264 if let Literal::String(fmt) = lit.as_ref() {
265 let exasol_fmt = Self::convert_c_format_to_exasol(fmt);
266 args[1] = Expression::Literal(Box::new(Literal::String(exasol_fmt)));
267 }
268 }
269 // CAST string literal values to TIMESTAMP for date formatting functions
270 if matches!(&args[0], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
271 && (f.name.eq_ignore_ascii_case("DATE_FORMAT")
272 || f.name.eq_ignore_ascii_case("STRFTIME"))
273 {
274 args[0] = Expression::Cast(Box::new(crate::expressions::Cast {
275 this: args[0].clone(),
276 to: crate::expressions::DataType::Timestamp {
277 timezone: false,
278 precision: None,
279 },
280 trailing_comments: vec![],
281 double_colon_syntax: false,
282 format: None,
283 default: None,
284 inferred_type: None,
285 }));
286 }
287 }
288 Ok(Expression::Function(Box::new(
289 Function::new("TO_CHAR".to_string(), args),
290 )))
291 }
292
293 // TO_DATE is native but format specifiers need uppercasing
294 "TO_DATE" => {
295 if f.args.len() >= 2 {
296 // Uppercase format string if present
297 let mut new_args = f.args.clone();
298 if let Expression::Literal(lit) = &f.args[1] {
299 if let Literal::String(fmt) = lit.as_ref() {
300 new_args[1] = Expression::Literal(Box::new(Literal::String(
301 Self::uppercase_exasol_format(fmt),
302 )));
303 }
304 }
305 Ok(Expression::Function(Box::new(Function::new(
306 "TO_DATE".to_string(),
307 new_args,
308 ))))
309 } else {
310 Ok(Expression::Function(Box::new(f)))
311 }
312 }
313
314 // TIME_TO_STR -> TO_CHAR with format conversion
315 "TIME_TO_STR" => {
316 if f.args.len() >= 2 {
317 let mut new_args = vec![f.args[0].clone()];
318 if let Expression::Literal(lit) = &f.args[1] {
319 if let Literal::String(fmt) = lit.as_ref() {
320 new_args.push(Expression::Literal(Box::new(Literal::String(
321 Self::convert_strptime_to_exasol_format(fmt),
322 ))));
323 }
324 } else {
325 new_args.push(f.args[1].clone());
326 }
327 Ok(Expression::Function(Box::new(Function::new(
328 "TO_CHAR".to_string(),
329 new_args,
330 ))))
331 } else {
332 Ok(Expression::Function(Box::new(Function::new(
333 "TO_CHAR".to_string(),
334 f.args,
335 ))))
336 }
337 }
338
339 // STR_TO_TIME -> TO_DATE with format conversion
340 "STR_TO_TIME" => {
341 if f.args.len() >= 2 {
342 let mut new_args = vec![f.args[0].clone()];
343 if let Expression::Literal(lit) = &f.args[1] {
344 if let Literal::String(fmt) = lit.as_ref() {
345 new_args.push(Expression::Literal(Box::new(Literal::String(
346 Self::convert_strptime_to_exasol_format(fmt),
347 ))));
348 }
349 } else {
350 new_args.push(f.args[1].clone());
351 }
352 Ok(Expression::Function(Box::new(Function::new(
353 "TO_DATE".to_string(),
354 new_args,
355 ))))
356 } else {
357 Ok(Expression::Function(Box::new(Function::new(
358 "TO_DATE".to_string(),
359 f.args,
360 ))))
361 }
362 }
363
364 // TO_TIMESTAMP is native
365 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
366
367 // CONVERT_TZ for timezone conversion
368 "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
369 Function::new("CONVERT_TZ".to_string(), f.args),
370 ))),
371
372 // STRPOS/POSITION → INSTR
373 "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
374 Function::new("INSTR".to_string(), f.args),
375 ))),
376
377 // WEEK_OF_YEAR → WEEK
378 "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
379 "WEEK".to_string(),
380 f.args,
381 )))),
382
383 // LAST_DAY is not native, would need complex transformation
384 "LAST_DAY" => {
385 // Exasol doesn't have LAST_DAY, but we can compute it
386 // For now, pass through
387 Ok(Expression::Function(Box::new(f)))
388 }
389
390 // CURDATE -> CURRENT_DATE
391 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
392
393 // USER / USER() -> CURRENT_USER
394 "USER" if f.args.is_empty() => Ok(Expression::CurrentUser(Box::new(crate::expressions::CurrentUser { this: None }))),
395
396 // NOW -> CURRENT_TIMESTAMP
397 "NOW" => Ok(Expression::CurrentTimestamp(
398 crate::expressions::CurrentTimestamp {
399 precision: None,
400 sysdate: false,
401 },
402 )),
403
404 // Pass through everything else
405 _ => Ok(Expression::Function(Box::new(f))),
406 }
407 }
408
409 fn transform_aggregate_function(
410 &self,
411 f: Box<crate::expressions::AggregateFunction>,
412 ) -> Result<Expression> {
413 let name_upper = f.name.to_uppercase();
414 match name_upper.as_str() {
415 // ALL → EVERY
416 "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
417 "EVERY".to_string(),
418 f.args,
419 )))),
420
421 // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
422 "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
423 "LISTAGG".to_string(),
424 f.args,
425 )))),
426
427 // LISTAGG is native
428 "LISTAGG" => Ok(Expression::AggregateFunction(f)),
429
430 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
431 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
432 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
433 ))),
434
435 // Pass through everything else
436 _ => Ok(Expression::AggregateFunction(f)),
437 }
438 }
439
440 /// Convert strptime format string to Exasol format string
441 /// Exasol TIME_MAPPING (reverse of Python sqlglot):
442 /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
443 fn convert_strptime_to_exasol_format(format: &str) -> String {
444 let mut result = String::new();
445 let chars: Vec<char> = format.chars().collect();
446 let mut i = 0;
447 while i < chars.len() {
448 if chars[i] == '%' && i + 1 < chars.len() {
449 let spec = chars[i + 1];
450 let exasol_spec = match spec {
451 'Y' => "YYYY",
452 'y' => "YY",
453 'm' => "MM",
454 'd' => "DD",
455 'H' => "HH",
456 'M' => "MI",
457 'S' => "SS",
458 'a' => "DY", // abbreviated weekday name
459 'A' => "DAY", // full weekday name
460 'b' => "MON", // abbreviated month name
461 'B' => "MONTH", // full month name
462 'I' => "H12", // 12-hour format
463 'u' => "ID", // ISO weekday (1-7)
464 'V' => "IW", // ISO week number
465 'G' => "IYYY", // ISO year
466 'W' => "UW", // Week number (Monday as first day)
467 'U' => "UW", // Week number (Sunday as first day)
468 'z' => "Z", // timezone offset
469 _ => {
470 // Unknown specifier, keep as-is
471 result.push('%');
472 result.push(spec);
473 i += 2;
474 continue;
475 }
476 };
477 result.push_str(exasol_spec);
478 i += 2;
479 } else {
480 result.push(chars[i]);
481 i += 1;
482 }
483 }
484 result
485 }
486
487 /// Convert C-style / MySQL format codes to Exasol format codes.
488 /// Handles both standard strptime codes and MySQL-specific codes (%i, %T).
489 fn convert_c_format_to_exasol(format: &str) -> String {
490 let mut result = String::new();
491 let chars: Vec<char> = format.chars().collect();
492 let mut i = 0;
493 while i < chars.len() {
494 if chars[i] == '%' && i + 1 < chars.len() {
495 let spec = chars[i + 1];
496 let exasol_spec = match spec {
497 'Y' => "YYYY",
498 'y' => "YY",
499 'm' => "MM",
500 'd' => "DD",
501 'H' => "HH",
502 'M' => "MI", // strptime minutes
503 'i' => "MI", // MySQL minutes
504 'S' | 's' => "SS",
505 'T' => "HH:MI:SS", // MySQL %T = time (HH:MM:SS)
506 'a' => "DY", // abbreviated weekday name
507 'A' => "DAY", // full weekday name
508 'b' => "MON", // abbreviated month name
509 'B' => "MONTH", // full month name
510 'I' => "H12", // 12-hour format
511 'u' => "ID", // ISO weekday (1-7)
512 'V' => "IW", // ISO week number
513 'G' => "IYYY", // ISO year
514 'W' => "UW", // Week number
515 'U' => "UW", // Week number
516 'z' => "Z", // timezone offset
517 _ => {
518 // Unknown specifier, keep as-is
519 result.push('%');
520 result.push(spec);
521 i += 2;
522 continue;
523 }
524 };
525 result.push_str(exasol_spec);
526 i += 2;
527 } else {
528 result.push(chars[i]);
529 i += 1;
530 }
531 }
532 result
533 }
534
535 /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
536 /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
537 fn uppercase_exasol_format(format: &str) -> String {
538 // Exasol format specifiers are always uppercase
539 format.to_uppercase()
540 }
541}
542
543// Note: Exasol type mappings (handled in generator if needed):
544// - BLOB, LONGBLOB, etc. → VARCHAR
545// - TEXT → LONG VARCHAR
546// - VARBINARY → VARCHAR
547// - TINYINT → SMALLINT
548// - MEDIUMINT → INT
549// - DECIMAL32/64/128/256 → DECIMAL
550// - DATETIME → TIMESTAMP
551// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
552//
553// Exasol also supports:
554// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
555// - IF...THEN...ELSE...ENDIF syntax for conditionals