polyglot_sql/dialects/exasol.rs
1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24use crate::generator::GeneratorConfig;
25use crate::tokens::TokenizerConfig;
26
27/// Exasol dialect
28pub struct ExasolDialect;
29
30impl DialectImpl for ExasolDialect {
31 fn dialect_type(&self) -> DialectType {
32 DialectType::Exasol
33 }
34
35 fn tokenizer_config(&self) -> TokenizerConfig {
36 let mut config = TokenizerConfig::default();
37 // Exasol uses double quotes for identifiers
38 config.identifiers.insert('"', '"');
39 // Also supports square brackets
40 config.identifiers.insert('[', ']');
41 config
42 }
43
44 fn generator_config(&self) -> GeneratorConfig {
45 use crate::generator::IdentifierQuoteStyle;
46 GeneratorConfig {
47 identifier_quote: '"',
48 identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
49 dialect: Some(DialectType::Exasol),
50 supports_column_join_marks: true,
51 // Exasol uses lowercase for window frame keywords (rows, preceding, following)
52 lowercase_window_frame_keywords: true,
53 ..Default::default()
54 }
55 }
56
57 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
58 match expr {
59 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
60 Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
61 "SYSTIMESTAMP".to_string(),
62 vec![],
63 )))),
64
65 // WeekOfYear -> WEEK
66 Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
67 "WEEK".to_string(),
68 vec![f.this],
69 )))),
70
71 // COALESCE is native, but also support transformations from other forms
72 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
73 original_name: None,
74 expressions: vec![f.this, f.expression],
75 inferred_type: None,
76 }))),
77
78 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
79 original_name: None,
80 expressions: vec![f.this, f.expression],
81 inferred_type: None,
82 }))),
83
84 // Bitwise operations → BIT_* functions
85 Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
86 "BIT_AND".to_string(),
87 vec![op.left, op.right],
88 )))),
89
90 Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
91 "BIT_OR".to_string(),
92 vec![op.left, op.right],
93 )))),
94
95 Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
96 "BIT_XOR".to_string(),
97 vec![op.left, op.right],
98 )))),
99
100 Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
101 "BIT_NOT".to_string(),
102 vec![f.this],
103 )))),
104
105 Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
106 "BIT_LSHIFT".to_string(),
107 vec![op.left, op.right],
108 )))),
109
110 Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
111 "BIT_RSHIFT".to_string(),
112 vec![op.left, op.right],
113 )))),
114
115 // Modulo → MOD function
116 Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
117 "MOD".to_string(),
118 vec![op.left, op.right],
119 )))),
120
121 // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
122 Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
123 this: f.this,
124 separator: f.separator,
125 on_overflow: None,
126 order_by: f.order_by,
127 distinct: f.distinct,
128 filter: f.filter,
129 inferred_type: None,
130 }))),
131
132 // USER (no parens) -> CURRENT_USER
133 Expression::Column(col)
134 if col.table.is_none() && col.name.name.eq_ignore_ascii_case("USER") =>
135 {
136 Ok(Expression::CurrentUser(Box::new(
137 crate::expressions::CurrentUser { this: None },
138 )))
139 }
140
141 // Generic function transformations
142 Expression::Function(f) => self.transform_function(*f),
143
144 // Aggregate function transformations
145 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
146
147 // Pass through everything else
148 _ => Ok(expr),
149 }
150 }
151}
152
153impl ExasolDialect {
154 fn transform_function(&self, f: Function) -> Result<Expression> {
155 let name_upper = f.name.to_uppercase();
156 match name_upper.as_str() {
157 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
158 // Exasol requires parentheses even for no-arg functions
159 // Preserve any arguments (like precision)
160 "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
161 "SYSTIMESTAMP".to_string(),
162 f.args,
163 )))),
164
165 // ALL → EVERY
166 "ALL" => Ok(Expression::Function(Box::new(Function::new(
167 "EVERY".to_string(),
168 f.args,
169 )))),
170
171 // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
172 "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
173 Ok(Expression::Coalesce(Box::new(VarArgFunc {
174 original_name: None,
175 expressions: f.args,
176 inferred_type: None,
177 })))
178 }
179
180 // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
181 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
182 "DAYS_BETWEEN".to_string(),
183 f.args,
184 )))),
185
186 // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
187 "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
188 "ADD_DAYS".to_string(),
189 f.args,
190 )))),
191
192 // DateSub → Negate and use ADD_DAYS
193 "DATESUB" | "DATE_SUB" => {
194 // Would need to negate the interval, for now just use ADD_DAYS
195 Ok(Expression::Function(Box::new(Function::new(
196 "ADD_DAYS".to_string(),
197 f.args,
198 ))))
199 }
200
201 // DATE_TRUNC is native
202 "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
203
204 // LEVENSHTEIN → EDIT_DISTANCE
205 "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
206 Function::new("EDIT_DISTANCE".to_string(), f.args),
207 ))),
208
209 // REGEXP_EXTRACT → REGEXP_SUBSTR
210 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
211 "REGEXP_SUBSTR".to_string(),
212 f.args,
213 )))),
214
215 // SHA/SHA1 → HASH_SHA
216 "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
217 "HASH_SHA".to_string(),
218 f.args,
219 )))),
220
221 // MD5 → HASH_MD5
222 "MD5" => Ok(Expression::Function(Box::new(Function::new(
223 "HASH_MD5".to_string(),
224 f.args,
225 )))),
226
227 // SHA256 → HASH_SHA256
228 "SHA256" | "SHA2" => {
229 // SHA2 in some dialects takes a length parameter
230 // HASH_SHA256 in Exasol just takes the value
231 let arg = f
232 .args
233 .into_iter()
234 .next()
235 .unwrap_or(Expression::Null(crate::expressions::Null));
236 Ok(Expression::Function(Box::new(Function::new(
237 "HASH_SHA256".to_string(),
238 vec![arg],
239 ))))
240 }
241
242 // SHA512 → HASH_SHA512
243 "SHA512" => Ok(Expression::Function(Box::new(Function::new(
244 "HASH_SHA512".to_string(),
245 f.args,
246 )))),
247
248 // VAR_POP is native
249 "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
250 "VAR_POP".to_string(),
251 f.args,
252 )))),
253
254 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
255 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
256 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
257 ))),
258
259 // TO_CHAR is native for date formatting
260 // DATE_FORMAT/STRFTIME: convert format codes and CAST value to TIMESTAMP
261 "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
262 let mut args = f.args;
263 if args.len() >= 2 {
264 // Convert format codes from C-style (%Y, %m, etc.) to Exasol format
265 if let Expression::Literal(lit) = &args[1] {
266 if let Literal::String(fmt) = lit.as_ref() {
267 let exasol_fmt = Self::convert_c_format_to_exasol(fmt);
268 args[1] = Expression::Literal(Box::new(Literal::String(exasol_fmt)));
269 }
270 }
271 // CAST string literal values to TIMESTAMP for date formatting functions
272 if matches!(&args[0], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
273 && (f.name.eq_ignore_ascii_case("DATE_FORMAT")
274 || f.name.eq_ignore_ascii_case("STRFTIME"))
275 {
276 args[0] = Expression::Cast(Box::new(crate::expressions::Cast {
277 this: args[0].clone(),
278 to: crate::expressions::DataType::Timestamp {
279 timezone: false,
280 precision: None,
281 },
282 trailing_comments: vec![],
283 double_colon_syntax: false,
284 format: None,
285 default: None,
286 inferred_type: None,
287 }));
288 }
289 }
290 Ok(Expression::Function(Box::new(Function::new(
291 "TO_CHAR".to_string(),
292 args,
293 ))))
294 }
295
296 // TO_DATE is native but format specifiers need uppercasing
297 "TO_DATE" => {
298 if f.args.len() >= 2 {
299 // Uppercase format string if present
300 let mut new_args = f.args.clone();
301 if let Expression::Literal(lit) = &f.args[1] {
302 if let Literal::String(fmt) = lit.as_ref() {
303 new_args[1] = Expression::Literal(Box::new(Literal::String(
304 Self::uppercase_exasol_format(fmt),
305 )));
306 }
307 }
308 Ok(Expression::Function(Box::new(Function::new(
309 "TO_DATE".to_string(),
310 new_args,
311 ))))
312 } else {
313 Ok(Expression::Function(Box::new(f)))
314 }
315 }
316
317 // TIME_TO_STR -> TO_CHAR with format conversion
318 "TIME_TO_STR" => {
319 if f.args.len() >= 2 {
320 let mut new_args = vec![f.args[0].clone()];
321 if let Expression::Literal(lit) = &f.args[1] {
322 if let Literal::String(fmt) = lit.as_ref() {
323 new_args.push(Expression::Literal(Box::new(Literal::String(
324 Self::convert_strptime_to_exasol_format(fmt),
325 ))));
326 }
327 } else {
328 new_args.push(f.args[1].clone());
329 }
330 Ok(Expression::Function(Box::new(Function::new(
331 "TO_CHAR".to_string(),
332 new_args,
333 ))))
334 } else {
335 Ok(Expression::Function(Box::new(Function::new(
336 "TO_CHAR".to_string(),
337 f.args,
338 ))))
339 }
340 }
341
342 // STR_TO_TIME -> TO_DATE with format conversion
343 "STR_TO_TIME" => {
344 if f.args.len() >= 2 {
345 let mut new_args = vec![f.args[0].clone()];
346 if let Expression::Literal(lit) = &f.args[1] {
347 if let Literal::String(fmt) = lit.as_ref() {
348 new_args.push(Expression::Literal(Box::new(Literal::String(
349 Self::convert_strptime_to_exasol_format(fmt),
350 ))));
351 }
352 } else {
353 new_args.push(f.args[1].clone());
354 }
355 Ok(Expression::Function(Box::new(Function::new(
356 "TO_DATE".to_string(),
357 new_args,
358 ))))
359 } else {
360 Ok(Expression::Function(Box::new(Function::new(
361 "TO_DATE".to_string(),
362 f.args,
363 ))))
364 }
365 }
366
367 // TO_TIMESTAMP is native
368 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
369
370 // CONVERT_TZ for timezone conversion
371 "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
372 Function::new("CONVERT_TZ".to_string(), f.args),
373 ))),
374
375 // STRPOS/POSITION → INSTR
376 "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
377 Function::new("INSTR".to_string(), f.args),
378 ))),
379
380 // WEEK_OF_YEAR → WEEK
381 "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
382 "WEEK".to_string(),
383 f.args,
384 )))),
385
386 // LAST_DAY is not native, would need complex transformation
387 "LAST_DAY" => {
388 // Exasol doesn't have LAST_DAY, but we can compute it
389 // For now, pass through
390 Ok(Expression::Function(Box::new(f)))
391 }
392
393 // CURDATE -> CURRENT_DATE
394 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
395
396 // USER / USER() -> CURRENT_USER
397 "USER" if f.args.is_empty() => Ok(Expression::CurrentUser(Box::new(
398 crate::expressions::CurrentUser { this: None },
399 ))),
400
401 // NOW -> CURRENT_TIMESTAMP
402 "NOW" => Ok(Expression::CurrentTimestamp(
403 crate::expressions::CurrentTimestamp {
404 precision: None,
405 sysdate: false,
406 },
407 )),
408
409 // Pass through everything else
410 _ => Ok(Expression::Function(Box::new(f))),
411 }
412 }
413
414 fn transform_aggregate_function(
415 &self,
416 f: Box<crate::expressions::AggregateFunction>,
417 ) -> Result<Expression> {
418 let name_upper = f.name.to_uppercase();
419 match name_upper.as_str() {
420 // ALL → EVERY
421 "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
422 "EVERY".to_string(),
423 f.args,
424 )))),
425
426 // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
427 "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
428 "LISTAGG".to_string(),
429 f.args,
430 )))),
431
432 // LISTAGG is native
433 "LISTAGG" => Ok(Expression::AggregateFunction(f)),
434
435 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
436 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
437 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
438 ))),
439
440 // Pass through everything else
441 _ => Ok(Expression::AggregateFunction(f)),
442 }
443 }
444
445 /// Convert strptime format string to Exasol format string
446 /// Exasol TIME_MAPPING (reverse of Python sqlglot):
447 /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
448 fn convert_strptime_to_exasol_format(format: &str) -> String {
449 let mut result = String::new();
450 let chars: Vec<char> = format.chars().collect();
451 let mut i = 0;
452 while i < chars.len() {
453 if chars[i] == '%' && i + 1 < chars.len() {
454 let spec = chars[i + 1];
455 let exasol_spec = match spec {
456 'Y' => "YYYY",
457 'y' => "YY",
458 'm' => "MM",
459 'd' => "DD",
460 'H' => "HH",
461 'M' => "MI",
462 'S' => "SS",
463 'a' => "DY", // abbreviated weekday name
464 'A' => "DAY", // full weekday name
465 'b' => "MON", // abbreviated month name
466 'B' => "MONTH", // full month name
467 'I' => "H12", // 12-hour format
468 'u' => "ID", // ISO weekday (1-7)
469 'V' => "IW", // ISO week number
470 'G' => "IYYY", // ISO year
471 'W' => "UW", // Week number (Monday as first day)
472 'U' => "UW", // Week number (Sunday as first day)
473 'z' => "Z", // timezone offset
474 _ => {
475 // Unknown specifier, keep as-is
476 result.push('%');
477 result.push(spec);
478 i += 2;
479 continue;
480 }
481 };
482 result.push_str(exasol_spec);
483 i += 2;
484 } else {
485 result.push(chars[i]);
486 i += 1;
487 }
488 }
489 result
490 }
491
492 /// Convert C-style / MySQL format codes to Exasol format codes.
493 /// Handles both standard strptime codes and MySQL-specific codes (%i, %T).
494 fn convert_c_format_to_exasol(format: &str) -> String {
495 let mut result = String::new();
496 let chars: Vec<char> = format.chars().collect();
497 let mut i = 0;
498 while i < chars.len() {
499 if chars[i] == '%' && i + 1 < chars.len() {
500 let spec = chars[i + 1];
501 let exasol_spec = match spec {
502 'Y' => "YYYY",
503 'y' => "YY",
504 'm' => "MM",
505 'd' => "DD",
506 'H' => "HH",
507 'M' => "MI", // strptime minutes
508 'i' => "MI", // MySQL minutes
509 'S' | 's' => "SS",
510 'T' => "HH:MI:SS", // MySQL %T = time (HH:MM:SS)
511 'a' => "DY", // abbreviated weekday name
512 'A' => "DAY", // full weekday name
513 'b' => "MON", // abbreviated month name
514 'B' => "MONTH", // full month name
515 'I' => "H12", // 12-hour format
516 'u' => "ID", // ISO weekday (1-7)
517 'V' => "IW", // ISO week number
518 'G' => "IYYY", // ISO year
519 'W' => "UW", // Week number
520 'U' => "UW", // Week number
521 'z' => "Z", // timezone offset
522 _ => {
523 // Unknown specifier, keep as-is
524 result.push('%');
525 result.push(spec);
526 i += 2;
527 continue;
528 }
529 };
530 result.push_str(exasol_spec);
531 i += 2;
532 } else {
533 result.push(chars[i]);
534 i += 1;
535 }
536 }
537 result
538 }
539
540 /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
541 /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
542 fn uppercase_exasol_format(format: &str) -> String {
543 // Exasol format specifiers are always uppercase
544 format.to_uppercase()
545 }
546}
547
548// Note: Exasol type mappings (handled in generator if needed):
549// - BLOB, LONGBLOB, etc. → VARCHAR
550// - TEXT → LONG VARCHAR
551// - VARBINARY → VARCHAR
552// - TINYINT → SMALLINT
553// - MEDIUMINT → INT
554// - DECIMAL32/64/128/256 → DECIMAL
555// - DATETIME → TIMESTAMP
556// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
557//
558// Exasol also supports:
559// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
560// - IF...THEN...ELSE...ENDIF syntax for conditionals