polyglot_sql/dialects/exasol.rs
1//! Exasol SQL Dialect
2//!
3//! Exasol-specific SQL dialect based on sqlglot patterns.
4//!
5//! References:
6//! - SQL Reference: https://docs.exasol.com/db/latest/sql_references/basiclanguageelements.htm
7//! - Data Types: https://docs.exasol.com/db/latest/sql_references/data_types/datatypesoverview.htm
8//! - Functions: https://docs.exasol.com/db/latest/sql_references/functions/
9//!
10//! Key characteristics:
11//! - Uppercase normalization for identifiers
12//! - Identifiers: double quotes or square brackets
13//! - Date functions: ADD_DAYS, ADD_MONTHS, DAYS_BETWEEN, MONTHS_BETWEEN
14//! - Bitwise: BIT_AND, BIT_OR, BIT_XOR, BIT_NOT, BIT_LSHIFT, BIT_RSHIFT
15//! - Functions: ZEROIFNULL, NULLIFZERO, SYSTIMESTAMP
16//! - EVERY for ALL aggregate
17//! - No SEMI/ANTI join support
18//! - DATE_TRUNC for date truncation
19//! - IF...THEN...ELSE...ENDIF syntax
20
21use super::{DialectImpl, DialectType};
22use crate::error::Result;
23use crate::expressions::{Expression, Function, ListAggFunc, Literal, VarArgFunc};
24#[cfg(feature = "generate")]
25use crate::generator::GeneratorConfig;
26use crate::tokens::TokenizerConfig;
27
28/// Exasol dialect
29pub struct ExasolDialect;
30
31impl DialectImpl for ExasolDialect {
32 fn dialect_type(&self) -> DialectType {
33 DialectType::Exasol
34 }
35
36 fn tokenizer_config(&self) -> TokenizerConfig {
37 let mut config = TokenizerConfig::default();
38 // Exasol uses double quotes for identifiers
39 config.identifiers.insert('"', '"');
40 // Also supports square brackets
41 config.identifiers.insert('[', ']');
42 config
43 }
44
45 #[cfg(feature = "generate")]
46
47 fn generator_config(&self) -> GeneratorConfig {
48 use crate::generator::IdentifierQuoteStyle;
49 GeneratorConfig {
50 identifier_quote: '"',
51 identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
52 dialect: Some(DialectType::Exasol),
53 supports_column_join_marks: true,
54 // Exasol uses lowercase for window frame keywords (rows, preceding, following)
55 lowercase_window_frame_keywords: true,
56 ..Default::default()
57 }
58 }
59
60 #[cfg(feature = "transpile")]
61
62 fn transform_expr(&self, expr: Expression) -> Result<Expression> {
63 match expr {
64 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
65 Expression::Systimestamp(_) => Ok(Expression::Function(Box::new(Function::new(
66 "SYSTIMESTAMP".to_string(),
67 vec![],
68 )))),
69
70 // WeekOfYear -> WEEK
71 Expression::WeekOfYear(f) => Ok(Expression::Function(Box::new(Function::new(
72 "WEEK".to_string(),
73 vec![f.this],
74 )))),
75
76 // COALESCE is native, but also support transformations from other forms
77 Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
78 original_name: None,
79 expressions: vec![f.this, f.expression],
80 inferred_type: None,
81 }))),
82
83 Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
84 original_name: None,
85 expressions: vec![f.this, f.expression],
86 inferred_type: None,
87 }))),
88
89 // Bitwise operations → BIT_* functions
90 Expression::BitwiseAnd(op) => Ok(Expression::Function(Box::new(Function::new(
91 "BIT_AND".to_string(),
92 vec![op.left, op.right],
93 )))),
94
95 Expression::BitwiseOr(op) => Ok(Expression::Function(Box::new(Function::new(
96 "BIT_OR".to_string(),
97 vec![op.left, op.right],
98 )))),
99
100 Expression::BitwiseXor(op) => Ok(Expression::Function(Box::new(Function::new(
101 "BIT_XOR".to_string(),
102 vec![op.left, op.right],
103 )))),
104
105 Expression::BitwiseNot(f) => Ok(Expression::Function(Box::new(Function::new(
106 "BIT_NOT".to_string(),
107 vec![f.this],
108 )))),
109
110 Expression::BitwiseLeftShift(op) => Ok(Expression::Function(Box::new(Function::new(
111 "BIT_LSHIFT".to_string(),
112 vec![op.left, op.right],
113 )))),
114
115 Expression::BitwiseRightShift(op) => Ok(Expression::Function(Box::new(Function::new(
116 "BIT_RSHIFT".to_string(),
117 vec![op.left, op.right],
118 )))),
119
120 // Modulo → MOD function
121 Expression::Mod(op) => Ok(Expression::Function(Box::new(Function::new(
122 "MOD".to_string(),
123 vec![op.left, op.right],
124 )))),
125
126 // GROUP_CONCAT -> LISTAGG in Exasol (with WITHIN GROUP for ORDER BY)
127 Expression::GroupConcat(f) => Ok(Expression::ListAgg(Box::new(ListAggFunc {
128 this: f.this,
129 separator: f.separator,
130 on_overflow: None,
131 order_by: f.order_by,
132 distinct: f.distinct,
133 filter: f.filter,
134 inferred_type: None,
135 }))),
136
137 // USER (no parens) -> CURRENT_USER
138 Expression::Column(col)
139 if col.table.is_none() && col.name.name.eq_ignore_ascii_case("USER") =>
140 {
141 Ok(Expression::CurrentUser(Box::new(
142 crate::expressions::CurrentUser { this: None },
143 )))
144 }
145
146 // Generic function transformations
147 Expression::Function(f) => self.transform_function(*f),
148
149 // Aggregate function transformations
150 Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
151
152 // Pass through everything else
153 _ => Ok(expr),
154 }
155 }
156}
157
158#[cfg(feature = "transpile")]
159impl ExasolDialect {
160 fn transform_function(&self, f: Function) -> Result<Expression> {
161 let name_upper = f.name.to_uppercase();
162 match name_upper.as_str() {
163 // SYSTIMESTAMP -> SYSTIMESTAMP() (with parentheses in Exasol)
164 // Exasol requires parentheses even for no-arg functions
165 // Preserve any arguments (like precision)
166 "SYSTIMESTAMP" => Ok(Expression::Function(Box::new(Function::new(
167 "SYSTIMESTAMP".to_string(),
168 f.args,
169 )))),
170
171 // ALL → EVERY
172 "ALL" => Ok(Expression::Function(Box::new(Function::new(
173 "EVERY".to_string(),
174 f.args,
175 )))),
176
177 // IFNULL/ISNULL/NVL → COALESCE (native in Exasol)
178 "IFNULL" | "ISNULL" | "NVL" if f.args.len() == 2 => {
179 Ok(Expression::Coalesce(Box::new(VarArgFunc {
180 original_name: None,
181 expressions: f.args,
182 inferred_type: None,
183 })))
184 }
185
186 // DateDiff → DAYS_BETWEEN (for DAY unit) or other *_BETWEEN functions
187 "DATEDIFF" => Ok(Expression::Function(Box::new(Function::new(
188 "DAYS_BETWEEN".to_string(),
189 f.args,
190 )))),
191
192 // DateAdd → ADD_DAYS (for DAY unit) or other ADD_* functions
193 "DATEADD" | "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
194 "ADD_DAYS".to_string(),
195 f.args,
196 )))),
197
198 // DateSub → Negate and use ADD_DAYS
199 "DATESUB" | "DATE_SUB" => {
200 // Would need to negate the interval, for now just use ADD_DAYS
201 Ok(Expression::Function(Box::new(Function::new(
202 "ADD_DAYS".to_string(),
203 f.args,
204 ))))
205 }
206
207 // DATE_TRUNC is native
208 "DATE_TRUNC" | "TRUNC" => Ok(Expression::Function(Box::new(f))),
209
210 // LEVENSHTEIN → EDIT_DISTANCE
211 "LEVENSHTEIN" | "LEVENSHTEIN_DISTANCE" => Ok(Expression::Function(Box::new(
212 Function::new("EDIT_DISTANCE".to_string(), f.args),
213 ))),
214
215 // REGEXP_EXTRACT → REGEXP_SUBSTR
216 "REGEXP_EXTRACT" => Ok(Expression::Function(Box::new(Function::new(
217 "REGEXP_SUBSTR".to_string(),
218 f.args,
219 )))),
220
221 // SHA/SHA1 → HASH_SHA
222 "SHA" | "SHA1" => Ok(Expression::Function(Box::new(Function::new(
223 "HASH_SHA".to_string(),
224 f.args,
225 )))),
226
227 // MD5 → HASH_MD5
228 "MD5" => Ok(Expression::Function(Box::new(Function::new(
229 "HASH_MD5".to_string(),
230 f.args,
231 )))),
232
233 // SHA256 → HASH_SHA256
234 "SHA256" | "SHA2" => {
235 // SHA2 in some dialects takes a length parameter
236 // HASH_SHA256 in Exasol just takes the value
237 let arg = f
238 .args
239 .into_iter()
240 .next()
241 .unwrap_or(Expression::Null(crate::expressions::Null));
242 Ok(Expression::Function(Box::new(Function::new(
243 "HASH_SHA256".to_string(),
244 vec![arg],
245 ))))
246 }
247
248 // SHA512 → HASH_SHA512
249 "SHA512" => Ok(Expression::Function(Box::new(Function::new(
250 "HASH_SHA512".to_string(),
251 f.args,
252 )))),
253
254 // VAR_POP is native
255 "VAR_POP" | "VARIANCE_POP" => Ok(Expression::Function(Box::new(Function::new(
256 "VAR_POP".to_string(),
257 f.args,
258 )))),
259
260 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
261 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
262 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
263 ))),
264
265 // TO_CHAR is native for date formatting
266 // DATE_FORMAT/STRFTIME: convert format codes and CAST value to TIMESTAMP
267 "TO_CHAR" | "DATE_FORMAT" | "STRFTIME" => {
268 let mut args = f.args;
269 if args.len() >= 2 {
270 // Convert format codes from C-style (%Y, %m, etc.) to Exasol format
271 if let Expression::Literal(lit) = &args[1] {
272 if let Literal::String(fmt) = lit.as_ref() {
273 let exasol_fmt = Self::convert_c_format_to_exasol(fmt);
274 args[1] = Expression::Literal(Box::new(Literal::String(exasol_fmt)));
275 }
276 }
277 // CAST string literal values to TIMESTAMP for date formatting functions
278 if matches!(&args[0], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
279 && (f.name.eq_ignore_ascii_case("DATE_FORMAT")
280 || f.name.eq_ignore_ascii_case("STRFTIME"))
281 {
282 args[0] = Expression::Cast(Box::new(crate::expressions::Cast {
283 this: args[0].clone(),
284 to: crate::expressions::DataType::Timestamp {
285 timezone: false,
286 precision: None,
287 },
288 trailing_comments: vec![],
289 double_colon_syntax: false,
290 format: None,
291 default: None,
292 inferred_type: None,
293 }));
294 }
295 }
296 Ok(Expression::Function(Box::new(Function::new(
297 "TO_CHAR".to_string(),
298 args,
299 ))))
300 }
301
302 // TO_DATE is native but format specifiers need uppercasing
303 "TO_DATE" => {
304 if f.args.len() >= 2 {
305 // Uppercase format string if present
306 let mut new_args = f.args.clone();
307 if let Expression::Literal(lit) = &f.args[1] {
308 if let Literal::String(fmt) = lit.as_ref() {
309 new_args[1] = Expression::Literal(Box::new(Literal::String(
310 Self::uppercase_exasol_format(fmt),
311 )));
312 }
313 }
314 Ok(Expression::Function(Box::new(Function::new(
315 "TO_DATE".to_string(),
316 new_args,
317 ))))
318 } else {
319 Ok(Expression::Function(Box::new(f)))
320 }
321 }
322
323 // TIME_TO_STR -> TO_CHAR with format conversion
324 "TIME_TO_STR" => {
325 if f.args.len() >= 2 {
326 let mut new_args = vec![f.args[0].clone()];
327 if let Expression::Literal(lit) = &f.args[1] {
328 if let Literal::String(fmt) = lit.as_ref() {
329 new_args.push(Expression::Literal(Box::new(Literal::String(
330 Self::convert_strptime_to_exasol_format(fmt),
331 ))));
332 }
333 } else {
334 new_args.push(f.args[1].clone());
335 }
336 Ok(Expression::Function(Box::new(Function::new(
337 "TO_CHAR".to_string(),
338 new_args,
339 ))))
340 } else {
341 Ok(Expression::Function(Box::new(Function::new(
342 "TO_CHAR".to_string(),
343 f.args,
344 ))))
345 }
346 }
347
348 // STR_TO_TIME -> TO_DATE with format conversion
349 "STR_TO_TIME" => {
350 if f.args.len() >= 2 {
351 let mut new_args = vec![f.args[0].clone()];
352 if let Expression::Literal(lit) = &f.args[1] {
353 if let Literal::String(fmt) = lit.as_ref() {
354 new_args.push(Expression::Literal(Box::new(Literal::String(
355 Self::convert_strptime_to_exasol_format(fmt),
356 ))));
357 }
358 } else {
359 new_args.push(f.args[1].clone());
360 }
361 Ok(Expression::Function(Box::new(Function::new(
362 "TO_DATE".to_string(),
363 new_args,
364 ))))
365 } else {
366 Ok(Expression::Function(Box::new(Function::new(
367 "TO_DATE".to_string(),
368 f.args,
369 ))))
370 }
371 }
372
373 // TO_TIMESTAMP is native
374 "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
375
376 // CONVERT_TZ for timezone conversion
377 "CONVERT_TIMEZONE" | "AT_TIME_ZONE" => Ok(Expression::Function(Box::new(
378 Function::new("CONVERT_TZ".to_string(), f.args),
379 ))),
380
381 // STRPOS/POSITION → INSTR
382 "STRPOS" | "POSITION" | "CHARINDEX" | "LOCATE" => Ok(Expression::Function(Box::new(
383 Function::new("INSTR".to_string(), f.args),
384 ))),
385
386 // WEEK_OF_YEAR → WEEK
387 "WEEK_OF_YEAR" | "WEEKOFYEAR" => Ok(Expression::Function(Box::new(Function::new(
388 "WEEK".to_string(),
389 f.args,
390 )))),
391
392 // LAST_DAY is not native, would need complex transformation
393 "LAST_DAY" => {
394 // Exasol doesn't have LAST_DAY, but we can compute it
395 // For now, pass through
396 Ok(Expression::Function(Box::new(f)))
397 }
398
399 // CURDATE -> CURRENT_DATE
400 "CURDATE" => Ok(Expression::CurrentDate(crate::expressions::CurrentDate)),
401
402 // USER / USER() -> CURRENT_USER
403 "USER" if f.args.is_empty() => Ok(Expression::CurrentUser(Box::new(
404 crate::expressions::CurrentUser { this: None },
405 ))),
406
407 // NOW -> CURRENT_TIMESTAMP
408 "NOW" => Ok(Expression::CurrentTimestamp(
409 crate::expressions::CurrentTimestamp {
410 precision: None,
411 sysdate: false,
412 },
413 )),
414
415 // Pass through everything else
416 _ => Ok(Expression::Function(Box::new(f))),
417 }
418 }
419
420 fn transform_aggregate_function(
421 &self,
422 f: Box<crate::expressions::AggregateFunction>,
423 ) -> Result<Expression> {
424 let name_upper = f.name.to_uppercase();
425 match name_upper.as_str() {
426 // ALL → EVERY
427 "ALL" | "EVERY" => Ok(Expression::Function(Box::new(Function::new(
428 "EVERY".to_string(),
429 f.args,
430 )))),
431
432 // GROUP_CONCAT / STRING_AGG → LISTAGG (native with WITHIN GROUP)
433 "GROUP_CONCAT" | "STRING_AGG" => Ok(Expression::Function(Box::new(Function::new(
434 "LISTAGG".to_string(),
435 f.args,
436 )))),
437
438 // LISTAGG is native
439 "LISTAGG" => Ok(Expression::AggregateFunction(f)),
440
441 // APPROX_DISTINCT → APPROXIMATE_COUNT_DISTINCT
442 "APPROX_DISTINCT" | "APPROX_COUNT_DISTINCT" => Ok(Expression::Function(Box::new(
443 Function::new("APPROXIMATE_COUNT_DISTINCT".to_string(), f.args),
444 ))),
445
446 // Pass through everything else
447 _ => Ok(Expression::AggregateFunction(f)),
448 }
449 }
450
451 /// Convert strptime format string to Exasol format string
452 /// Exasol TIME_MAPPING (reverse of Python sqlglot):
453 /// %Y -> YYYY, %y -> YY, %m -> MM, %d -> DD, %H -> HH, %M -> MI, %S -> SS, %a -> DY
454 fn convert_strptime_to_exasol_format(format: &str) -> String {
455 let mut result = String::new();
456 let chars: Vec<char> = format.chars().collect();
457 let mut i = 0;
458 while i < chars.len() {
459 if chars[i] == '%' && i + 1 < chars.len() {
460 let spec = chars[i + 1];
461 let exasol_spec = match spec {
462 'Y' => "YYYY",
463 'y' => "YY",
464 'm' => "MM",
465 'd' => "DD",
466 'H' => "HH",
467 'M' => "MI",
468 'S' => "SS",
469 'a' => "DY", // abbreviated weekday name
470 'A' => "DAY", // full weekday name
471 'b' => "MON", // abbreviated month name
472 'B' => "MONTH", // full month name
473 'I' => "H12", // 12-hour format
474 'u' => "ID", // ISO weekday (1-7)
475 'V' => "IW", // ISO week number
476 'G' => "IYYY", // ISO year
477 'W' => "UW", // Week number (Monday as first day)
478 'U' => "UW", // Week number (Sunday as first day)
479 'z' => "Z", // timezone offset
480 _ => {
481 // Unknown specifier, keep as-is
482 result.push('%');
483 result.push(spec);
484 i += 2;
485 continue;
486 }
487 };
488 result.push_str(exasol_spec);
489 i += 2;
490 } else {
491 result.push(chars[i]);
492 i += 1;
493 }
494 }
495 result
496 }
497
498 /// Convert C-style / MySQL format codes to Exasol format codes.
499 /// Handles both standard strptime codes and MySQL-specific codes (%i, %T).
500 fn convert_c_format_to_exasol(format: &str) -> String {
501 let mut result = String::new();
502 let chars: Vec<char> = format.chars().collect();
503 let mut i = 0;
504 while i < chars.len() {
505 if chars[i] == '%' && i + 1 < chars.len() {
506 let spec = chars[i + 1];
507 let exasol_spec = match spec {
508 'Y' => "YYYY",
509 'y' => "YY",
510 'm' => "MM",
511 'd' => "DD",
512 'H' => "HH",
513 'M' => "MI", // strptime minutes
514 'i' => "MI", // MySQL minutes
515 'S' | 's' => "SS",
516 'T' => "HH:MI:SS", // MySQL %T = time (HH:MM:SS)
517 'a' => "DY", // abbreviated weekday name
518 'A' => "DAY", // full weekday name
519 'b' => "MON", // abbreviated month name
520 'B' => "MONTH", // full month name
521 'I' => "H12", // 12-hour format
522 'u' => "ID", // ISO weekday (1-7)
523 'V' => "IW", // ISO week number
524 'G' => "IYYY", // ISO year
525 'W' => "UW", // Week number
526 'U' => "UW", // Week number
527 'z' => "Z", // timezone offset
528 _ => {
529 // Unknown specifier, keep as-is
530 result.push('%');
531 result.push(spec);
532 i += 2;
533 continue;
534 }
535 };
536 result.push_str(exasol_spec);
537 i += 2;
538 } else {
539 result.push(chars[i]);
540 i += 1;
541 }
542 }
543 result
544 }
545
546 /// Uppercase Exasol format specifiers (DD, MM, YYYY, etc.)
547 /// Converts lowercase format strings like 'dd-mm-yyyy' to 'DD-MM-YYYY'
548 fn uppercase_exasol_format(format: &str) -> String {
549 // Exasol format specifiers are always uppercase
550 format.to_uppercase()
551 }
552}
553
554// Note: Exasol type mappings (handled in generator if needed):
555// - BLOB, LONGBLOB, etc. → VARCHAR
556// - TEXT → LONG VARCHAR
557// - VARBINARY → VARCHAR
558// - TINYINT → SMALLINT
559// - MEDIUMINT → INT
560// - DECIMAL32/64/128/256 → DECIMAL
561// - DATETIME → TIMESTAMP
562// - TIMESTAMPTZ/TIMESTAMPLTZ/TIMESTAMPNTZ → TIMESTAMP
563//
564// Exasol also supports:
565// - TIMESTAMP WITH LOCAL TIME ZONE (fixed precision of 3)
566// - IF...THEN...ELSE...ENDIF syntax for conditionals