Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Redshift dialect
15pub struct RedshiftDialect;
16
17impl DialectImpl for RedshiftDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Redshift
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        use crate::tokens::TokenType;
24        let mut config = TokenizerConfig::default();
25        // Redshift uses double quotes for identifiers (like PostgreSQL)
26        config.identifiers.insert('"', '"');
27        // Redshift does NOT support nested comments
28        config.nested_comments = false;
29        // MINUS is an alias for EXCEPT in Redshift
30        config.keywords.insert("MINUS".to_string(), TokenType::Except);
31        config
32    }
33
34    fn generator_config(&self) -> GeneratorConfig {
35        use crate::generator::IdentifierQuoteStyle;
36        GeneratorConfig {
37            identifier_quote: '"',
38            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
39            dialect: Some(DialectType::Redshift),
40            supports_column_join_marks: true,
41            locking_reads_supported: false,
42            tz_to_with_time_zone: true,
43            ..Default::default()
44        }
45    }
46
47    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
48        match expr {
49            // IFNULL -> COALESCE in Redshift
50            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
51                expressions: vec![f.this, f.expression],
52            }))),
53
54            // NVL is native in Redshift, but we standardize to COALESCE for consistency
55            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
56                expressions: vec![f.this, f.expression],
57            }))),
58
59            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
60            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
61
62            // SafeCast -> TRY_CAST in Redshift
63            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
64
65            // ILIKE is native in Redshift (inherited from PostgreSQL)
66            Expression::ILike(op) => Ok(Expression::ILike(op)),
67
68            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
69            Expression::CountIf(f) => {
70                let case_expr = Expression::Case(Box::new(Case {
71                    operand: None,
72                    whens: vec![(f.this.clone(), Expression::number(1))],
73                    else_: Some(Expression::number(0)),
74                }));
75                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
76                    this: case_expr,
77                    distinct: f.distinct,
78                    filter: f.filter,
79                    order_by: Vec::new(),
80                name: None,
81                limit: None,
82                })))
83            }
84
85            // EXPLODE is not supported in Redshift
86            Expression::Explode(_) => Ok(expr),
87
88            // ExplodeOuter is not supported in Redshift
89            Expression::ExplodeOuter(_) => Ok(expr),
90
91            // UNNEST is supported in Redshift (but limited)
92            Expression::Unnest(_) => Ok(expr),
93
94            // RAND -> RANDOM in Redshift (like PostgreSQL)
95            Expression::Rand(r) => {
96                let _ = r.seed;
97                Ok(Expression::Random(crate::expressions::Random))
98            }
99
100            // Generic function transformations
101            Expression::Function(f) => self.transform_function(*f),
102
103            // Generic aggregate function transformations
104            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
105
106            // Cast transformations
107            Expression::Cast(c) => self.transform_cast(*c),
108
109            // CONVERT -> CAST in Redshift
110            Expression::Convert(c) => {
111                Ok(Expression::Cast(Box::new(Cast {
112                    this: c.this,
113                    to: c.to,
114                    trailing_comments: Vec::new(),
115                    double_colon_syntax: false,
116                    format: None,
117                    default: None,
118                })))
119            }
120
121            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
122            Expression::Select(mut select) => {
123                if let Some(top) = select.top.take() {
124                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
125                    if !top.percent && !top.with_ties {
126                        // Convert TOP to LIMIT
127                        select.limit = Some(Limit {
128                            this: top.this,
129                            percent: false,
130                        });
131                    } else {
132                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
133                        select.top = Some(top);
134                    }
135                }
136                Ok(Expression::Select(select))
137            }
138
139            // Pass through everything else
140            _ => Ok(expr),
141        }
142    }
143}
144
145impl RedshiftDialect {
146    fn transform_function(&self, f: Function) -> Result<Expression> {
147        let name_upper = f.name.to_uppercase();
148        match name_upper.as_str() {
149            // IFNULL -> COALESCE
150            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
151                expressions: f.args,
152            }))),
153
154            // NVL -> COALESCE (supports 2+ args)
155            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
156                expressions: f.args,
157            }))),
158
159            // ISNULL -> COALESCE
160            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
161                expressions: f.args,
162            }))),
163
164            // GETDATE is native to Redshift
165            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
166                "GETDATE".to_string(),
167                vec![],
168            )))),
169
170            // NOW -> GETDATE in Redshift
171            "NOW" => Ok(Expression::Function(Box::new(Function::new(
172                "GETDATE".to_string(),
173                vec![],
174            )))),
175
176            // RAND -> RANDOM in Redshift
177            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
178
179            // GROUP_CONCAT -> LISTAGG in Redshift
180            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
181                Function::new("LISTAGG".to_string(), f.args),
182            ))),
183
184            // STRING_AGG -> LISTAGG in Redshift
185            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
186                Function::new("LISTAGG".to_string(), f.args),
187            ))),
188
189            // LISTAGG is native in Redshift
190            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
191
192            // SUBSTR -> SUBSTRING
193            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
194                "SUBSTRING".to_string(),
195                f.args,
196            )))),
197
198            // LEN is native in Redshift
199            "LEN" => Ok(Expression::Function(Box::new(f))),
200
201            // LENGTH -> LEN in Redshift
202            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
203                "LEN".to_string(),
204                f.args,
205            )))),
206
207            // CHARINDEX is native in Redshift
208            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
209
210            // POSITION -> CHARINDEX in Redshift (with swapped args)
211            "POSITION" if f.args.len() == 2 => {
212                let mut args = f.args;
213                let substring = args.remove(0);
214                let string = args.remove(0);
215                // CHARINDEX(substring, string)
216                Ok(Expression::Function(Box::new(Function::new(
217                    "CHARINDEX".to_string(),
218                    vec![substring, string],
219                ))))
220            }
221
222            // STRPOS -> CHARINDEX in Redshift
223            "STRPOS" if f.args.len() == 2 => {
224                let args = f.args;
225                // STRPOS(string, substring) -> CHARINDEX(substring, string)
226                let string = args[0].clone();
227                let substring = args[1].clone();
228                Ok(Expression::Function(Box::new(Function::new(
229                    "CHARINDEX".to_string(),
230                    vec![substring, string],
231                ))))
232            }
233
234            // INSTR -> CHARINDEX in Redshift
235            "INSTR" if f.args.len() >= 2 => {
236                let mut args = f.args;
237                let string = args.remove(0);
238                let substring = args.remove(0);
239                // INSTR(string, substring) -> CHARINDEX(substring, string)
240                Ok(Expression::Function(Box::new(Function::new(
241                    "CHARINDEX".to_string(),
242                    vec![substring, string],
243                ))))
244            }
245
246            // LOCATE -> CHARINDEX in Redshift
247            "LOCATE" if f.args.len() >= 2 => {
248                let mut args = f.args;
249                let substring = args.remove(0);
250                let string = args.remove(0);
251                // LOCATE(substring, string) -> CHARINDEX(substring, string)
252                Ok(Expression::Function(Box::new(Function::new(
253                    "CHARINDEX".to_string(),
254                    vec![substring, string],
255                ))))
256            }
257
258            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
259            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
260            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
261
262            // SIZE -> not directly supported
263            "SIZE" => Ok(Expression::Function(Box::new(f))),
264
265            // TO_DATE -> TO_DATE (native in Redshift)
266            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
267
268            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
269            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
270
271            // DATE_FORMAT -> TO_CHAR in Redshift
272            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
273                Function::new("TO_CHAR".to_string(), f.args),
274            ))),
275
276            // strftime -> TO_CHAR in Redshift
277            "STRFTIME" if f.args.len() >= 2 => {
278                let mut args = f.args;
279                let format = args.remove(0);
280                let date = args.remove(0);
281                Ok(Expression::Function(Box::new(Function::new(
282                    "TO_CHAR".to_string(),
283                    vec![date, format],
284                ))))
285            }
286
287            // TO_CHAR is native in Redshift
288            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
289
290            // LEVENSHTEIN -> not directly supported
291            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
292
293            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
294            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
295                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
296            ))),
297
298            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
299            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
300                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
301            ))),
302
303            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
304            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
305                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
306            ))),
307
308            // COLLECT_LIST -> not directly supported (limited array support)
309            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
310
311            // COLLECT_SET -> not directly supported
312            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
313
314            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
315            "RLIKE" if f.args.len() == 2 => {
316                // Redshift uses ~ for regex matching
317                let mut args = f.args;
318                let string = args.remove(0);
319                let pattern = args.remove(0);
320                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
321                    this: string,
322                    pattern,
323                    flags: None,
324                })))
325            }
326
327            // REGEXP -> RegexpLike
328            "REGEXP" if f.args.len() == 2 => {
329                let mut args = f.args;
330                let string = args.remove(0);
331                let pattern = args.remove(0);
332                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
333                    this: string,
334                    pattern,
335                    flags: None,
336                })))
337            }
338
339            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
340            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
341
342            // ADD_MONTHS -> DATEADD in Redshift
343            "ADD_MONTHS" if f.args.len() == 2 => {
344                let mut args = f.args;
345                let date = args.remove(0);
346                let months = args.remove(0);
347                // DATEADD(month, num_months, date)
348                Ok(Expression::Function(Box::new(Function::new(
349                    "DATEADD".to_string(),
350                    vec![Expression::identifier("month"), months, date],
351                ))))
352            }
353
354            // DATEDIFF is native in Redshift
355            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
356
357            // DATE_DIFF -> DATEDIFF in Redshift
358            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
359                "DATEDIFF".to_string(),
360                f.args,
361            )))),
362
363            // DATEADD is native in Redshift
364            "DATEADD" => Ok(Expression::Function(Box::new(f))),
365
366            // DATE_ADD -> DATEADD in Redshift
367            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
368                "DATEADD".to_string(),
369                f.args,
370            )))),
371
372            // SPLIT_TO_ARRAY is native in Redshift
373            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
374
375            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
376            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
377                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
378            ))),
379
380            // SPLIT -> SPLIT_TO_ARRAY in Redshift
381            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
382                "SPLIT_TO_ARRAY".to_string(),
383                f.args,
384            )))),
385
386            // STRTOL is native in Redshift (string to long/base conversion)
387            "STRTOL" => Ok(Expression::Function(Box::new(f))),
388
389            // FROM_BASE -> STRTOL in Redshift
390            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
391                "STRTOL".to_string(),
392                f.args,
393            )))),
394
395            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
396            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
397                let mut new_args = vec![Expression::string("UTC")];
398                new_args.extend(f.args);
399                Ok(Expression::Function(Box::new(Function::new(
400                    "CONVERT_TIMEZONE".to_string(),
401                    new_args,
402                ))))
403            }
404            // 3-arg form stays as-is
405            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
406
407            // CONVERT(type, expr) -> CAST(expr AS type)
408            "CONVERT" if f.args.len() == 2 => {
409                let type_expr = &f.args[0];
410                let value_expr = f.args[1].clone();
411
412                // Extract type name from the first argument (it's likely a Column or Identifier)
413                let type_name = match type_expr {
414                    Expression::Column(c) => c.name.name.clone(),
415                    Expression::Identifier(i) => i.name.clone(),
416                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
417                };
418
419                // Map type name to DataType
420                let data_type = match type_name.to_uppercase().as_str() {
421                    "INT" | "INTEGER" => DataType::Int { length: None, integer_spelling: false },
422                    "BIGINT" => DataType::BigInt { length: None },
423                    "SMALLINT" => DataType::SmallInt { length: None },
424                    "TINYINT" => DataType::TinyInt { length: None },
425                    "VARCHAR" => DataType::VarChar { length: None, parenthesized_length: false },
426                    "CHAR" => DataType::Char { length: None },
427                    "FLOAT" | "REAL" => DataType::Float { precision: None, scale: None, real_spelling: false },
428                    "DOUBLE" => DataType::Double { precision: None, scale: None },
429                    "BOOLEAN" | "BOOL" => DataType::Boolean,
430                    "DATE" => DataType::Date,
431                    "TIMESTAMP" => DataType::Timestamp { precision: None, timezone: false },
432                    "TEXT" => DataType::Text,
433                    "DECIMAL" | "NUMERIC" => DataType::Decimal { precision: None, scale: None },
434                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
435                };
436
437                Ok(Expression::Cast(Box::new(Cast {
438                    this: value_expr,
439                    to: data_type,
440                    trailing_comments: Vec::new(),
441                    double_colon_syntax: false,
442                    format: None,
443                    default: None,
444                })))
445            }
446
447            // Pass through everything else
448            _ => Ok(Expression::Function(Box::new(f))),
449        }
450    }
451
452    fn transform_aggregate_function(
453        &self,
454        f: Box<crate::expressions::AggregateFunction>,
455    ) -> Result<Expression> {
456        let name_upper = f.name.to_uppercase();
457        match name_upper.as_str() {
458            // COUNT_IF -> SUM(CASE WHEN...)
459            "COUNT_IF" if !f.args.is_empty() => {
460                let condition = f.args.into_iter().next().unwrap();
461                let case_expr = Expression::Case(Box::new(Case {
462                    operand: None,
463                    whens: vec![(condition, Expression::number(1))],
464                    else_: Some(Expression::number(0)),
465                }));
466                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
467                    this: case_expr,
468                    distinct: f.distinct,
469                    filter: f.filter,
470                    order_by: Vec::new(),
471                name: None,
472                limit: None,
473                })))
474            }
475
476            // ANY_VALUE is native in Redshift
477            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
478
479            // GROUP_CONCAT -> LISTAGG in Redshift
480            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
481                Function::new("LISTAGG".to_string(), f.args),
482            ))),
483
484            // STRING_AGG -> LISTAGG in Redshift
485            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
486                Function::new("LISTAGG".to_string(), f.args),
487            ))),
488
489            // LISTAGG is native in Redshift
490            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
491
492            // STDDEV is native in Redshift
493            "STDDEV" => Ok(Expression::AggregateFunction(f)),
494
495            // VARIANCE is native in Redshift
496            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
497
498            // MEDIAN is native in Redshift
499            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
500
501            // Pass through everything else
502            _ => Ok(Expression::AggregateFunction(f)),
503        }
504    }
505
506    fn transform_cast(&self, c: Cast) -> Result<Expression> {
507        // Redshift type mappings are handled in the generator
508        Ok(Expression::Cast(Box::new(c)))
509    }
510}