Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Redshift dialect
15pub struct RedshiftDialect;
16
17impl DialectImpl for RedshiftDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Redshift
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        use crate::tokens::TokenType;
24        let mut config = TokenizerConfig::default();
25        // Redshift uses double quotes for identifiers (like PostgreSQL)
26        config.identifiers.insert('"', '"');
27        // Redshift does NOT support nested comments
28        config.nested_comments = false;
29        // MINUS is an alias for EXCEPT in Redshift
30        config
31            .keywords
32            .insert("MINUS".to_string(), TokenType::Except);
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '"',
40            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
41            dialect: Some(DialectType::Redshift),
42            supports_column_join_marks: true,
43            locking_reads_supported: false,
44            tz_to_with_time_zone: true,
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        match expr {
51            // IFNULL -> COALESCE in Redshift
52            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
53                original_name: None,
54                expressions: vec![f.this, f.expression],
55            }))),
56
57            // NVL is native in Redshift, but we standardize to COALESCE for consistency
58            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
59                original_name: None,
60                expressions: vec![f.this, f.expression],
61            }))),
62
63            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
64            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
65
66            // SafeCast -> TRY_CAST in Redshift
67            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
68
69            // ILIKE is native in Redshift (inherited from PostgreSQL)
70            Expression::ILike(op) => Ok(Expression::ILike(op)),
71
72            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
73            Expression::CountIf(f) => {
74                let case_expr = Expression::Case(Box::new(Case {
75                    operand: None,
76                    whens: vec![(f.this.clone(), Expression::number(1))],
77                    else_: Some(Expression::number(0)),
78                    comments: Vec::new(),
79                }));
80                Ok(Expression::Sum(Box::new(AggFunc {
81                    ignore_nulls: None,
82                    having_max: None,
83                    this: case_expr,
84                    distinct: f.distinct,
85                    filter: f.filter,
86                    order_by: Vec::new(),
87                    name: None,
88                    limit: None,
89                })))
90            }
91
92            // EXPLODE is not supported in Redshift
93            Expression::Explode(_) => Ok(expr),
94
95            // ExplodeOuter is not supported in Redshift
96            Expression::ExplodeOuter(_) => Ok(expr),
97
98            // UNNEST is supported in Redshift (but limited)
99            Expression::Unnest(_) => Ok(expr),
100
101            // RAND -> RANDOM in Redshift (like PostgreSQL)
102            Expression::Rand(r) => {
103                let _ = r.seed;
104                Ok(Expression::Random(crate::expressions::Random))
105            }
106
107            // DateAdd -> DATEADD(unit, count, date) in Redshift
108            Expression::DateAdd(f) => {
109                let unit_str = match f.unit {
110                    crate::expressions::IntervalUnit::Year => "YEAR",
111                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
112                    crate::expressions::IntervalUnit::Month => "MONTH",
113                    crate::expressions::IntervalUnit::Week => "WEEK",
114                    crate::expressions::IntervalUnit::Day => "DAY",
115                    crate::expressions::IntervalUnit::Hour => "HOUR",
116                    crate::expressions::IntervalUnit::Minute => "MINUTE",
117                    crate::expressions::IntervalUnit::Second => "SECOND",
118                    crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
119                    crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
120                    crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
121                };
122                let unit = Expression::Identifier(crate::expressions::Identifier {
123                    name: unit_str.to_string(),
124                    quoted: false,
125                    trailing_comments: Vec::new(),
126                    span: None,
127                });
128                Ok(Expression::Function(Box::new(Function::new(
129                    "DATEADD".to_string(),
130                    vec![unit, f.interval, f.this],
131                ))))
132            }
133
134            // Generic function transformations
135            Expression::Function(f) => self.transform_function(*f),
136
137            // Generic aggregate function transformations
138            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
139
140            // Cast transformations
141            Expression::Cast(c) => self.transform_cast(*c),
142
143            // CONVERT -> CAST in Redshift
144            Expression::Convert(c) => Ok(Expression::Cast(Box::new(Cast {
145                this: c.this,
146                to: c.to,
147                trailing_comments: Vec::new(),
148                double_colon_syntax: false,
149                format: None,
150                default: None,
151            }))),
152
153            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
154            Expression::Select(mut select) => {
155                if let Some(top) = select.top.take() {
156                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
157                    if !top.percent && !top.with_ties {
158                        // Convert TOP to LIMIT
159                        select.limit = Some(Limit {
160                            this: top.this,
161                            percent: false,
162                            comments: Vec::new(),
163                        });
164                    } else {
165                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
166                        select.top = Some(top);
167                    }
168                }
169                Ok(Expression::Select(select))
170            }
171
172            // Pass through everything else
173            _ => Ok(expr),
174        }
175    }
176}
177
178impl RedshiftDialect {
179    fn transform_function(&self, f: Function) -> Result<Expression> {
180        let name_upper = f.name.to_uppercase();
181        match name_upper.as_str() {
182            // IFNULL -> COALESCE
183            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
184                original_name: None,
185                expressions: f.args,
186            }))),
187
188            // NVL -> COALESCE (supports 2+ args)
189            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
190                original_name: None,
191                expressions: f.args,
192            }))),
193
194            // ISNULL -> COALESCE
195            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
196                original_name: None,
197                expressions: f.args,
198            }))),
199
200            // GETDATE is native to Redshift
201            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
202                "GETDATE".to_string(),
203                vec![],
204            )))),
205
206            // NOW -> GETDATE in Redshift
207            "NOW" => Ok(Expression::Function(Box::new(Function::new(
208                "GETDATE".to_string(),
209                vec![],
210            )))),
211
212            // RAND -> RANDOM in Redshift
213            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
214
215            // GROUP_CONCAT -> LISTAGG in Redshift
216            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
217                Function::new("LISTAGG".to_string(), f.args),
218            ))),
219
220            // STRING_AGG -> LISTAGG in Redshift
221            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
222                Function::new("LISTAGG".to_string(), f.args),
223            ))),
224
225            // LISTAGG is native in Redshift
226            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
227
228            // SUBSTR -> SUBSTRING
229            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
230                "SUBSTRING".to_string(),
231                f.args,
232            )))),
233
234            // LEN is native in Redshift
235            "LEN" => Ok(Expression::Function(Box::new(f))),
236
237            // LENGTH -> LEN in Redshift
238            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
239                "LEN".to_string(),
240                f.args,
241            )))),
242
243            // CHARINDEX is native in Redshift
244            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
245
246            // POSITION -> CHARINDEX in Redshift (with swapped args)
247            "POSITION" if f.args.len() == 2 => {
248                let mut args = f.args;
249                let substring = args.remove(0);
250                let string = args.remove(0);
251                // CHARINDEX(substring, string)
252                Ok(Expression::Function(Box::new(Function::new(
253                    "CHARINDEX".to_string(),
254                    vec![substring, string],
255                ))))
256            }
257
258            // STRPOS -> CHARINDEX in Redshift
259            "STRPOS" if f.args.len() == 2 => {
260                let args = f.args;
261                // STRPOS(string, substring) -> CHARINDEX(substring, string)
262                let string = args[0].clone();
263                let substring = args[1].clone();
264                Ok(Expression::Function(Box::new(Function::new(
265                    "CHARINDEX".to_string(),
266                    vec![substring, string],
267                ))))
268            }
269
270            // INSTR -> CHARINDEX in Redshift
271            "INSTR" if f.args.len() >= 2 => {
272                let mut args = f.args;
273                let string = args.remove(0);
274                let substring = args.remove(0);
275                // INSTR(string, substring) -> CHARINDEX(substring, string)
276                Ok(Expression::Function(Box::new(Function::new(
277                    "CHARINDEX".to_string(),
278                    vec![substring, string],
279                ))))
280            }
281
282            // LOCATE -> CHARINDEX in Redshift
283            "LOCATE" if f.args.len() >= 2 => {
284                let mut args = f.args;
285                let substring = args.remove(0);
286                let string = args.remove(0);
287                // LOCATE(substring, string) -> CHARINDEX(substring, string)
288                Ok(Expression::Function(Box::new(Function::new(
289                    "CHARINDEX".to_string(),
290                    vec![substring, string],
291                ))))
292            }
293
294            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
295            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
296            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
297
298            // SIZE -> not directly supported
299            "SIZE" => Ok(Expression::Function(Box::new(f))),
300
301            // TO_DATE -> TO_DATE (native in Redshift)
302            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
303
304            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
305            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
306
307            // DATE_FORMAT -> TO_CHAR in Redshift
308            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
309                Function::new("TO_CHAR".to_string(), f.args),
310            ))),
311
312            // strftime -> TO_CHAR in Redshift
313            "STRFTIME" if f.args.len() >= 2 => {
314                let mut args = f.args;
315                let format = args.remove(0);
316                let date = args.remove(0);
317                Ok(Expression::Function(Box::new(Function::new(
318                    "TO_CHAR".to_string(),
319                    vec![date, format],
320                ))))
321            }
322
323            // TO_CHAR is native in Redshift
324            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
325
326            // LEVENSHTEIN -> not directly supported
327            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
328
329            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
330            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
331                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
332            ))),
333
334            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
335            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
336                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
337            ))),
338
339            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
340            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
341                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
342            ))),
343
344            // COLLECT_LIST -> not directly supported (limited array support)
345            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
346
347            // COLLECT_SET -> not directly supported
348            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
349
350            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
351            "RLIKE" if f.args.len() == 2 => {
352                // Redshift uses ~ for regex matching
353                let mut args = f.args;
354                let string = args.remove(0);
355                let pattern = args.remove(0);
356                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
357                    this: string,
358                    pattern,
359                    flags: None,
360                })))
361            }
362
363            // REGEXP -> RegexpLike
364            "REGEXP" if f.args.len() == 2 => {
365                let mut args = f.args;
366                let string = args.remove(0);
367                let pattern = args.remove(0);
368                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
369                    this: string,
370                    pattern,
371                    flags: None,
372                })))
373            }
374
375            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
376            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
377
378            // ADD_MONTHS -> DATEADD in Redshift
379            "ADD_MONTHS" if f.args.len() == 2 => {
380                let mut args = f.args;
381                let date = args.remove(0);
382                let months = args.remove(0);
383                // DATEADD(month, num_months, date)
384                Ok(Expression::Function(Box::new(Function::new(
385                    "DATEADD".to_string(),
386                    vec![Expression::identifier("month"), months, date],
387                ))))
388            }
389
390            // DATEDIFF is native in Redshift
391            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
392
393            // DATE_DIFF -> DATEDIFF in Redshift
394            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
395                "DATEDIFF".to_string(),
396                f.args,
397            )))),
398
399            // DATEADD is native in Redshift
400            "DATEADD" => Ok(Expression::Function(Box::new(f))),
401
402            // DATE_ADD -> DATEADD in Redshift
403            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
404                "DATEADD".to_string(),
405                f.args,
406            )))),
407
408            // SPLIT_TO_ARRAY is native in Redshift
409            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
410
411            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
412            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
413                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
414            ))),
415
416            // SPLIT -> SPLIT_TO_ARRAY in Redshift
417            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
418                "SPLIT_TO_ARRAY".to_string(),
419                f.args,
420            )))),
421
422            // STRTOL is native in Redshift (string to long/base conversion)
423            "STRTOL" => Ok(Expression::Function(Box::new(f))),
424
425            // FROM_BASE -> STRTOL in Redshift
426            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
427                "STRTOL".to_string(),
428                f.args,
429            )))),
430
431            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
432            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
433                let mut new_args = vec![Expression::string("UTC")];
434                new_args.extend(f.args);
435                Ok(Expression::Function(Box::new(Function::new(
436                    "CONVERT_TIMEZONE".to_string(),
437                    new_args,
438                ))))
439            }
440            // 3-arg form stays as-is
441            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
442
443            // CONVERT(type, expr) -> CAST(expr AS type)
444            "CONVERT" if f.args.len() == 2 => {
445                let type_expr = &f.args[0];
446                let value_expr = f.args[1].clone();
447
448                // Extract type name from the first argument (it's likely a Column or Identifier)
449                let type_name = match type_expr {
450                    Expression::Column(c) => c.name.name.clone(),
451                    Expression::Identifier(i) => i.name.clone(),
452                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
453                };
454
455                // Map type name to DataType
456                let data_type = match type_name.to_uppercase().as_str() {
457                    "INT" | "INTEGER" => DataType::Int {
458                        length: None,
459                        integer_spelling: false,
460                    },
461                    "BIGINT" => DataType::BigInt { length: None },
462                    "SMALLINT" => DataType::SmallInt { length: None },
463                    "TINYINT" => DataType::TinyInt { length: None },
464                    "VARCHAR" => DataType::VarChar {
465                        length: None,
466                        parenthesized_length: false,
467                    },
468                    "CHAR" => DataType::Char { length: None },
469                    "FLOAT" | "REAL" => DataType::Float {
470                        precision: None,
471                        scale: None,
472                        real_spelling: false,
473                    },
474                    "DOUBLE" => DataType::Double {
475                        precision: None,
476                        scale: None,
477                    },
478                    "BOOLEAN" | "BOOL" => DataType::Boolean,
479                    "DATE" => DataType::Date,
480                    "TIMESTAMP" => DataType::Timestamp {
481                        precision: None,
482                        timezone: false,
483                    },
484                    "TEXT" => DataType::Text,
485                    "DECIMAL" | "NUMERIC" => DataType::Decimal {
486                        precision: None,
487                        scale: None,
488                    },
489                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
490                };
491
492                Ok(Expression::Cast(Box::new(Cast {
493                    this: value_expr,
494                    to: data_type,
495                    trailing_comments: Vec::new(),
496                    double_colon_syntax: false,
497                    format: None,
498                    default: None,
499                })))
500            }
501
502            // Pass through everything else
503            _ => Ok(Expression::Function(Box::new(f))),
504        }
505    }
506
507    fn transform_aggregate_function(
508        &self,
509        f: Box<crate::expressions::AggregateFunction>,
510    ) -> Result<Expression> {
511        let name_upper = f.name.to_uppercase();
512        match name_upper.as_str() {
513            // COUNT_IF -> SUM(CASE WHEN...)
514            "COUNT_IF" if !f.args.is_empty() => {
515                let condition = f.args.into_iter().next().unwrap();
516                let case_expr = Expression::Case(Box::new(Case {
517                    operand: None,
518                    whens: vec![(condition, Expression::number(1))],
519                    else_: Some(Expression::number(0)),
520                    comments: Vec::new(),
521                }));
522                Ok(Expression::Sum(Box::new(AggFunc {
523                    ignore_nulls: None,
524                    having_max: None,
525                    this: case_expr,
526                    distinct: f.distinct,
527                    filter: f.filter,
528                    order_by: Vec::new(),
529                    name: None,
530                    limit: None,
531                })))
532            }
533
534            // ANY_VALUE is native in Redshift
535            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
536
537            // GROUP_CONCAT -> LISTAGG in Redshift
538            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
539                Function::new("LISTAGG".to_string(), f.args),
540            ))),
541
542            // STRING_AGG -> LISTAGG in Redshift
543            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
544                Function::new("LISTAGG".to_string(), f.args),
545            ))),
546
547            // LISTAGG is native in Redshift
548            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
549
550            // STDDEV is native in Redshift
551            "STDDEV" => Ok(Expression::AggregateFunction(f)),
552
553            // VARIANCE is native in Redshift
554            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
555
556            // MEDIAN is native in Redshift
557            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
558
559            // Pass through everything else
560            _ => Ok(Expression::AggregateFunction(f)),
561        }
562    }
563
564    fn transform_cast(&self, c: Cast) -> Result<Expression> {
565        // Redshift type mappings are handled in the generator
566        Ok(Expression::Cast(Box::new(c)))
567    }
568}