Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Redshift dialect
15pub struct RedshiftDialect;
16
17impl DialectImpl for RedshiftDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Redshift
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        use crate::tokens::TokenType;
24        let mut config = TokenizerConfig::default();
25        // Redshift uses double quotes for identifiers (like PostgreSQL)
26        config.identifiers.insert('"', '"');
27        // Redshift does NOT support nested comments
28        config.nested_comments = false;
29        // MINUS is an alias for EXCEPT in Redshift
30        config
31            .keywords
32            .insert("MINUS".to_string(), TokenType::Except);
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '"',
40            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
41            dialect: Some(DialectType::Redshift),
42            supports_column_join_marks: true,
43            locking_reads_supported: false,
44            tz_to_with_time_zone: true,
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        match expr {
51            // IFNULL -> COALESCE in Redshift
52            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
53                original_name: None,
54                expressions: vec![f.this, f.expression],
55                inferred_type: None,
56            }))),
57
58            // NVL is native in Redshift, but we standardize to COALESCE for consistency
59            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
60                original_name: None,
61                expressions: vec![f.this, f.expression],
62                inferred_type: None,
63            }))),
64
65            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
66            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
67
68            // SafeCast -> TRY_CAST in Redshift
69            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
70
71            // ILIKE is native in Redshift (inherited from PostgreSQL)
72            Expression::ILike(op) => Ok(Expression::ILike(op)),
73
74            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
75            Expression::CountIf(f) => {
76                let case_expr = Expression::Case(Box::new(Case {
77                    operand: None,
78                    whens: vec![(f.this.clone(), Expression::number(1))],
79                    else_: Some(Expression::number(0)),
80                    comments: Vec::new(),
81                    inferred_type: None,
82                }));
83                Ok(Expression::Sum(Box::new(AggFunc {
84                    ignore_nulls: None,
85                    having_max: None,
86                    this: case_expr,
87                    distinct: f.distinct,
88                    filter: f.filter,
89                    order_by: Vec::new(),
90                    name: None,
91                    limit: None,
92                    inferred_type: None,
93                })))
94            }
95
96            // EXPLODE is not supported in Redshift
97            Expression::Explode(_) => Ok(expr),
98
99            // ExplodeOuter is not supported in Redshift
100            Expression::ExplodeOuter(_) => Ok(expr),
101
102            // UNNEST is supported in Redshift (but limited)
103            Expression::Unnest(_) => Ok(expr),
104
105            // RAND -> RANDOM in Redshift (like PostgreSQL)
106            Expression::Rand(r) => {
107                let _ = r.seed;
108                Ok(Expression::Random(crate::expressions::Random))
109            }
110
111            // DateAdd -> DATEADD(unit, count, date) in Redshift
112            Expression::DateAdd(f) => {
113                let unit_str = match f.unit {
114                    crate::expressions::IntervalUnit::Year => "YEAR",
115                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
116                    crate::expressions::IntervalUnit::Month => "MONTH",
117                    crate::expressions::IntervalUnit::Week => "WEEK",
118                    crate::expressions::IntervalUnit::Day => "DAY",
119                    crate::expressions::IntervalUnit::Hour => "HOUR",
120                    crate::expressions::IntervalUnit::Minute => "MINUTE",
121                    crate::expressions::IntervalUnit::Second => "SECOND",
122                    crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
123                    crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
124                    crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
125                };
126                let unit = Expression::Identifier(crate::expressions::Identifier {
127                    name: unit_str.to_string(),
128                    quoted: false,
129                    trailing_comments: Vec::new(),
130                    span: None,
131                });
132                Ok(Expression::Function(Box::new(Function::new(
133                    "DATEADD".to_string(),
134                    vec![unit, f.interval, f.this],
135                ))))
136            }
137
138            // Generic function transformations
139            Expression::Function(f) => self.transform_function(*f),
140
141            // Generic aggregate function transformations
142            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
143
144            // Cast transformations
145            Expression::Cast(c) => self.transform_cast(*c),
146
147            // CONVERT -> CAST in Redshift
148            Expression::Convert(c) => Ok(Expression::Cast(Box::new(Cast {
149                this: c.this,
150                to: c.to,
151                trailing_comments: Vec::new(),
152                double_colon_syntax: false,
153                format: None,
154                default: None,
155                inferred_type: None,
156            }))),
157
158            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
159            Expression::Select(mut select) => {
160                if let Some(top) = select.top.take() {
161                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
162                    if !top.percent && !top.with_ties {
163                        // Convert TOP to LIMIT
164                        select.limit = Some(Limit {
165                            this: top.this,
166                            percent: false,
167                            comments: Vec::new(),
168                        });
169                    } else {
170                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
171                        select.top = Some(top);
172                    }
173                }
174                Ok(Expression::Select(select))
175            }
176
177            // Pass through everything else
178            _ => Ok(expr),
179        }
180    }
181}
182
183impl RedshiftDialect {
184    fn transform_function(&self, f: Function) -> Result<Expression> {
185        let name_upper = f.name.to_uppercase();
186        match name_upper.as_str() {
187            // IFNULL -> COALESCE
188            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
189                original_name: None,
190                expressions: f.args,
191                inferred_type: None,
192            }))),
193
194            // NVL -> COALESCE (supports 2+ args)
195            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
196                original_name: None,
197                expressions: f.args,
198                inferred_type: None,
199            }))),
200
201            // ISNULL -> COALESCE
202            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
203                original_name: None,
204                expressions: f.args,
205                inferred_type: None,
206            }))),
207
208            // GETDATE is native to Redshift
209            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
210                "GETDATE".to_string(),
211                vec![],
212            )))),
213
214            // NOW -> GETDATE in Redshift
215            "NOW" => Ok(Expression::Function(Box::new(Function::new(
216                "GETDATE".to_string(),
217                vec![],
218            )))),
219
220            // RAND -> RANDOM in Redshift
221            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
222
223            // GROUP_CONCAT -> LISTAGG in Redshift
224            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
225                Function::new("LISTAGG".to_string(), f.args),
226            ))),
227
228            // STRING_AGG -> LISTAGG in Redshift
229            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
230                Function::new("LISTAGG".to_string(), f.args),
231            ))),
232
233            // LISTAGG is native in Redshift
234            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
235
236            // SUBSTR -> SUBSTRING
237            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
238                "SUBSTRING".to_string(),
239                f.args,
240            )))),
241
242            // LEN is native in Redshift
243            "LEN" => Ok(Expression::Function(Box::new(f))),
244
245            // LENGTH -> LEN in Redshift
246            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
247                "LEN".to_string(),
248                f.args,
249            )))),
250
251            // CHARINDEX is native in Redshift
252            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
253
254            // POSITION -> CHARINDEX in Redshift (with swapped args)
255            "POSITION" if f.args.len() == 2 => {
256                let mut args = f.args;
257                let substring = args.remove(0);
258                let string = args.remove(0);
259                // CHARINDEX(substring, string)
260                Ok(Expression::Function(Box::new(Function::new(
261                    "CHARINDEX".to_string(),
262                    vec![substring, string],
263                ))))
264            }
265
266            // STRPOS -> CHARINDEX in Redshift
267            "STRPOS" if f.args.len() == 2 => {
268                let args = f.args;
269                // STRPOS(string, substring) -> CHARINDEX(substring, string)
270                let string = args[0].clone();
271                let substring = args[1].clone();
272                Ok(Expression::Function(Box::new(Function::new(
273                    "CHARINDEX".to_string(),
274                    vec![substring, string],
275                ))))
276            }
277
278            // INSTR -> CHARINDEX in Redshift
279            "INSTR" if f.args.len() >= 2 => {
280                let mut args = f.args;
281                let string = args.remove(0);
282                let substring = args.remove(0);
283                // INSTR(string, substring) -> CHARINDEX(substring, string)
284                Ok(Expression::Function(Box::new(Function::new(
285                    "CHARINDEX".to_string(),
286                    vec![substring, string],
287                ))))
288            }
289
290            // LOCATE -> CHARINDEX in Redshift
291            "LOCATE" if f.args.len() >= 2 => {
292                let mut args = f.args;
293                let substring = args.remove(0);
294                let string = args.remove(0);
295                // LOCATE(substring, string) -> CHARINDEX(substring, string)
296                Ok(Expression::Function(Box::new(Function::new(
297                    "CHARINDEX".to_string(),
298                    vec![substring, string],
299                ))))
300            }
301
302            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
303            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
304            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
305
306            // SIZE -> not directly supported
307            "SIZE" => Ok(Expression::Function(Box::new(f))),
308
309            // TO_DATE -> TO_DATE (native in Redshift)
310            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
311
312            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
313            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
314
315            // DATE_FORMAT -> TO_CHAR in Redshift
316            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
317                Function::new("TO_CHAR".to_string(), f.args),
318            ))),
319
320            // strftime -> TO_CHAR in Redshift
321            "STRFTIME" if f.args.len() >= 2 => {
322                let mut args = f.args;
323                let format = args.remove(0);
324                let date = args.remove(0);
325                Ok(Expression::Function(Box::new(Function::new(
326                    "TO_CHAR".to_string(),
327                    vec![date, format],
328                ))))
329            }
330
331            // TO_CHAR is native in Redshift
332            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
333
334            // LEVENSHTEIN -> not directly supported
335            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
336
337            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
338            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
339                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
340            ))),
341
342            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
343            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
344                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
345            ))),
346
347            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
348            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
349                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
350            ))),
351
352            // COLLECT_LIST -> not directly supported (limited array support)
353            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
354
355            // COLLECT_SET -> not directly supported
356            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
357
358            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
359            "RLIKE" if f.args.len() == 2 => {
360                // Redshift uses ~ for regex matching
361                let mut args = f.args;
362                let string = args.remove(0);
363                let pattern = args.remove(0);
364                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
365                    this: string,
366                    pattern,
367                    flags: None,
368                })))
369            }
370
371            // REGEXP -> RegexpLike
372            "REGEXP" if f.args.len() == 2 => {
373                let mut args = f.args;
374                let string = args.remove(0);
375                let pattern = args.remove(0);
376                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
377                    this: string,
378                    pattern,
379                    flags: None,
380                })))
381            }
382
383            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
384            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
385
386            // ADD_MONTHS -> DATEADD in Redshift
387            "ADD_MONTHS" if f.args.len() == 2 => {
388                let mut args = f.args;
389                let date = args.remove(0);
390                let months = args.remove(0);
391                // DATEADD(month, num_months, date)
392                Ok(Expression::Function(Box::new(Function::new(
393                    "DATEADD".to_string(),
394                    vec![Expression::identifier("month"), months, date],
395                ))))
396            }
397
398            // DATEDIFF is native in Redshift
399            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
400
401            // DATE_DIFF -> DATEDIFF in Redshift
402            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
403                "DATEDIFF".to_string(),
404                f.args,
405            )))),
406
407            // DATEADD is native in Redshift
408            "DATEADD" => Ok(Expression::Function(Box::new(f))),
409
410            // DATE_ADD -> DATEADD in Redshift
411            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
412                "DATEADD".to_string(),
413                f.args,
414            )))),
415
416            // SPLIT_TO_ARRAY is native in Redshift
417            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
418
419            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
420            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
421                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
422            ))),
423
424            // SPLIT -> SPLIT_TO_ARRAY in Redshift
425            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
426                "SPLIT_TO_ARRAY".to_string(),
427                f.args,
428            )))),
429
430            // STRTOL is native in Redshift (string to long/base conversion)
431            "STRTOL" => Ok(Expression::Function(Box::new(f))),
432
433            // FROM_BASE -> STRTOL in Redshift
434            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
435                "STRTOL".to_string(),
436                f.args,
437            )))),
438
439            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
440            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
441                let mut new_args = vec![Expression::string("UTC")];
442                new_args.extend(f.args);
443                Ok(Expression::Function(Box::new(Function::new(
444                    "CONVERT_TIMEZONE".to_string(),
445                    new_args,
446                ))))
447            }
448            // 3-arg form stays as-is
449            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
450
451            // CONVERT(type, expr) -> CAST(expr AS type)
452            "CONVERT" if f.args.len() == 2 => {
453                let type_expr = &f.args[0];
454                let value_expr = f.args[1].clone();
455
456                // Extract type name from the first argument (it's likely a Column or Identifier)
457                let type_name = match type_expr {
458                    Expression::Column(c) => c.name.name.clone(),
459                    Expression::Identifier(i) => i.name.clone(),
460                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
461                };
462
463                // Map type name to DataType
464                let data_type = match type_name.to_uppercase().as_str() {
465                    "INT" | "INTEGER" => DataType::Int {
466                        length: None,
467                        integer_spelling: false,
468                    },
469                    "BIGINT" => DataType::BigInt { length: None },
470                    "SMALLINT" => DataType::SmallInt { length: None },
471                    "TINYINT" => DataType::TinyInt { length: None },
472                    "VARCHAR" => DataType::VarChar {
473                        length: None,
474                        parenthesized_length: false,
475                    },
476                    "CHAR" => DataType::Char { length: None },
477                    "FLOAT" | "REAL" => DataType::Float {
478                        precision: None,
479                        scale: None,
480                        real_spelling: false,
481                    },
482                    "DOUBLE" => DataType::Double {
483                        precision: None,
484                        scale: None,
485                    },
486                    "BOOLEAN" | "BOOL" => DataType::Boolean,
487                    "DATE" => DataType::Date,
488                    "TIMESTAMP" => DataType::Timestamp {
489                        precision: None,
490                        timezone: false,
491                    },
492                    "TEXT" => DataType::Text,
493                    "DECIMAL" | "NUMERIC" => DataType::Decimal {
494                        precision: None,
495                        scale: None,
496                    },
497                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
498                };
499
500                Ok(Expression::Cast(Box::new(Cast {
501                    this: value_expr,
502                    to: data_type,
503                    trailing_comments: Vec::new(),
504                    double_colon_syntax: false,
505                    format: None,
506                    default: None,
507                    inferred_type: None,
508                })))
509            }
510
511            // Pass through everything else
512            _ => Ok(Expression::Function(Box::new(f))),
513        }
514    }
515
516    fn transform_aggregate_function(
517        &self,
518        f: Box<crate::expressions::AggregateFunction>,
519    ) -> Result<Expression> {
520        let name_upper = f.name.to_uppercase();
521        match name_upper.as_str() {
522            // COUNT_IF -> SUM(CASE WHEN...)
523            "COUNT_IF" if !f.args.is_empty() => {
524                let condition = f.args.into_iter().next().unwrap();
525                let case_expr = Expression::Case(Box::new(Case {
526                    operand: None,
527                    whens: vec![(condition, Expression::number(1))],
528                    else_: Some(Expression::number(0)),
529                    comments: Vec::new(),
530                    inferred_type: None,
531                }));
532                Ok(Expression::Sum(Box::new(AggFunc {
533                    ignore_nulls: None,
534                    having_max: None,
535                    this: case_expr,
536                    distinct: f.distinct,
537                    filter: f.filter,
538                    order_by: Vec::new(),
539                    name: None,
540                    limit: None,
541                    inferred_type: None,
542                })))
543            }
544
545            // ANY_VALUE is native in Redshift
546            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
547
548            // GROUP_CONCAT -> LISTAGG in Redshift
549            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
550                Function::new("LISTAGG".to_string(), f.args),
551            ))),
552
553            // STRING_AGG -> LISTAGG in Redshift
554            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
555                Function::new("LISTAGG".to_string(), f.args),
556            ))),
557
558            // LISTAGG is native in Redshift
559            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
560
561            // STDDEV is native in Redshift
562            "STDDEV" => Ok(Expression::AggregateFunction(f)),
563
564            // VARIANCE is native in Redshift
565            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
566
567            // MEDIAN is native in Redshift
568            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
569
570            // Pass through everything else
571            _ => Ok(Expression::AggregateFunction(f)),
572        }
573    }
574
575    fn transform_cast(&self, c: Cast) -> Result<Expression> {
576        // Redshift type mappings are handled in the generator
577        Ok(Expression::Cast(Box::new(c)))
578    }
579}