Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Redshift dialect
15pub struct RedshiftDialect;
16
17impl DialectImpl for RedshiftDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Redshift
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        use crate::tokens::TokenType;
24        let mut config = TokenizerConfig::default();
25        // Redshift uses double quotes for identifiers (like PostgreSQL)
26        config.identifiers.insert('"', '"');
27        // Redshift does NOT support nested comments
28        config.nested_comments = false;
29        // MINUS is an alias for EXCEPT in Redshift
30        config
31            .keywords
32            .insert("MINUS".to_string(), TokenType::Except);
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '"',
40            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
41            dialect: Some(DialectType::Redshift),
42            supports_column_join_marks: true,
43            locking_reads_supported: false,
44            tz_to_with_time_zone: true,
45            ..Default::default()
46        }
47    }
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        match expr {
51            // IFNULL -> COALESCE in Redshift
52            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
53                original_name: None,
54                expressions: vec![f.this, f.expression],
55            }))),
56
57            // NVL is native in Redshift, but we standardize to COALESCE for consistency
58            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
59                original_name: None,
60                expressions: vec![f.this, f.expression],
61            }))),
62
63            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
64            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
65
66            // SafeCast -> TRY_CAST in Redshift
67            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
68
69            // ILIKE is native in Redshift (inherited from PostgreSQL)
70            Expression::ILike(op) => Ok(Expression::ILike(op)),
71
72            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
73            Expression::CountIf(f) => {
74                let case_expr = Expression::Case(Box::new(Case {
75                    operand: None,
76                    whens: vec![(f.this.clone(), Expression::number(1))],
77                    else_: Some(Expression::number(0)),
78                    comments: Vec::new(),
79                }));
80                Ok(Expression::Sum(Box::new(AggFunc {
81                    ignore_nulls: None,
82                    having_max: None,
83                    this: case_expr,
84                    distinct: f.distinct,
85                    filter: f.filter,
86                    order_by: Vec::new(),
87                    name: None,
88                    limit: None,
89                })))
90            }
91
92            // EXPLODE is not supported in Redshift
93            Expression::Explode(_) => Ok(expr),
94
95            // ExplodeOuter is not supported in Redshift
96            Expression::ExplodeOuter(_) => Ok(expr),
97
98            // UNNEST is supported in Redshift (but limited)
99            Expression::Unnest(_) => Ok(expr),
100
101            // RAND -> RANDOM in Redshift (like PostgreSQL)
102            Expression::Rand(r) => {
103                let _ = r.seed;
104                Ok(Expression::Random(crate::expressions::Random))
105            }
106
107            // DateAdd -> DATEADD(unit, count, date) in Redshift
108            Expression::DateAdd(f) => {
109                let unit_str = match f.unit {
110                    crate::expressions::IntervalUnit::Year => "YEAR",
111                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
112                    crate::expressions::IntervalUnit::Month => "MONTH",
113                    crate::expressions::IntervalUnit::Week => "WEEK",
114                    crate::expressions::IntervalUnit::Day => "DAY",
115                    crate::expressions::IntervalUnit::Hour => "HOUR",
116                    crate::expressions::IntervalUnit::Minute => "MINUTE",
117                    crate::expressions::IntervalUnit::Second => "SECOND",
118                    crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
119                    crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
120                    crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
121                };
122                let unit = Expression::Identifier(crate::expressions::Identifier {
123                    name: unit_str.to_string(),
124                    quoted: false,
125                    trailing_comments: Vec::new(),
126                });
127                Ok(Expression::Function(Box::new(Function::new(
128                    "DATEADD".to_string(),
129                    vec![unit, f.interval, f.this],
130                ))))
131            }
132
133            // Generic function transformations
134            Expression::Function(f) => self.transform_function(*f),
135
136            // Generic aggregate function transformations
137            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
138
139            // Cast transformations
140            Expression::Cast(c) => self.transform_cast(*c),
141
142            // CONVERT -> CAST in Redshift
143            Expression::Convert(c) => Ok(Expression::Cast(Box::new(Cast {
144                this: c.this,
145                to: c.to,
146                trailing_comments: Vec::new(),
147                double_colon_syntax: false,
148                format: None,
149                default: None,
150            }))),
151
152            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
153            Expression::Select(mut select) => {
154                if let Some(top) = select.top.take() {
155                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
156                    if !top.percent && !top.with_ties {
157                        // Convert TOP to LIMIT
158                        select.limit = Some(Limit {
159                            this: top.this,
160                            percent: false,
161                            comments: Vec::new(),
162                        });
163                    } else {
164                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
165                        select.top = Some(top);
166                    }
167                }
168                Ok(Expression::Select(select))
169            }
170
171            // Pass through everything else
172            _ => Ok(expr),
173        }
174    }
175}
176
177impl RedshiftDialect {
178    fn transform_function(&self, f: Function) -> Result<Expression> {
179        let name_upper = f.name.to_uppercase();
180        match name_upper.as_str() {
181            // IFNULL -> COALESCE
182            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
183                original_name: None,
184                expressions: f.args,
185            }))),
186
187            // NVL -> COALESCE (supports 2+ args)
188            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
189                original_name: None,
190                expressions: f.args,
191            }))),
192
193            // ISNULL -> COALESCE
194            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
195                original_name: None,
196                expressions: f.args,
197            }))),
198
199            // GETDATE is native to Redshift
200            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
201                "GETDATE".to_string(),
202                vec![],
203            )))),
204
205            // NOW -> GETDATE in Redshift
206            "NOW" => Ok(Expression::Function(Box::new(Function::new(
207                "GETDATE".to_string(),
208                vec![],
209            )))),
210
211            // RAND -> RANDOM in Redshift
212            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
213
214            // GROUP_CONCAT -> LISTAGG in Redshift
215            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
216                Function::new("LISTAGG".to_string(), f.args),
217            ))),
218
219            // STRING_AGG -> LISTAGG in Redshift
220            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
221                Function::new("LISTAGG".to_string(), f.args),
222            ))),
223
224            // LISTAGG is native in Redshift
225            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
226
227            // SUBSTR -> SUBSTRING
228            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
229                "SUBSTRING".to_string(),
230                f.args,
231            )))),
232
233            // LEN is native in Redshift
234            "LEN" => Ok(Expression::Function(Box::new(f))),
235
236            // LENGTH -> LEN in Redshift
237            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
238                "LEN".to_string(),
239                f.args,
240            )))),
241
242            // CHARINDEX is native in Redshift
243            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
244
245            // POSITION -> CHARINDEX in Redshift (with swapped args)
246            "POSITION" if f.args.len() == 2 => {
247                let mut args = f.args;
248                let substring = args.remove(0);
249                let string = args.remove(0);
250                // CHARINDEX(substring, string)
251                Ok(Expression::Function(Box::new(Function::new(
252                    "CHARINDEX".to_string(),
253                    vec![substring, string],
254                ))))
255            }
256
257            // STRPOS -> CHARINDEX in Redshift
258            "STRPOS" if f.args.len() == 2 => {
259                let args = f.args;
260                // STRPOS(string, substring) -> CHARINDEX(substring, string)
261                let string = args[0].clone();
262                let substring = args[1].clone();
263                Ok(Expression::Function(Box::new(Function::new(
264                    "CHARINDEX".to_string(),
265                    vec![substring, string],
266                ))))
267            }
268
269            // INSTR -> CHARINDEX in Redshift
270            "INSTR" if f.args.len() >= 2 => {
271                let mut args = f.args;
272                let string = args.remove(0);
273                let substring = args.remove(0);
274                // INSTR(string, substring) -> CHARINDEX(substring, string)
275                Ok(Expression::Function(Box::new(Function::new(
276                    "CHARINDEX".to_string(),
277                    vec![substring, string],
278                ))))
279            }
280
281            // LOCATE -> CHARINDEX in Redshift
282            "LOCATE" if f.args.len() >= 2 => {
283                let mut args = f.args;
284                let substring = args.remove(0);
285                let string = args.remove(0);
286                // LOCATE(substring, string) -> CHARINDEX(substring, string)
287                Ok(Expression::Function(Box::new(Function::new(
288                    "CHARINDEX".to_string(),
289                    vec![substring, string],
290                ))))
291            }
292
293            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
294            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
295            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
296
297            // SIZE -> not directly supported
298            "SIZE" => Ok(Expression::Function(Box::new(f))),
299
300            // TO_DATE -> TO_DATE (native in Redshift)
301            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
302
303            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
304            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
305
306            // DATE_FORMAT -> TO_CHAR in Redshift
307            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
308                Function::new("TO_CHAR".to_string(), f.args),
309            ))),
310
311            // strftime -> TO_CHAR in Redshift
312            "STRFTIME" if f.args.len() >= 2 => {
313                let mut args = f.args;
314                let format = args.remove(0);
315                let date = args.remove(0);
316                Ok(Expression::Function(Box::new(Function::new(
317                    "TO_CHAR".to_string(),
318                    vec![date, format],
319                ))))
320            }
321
322            // TO_CHAR is native in Redshift
323            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
324
325            // LEVENSHTEIN -> not directly supported
326            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
327
328            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
329            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
330                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
331            ))),
332
333            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
334            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
335                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
336            ))),
337
338            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
339            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
340                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
341            ))),
342
343            // COLLECT_LIST -> not directly supported (limited array support)
344            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
345
346            // COLLECT_SET -> not directly supported
347            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
348
349            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
350            "RLIKE" if f.args.len() == 2 => {
351                // Redshift uses ~ for regex matching
352                let mut args = f.args;
353                let string = args.remove(0);
354                let pattern = args.remove(0);
355                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
356                    this: string,
357                    pattern,
358                    flags: None,
359                })))
360            }
361
362            // REGEXP -> RegexpLike
363            "REGEXP" if f.args.len() == 2 => {
364                let mut args = f.args;
365                let string = args.remove(0);
366                let pattern = args.remove(0);
367                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
368                    this: string,
369                    pattern,
370                    flags: None,
371                })))
372            }
373
374            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
375            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
376
377            // ADD_MONTHS -> DATEADD in Redshift
378            "ADD_MONTHS" if f.args.len() == 2 => {
379                let mut args = f.args;
380                let date = args.remove(0);
381                let months = args.remove(0);
382                // DATEADD(month, num_months, date)
383                Ok(Expression::Function(Box::new(Function::new(
384                    "DATEADD".to_string(),
385                    vec![Expression::identifier("month"), months, date],
386                ))))
387            }
388
389            // DATEDIFF is native in Redshift
390            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
391
392            // DATE_DIFF -> DATEDIFF in Redshift
393            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
394                "DATEDIFF".to_string(),
395                f.args,
396            )))),
397
398            // DATEADD is native in Redshift
399            "DATEADD" => Ok(Expression::Function(Box::new(f))),
400
401            // DATE_ADD -> DATEADD in Redshift
402            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
403                "DATEADD".to_string(),
404                f.args,
405            )))),
406
407            // SPLIT_TO_ARRAY is native in Redshift
408            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
409
410            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
411            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
412                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
413            ))),
414
415            // SPLIT -> SPLIT_TO_ARRAY in Redshift
416            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
417                "SPLIT_TO_ARRAY".to_string(),
418                f.args,
419            )))),
420
421            // STRTOL is native in Redshift (string to long/base conversion)
422            "STRTOL" => Ok(Expression::Function(Box::new(f))),
423
424            // FROM_BASE -> STRTOL in Redshift
425            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
426                "STRTOL".to_string(),
427                f.args,
428            )))),
429
430            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
431            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
432                let mut new_args = vec![Expression::string("UTC")];
433                new_args.extend(f.args);
434                Ok(Expression::Function(Box::new(Function::new(
435                    "CONVERT_TIMEZONE".to_string(),
436                    new_args,
437                ))))
438            }
439            // 3-arg form stays as-is
440            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
441
442            // CONVERT(type, expr) -> CAST(expr AS type)
443            "CONVERT" if f.args.len() == 2 => {
444                let type_expr = &f.args[0];
445                let value_expr = f.args[1].clone();
446
447                // Extract type name from the first argument (it's likely a Column or Identifier)
448                let type_name = match type_expr {
449                    Expression::Column(c) => c.name.name.clone(),
450                    Expression::Identifier(i) => i.name.clone(),
451                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
452                };
453
454                // Map type name to DataType
455                let data_type = match type_name.to_uppercase().as_str() {
456                    "INT" | "INTEGER" => DataType::Int {
457                        length: None,
458                        integer_spelling: false,
459                    },
460                    "BIGINT" => DataType::BigInt { length: None },
461                    "SMALLINT" => DataType::SmallInt { length: None },
462                    "TINYINT" => DataType::TinyInt { length: None },
463                    "VARCHAR" => DataType::VarChar {
464                        length: None,
465                        parenthesized_length: false,
466                    },
467                    "CHAR" => DataType::Char { length: None },
468                    "FLOAT" | "REAL" => DataType::Float {
469                        precision: None,
470                        scale: None,
471                        real_spelling: false,
472                    },
473                    "DOUBLE" => DataType::Double {
474                        precision: None,
475                        scale: None,
476                    },
477                    "BOOLEAN" | "BOOL" => DataType::Boolean,
478                    "DATE" => DataType::Date,
479                    "TIMESTAMP" => DataType::Timestamp {
480                        precision: None,
481                        timezone: false,
482                    },
483                    "TEXT" => DataType::Text,
484                    "DECIMAL" | "NUMERIC" => DataType::Decimal {
485                        precision: None,
486                        scale: None,
487                    },
488                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
489                };
490
491                Ok(Expression::Cast(Box::new(Cast {
492                    this: value_expr,
493                    to: data_type,
494                    trailing_comments: Vec::new(),
495                    double_colon_syntax: false,
496                    format: None,
497                    default: None,
498                })))
499            }
500
501            // Pass through everything else
502            _ => Ok(Expression::Function(Box::new(f))),
503        }
504    }
505
506    fn transform_aggregate_function(
507        &self,
508        f: Box<crate::expressions::AggregateFunction>,
509    ) -> Result<Expression> {
510        let name_upper = f.name.to_uppercase();
511        match name_upper.as_str() {
512            // COUNT_IF -> SUM(CASE WHEN...)
513            "COUNT_IF" if !f.args.is_empty() => {
514                let condition = f.args.into_iter().next().unwrap();
515                let case_expr = Expression::Case(Box::new(Case {
516                    operand: None,
517                    whens: vec![(condition, Expression::number(1))],
518                    else_: Some(Expression::number(0)),
519                    comments: Vec::new(),
520                }));
521                Ok(Expression::Sum(Box::new(AggFunc {
522                    ignore_nulls: None,
523                    having_max: None,
524                    this: case_expr,
525                    distinct: f.distinct,
526                    filter: f.filter,
527                    order_by: Vec::new(),
528                    name: None,
529                    limit: None,
530                })))
531            }
532
533            // ANY_VALUE is native in Redshift
534            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
535
536            // GROUP_CONCAT -> LISTAGG in Redshift
537            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
538                Function::new("LISTAGG".to_string(), f.args),
539            ))),
540
541            // STRING_AGG -> LISTAGG in Redshift
542            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
543                Function::new("LISTAGG".to_string(), f.args),
544            ))),
545
546            // LISTAGG is native in Redshift
547            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
548
549            // STDDEV is native in Redshift
550            "STDDEV" => Ok(Expression::AggregateFunction(f)),
551
552            // VARIANCE is native in Redshift
553            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
554
555            // MEDIAN is native in Redshift
556            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
557
558            // Pass through everything else
559            _ => Ok(Expression::AggregateFunction(f)),
560        }
561    }
562
563    fn transform_cast(&self, c: Cast) -> Result<Expression> {
564        // Redshift type mappings are handled in the generator
565        Ok(Expression::Cast(Box::new(c)))
566    }
567}