Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Redshift dialect
15pub struct RedshiftDialect;
16
17impl DialectImpl for RedshiftDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Redshift
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        use crate::tokens::TokenType;
24        let mut config = TokenizerConfig::default();
25        // Redshift uses double quotes for identifiers (like PostgreSQL)
26        config.identifiers.insert('"', '"');
27        // Redshift does NOT support nested comments
28        config.nested_comments = false;
29        // MINUS is an alias for EXCEPT in Redshift
30        config
31            .keywords
32            .insert("MINUS".to_string(), TokenType::Except);
33        config
34    }
35
36    fn generator_config(&self) -> GeneratorConfig {
37        use crate::generator::IdentifierQuoteStyle;
38        GeneratorConfig {
39            identifier_quote: '"',
40            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
41            dialect: Some(DialectType::Redshift),
42            supports_column_join_marks: true,
43            locking_reads_supported: false,
44            tz_to_with_time_zone: true,
45            single_string_interval: true,
46            ..Default::default()
47        }
48    }
49
50    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
51        match expr {
52            // IFNULL -> COALESCE in Redshift
53            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
54                original_name: None,
55                expressions: vec![f.this, f.expression],
56                inferred_type: None,
57            }))),
58
59            // NVL is native in Redshift, but we standardize to COALESCE for consistency
60            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
61                original_name: None,
62                expressions: vec![f.this, f.expression],
63                inferred_type: None,
64            }))),
65
66            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
67            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
68
69            // SafeCast -> TRY_CAST in Redshift
70            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
71
72            // ILIKE is native in Redshift (inherited from PostgreSQL)
73            Expression::ILike(op) => Ok(Expression::ILike(op)),
74
75            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
76            Expression::CountIf(f) => {
77                let case_expr = Expression::Case(Box::new(Case {
78                    operand: None,
79                    whens: vec![(f.this.clone(), Expression::number(1))],
80                    else_: Some(Expression::number(0)),
81                    comments: Vec::new(),
82                    inferred_type: None,
83                }));
84                Ok(Expression::Sum(Box::new(AggFunc {
85                    ignore_nulls: None,
86                    having_max: None,
87                    this: case_expr,
88                    distinct: f.distinct,
89                    filter: f.filter,
90                    order_by: Vec::new(),
91                    name: None,
92                    limit: None,
93                    inferred_type: None,
94                })))
95            }
96
97            // EXPLODE is not supported in Redshift
98            Expression::Explode(_) => Ok(expr),
99
100            // ExplodeOuter is not supported in Redshift
101            Expression::ExplodeOuter(_) => Ok(expr),
102
103            // UNNEST is supported in Redshift (but limited)
104            Expression::Unnest(_) => Ok(expr),
105
106            // RAND -> RANDOM in Redshift (like PostgreSQL)
107            Expression::Rand(r) => {
108                let _ = r.seed;
109                Ok(Expression::Random(crate::expressions::Random))
110            }
111
112            // DateAdd -> DATEADD(unit, count, date) in Redshift
113            Expression::DateAdd(f) => {
114                let unit_str = match f.unit {
115                    crate::expressions::IntervalUnit::Year => "YEAR",
116                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
117                    crate::expressions::IntervalUnit::Month => "MONTH",
118                    crate::expressions::IntervalUnit::Week => "WEEK",
119                    crate::expressions::IntervalUnit::Day => "DAY",
120                    crate::expressions::IntervalUnit::Hour => "HOUR",
121                    crate::expressions::IntervalUnit::Minute => "MINUTE",
122                    crate::expressions::IntervalUnit::Second => "SECOND",
123                    crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
124                    crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
125                    crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
126                };
127                let unit = Expression::Identifier(crate::expressions::Identifier {
128                    name: unit_str.to_string(),
129                    quoted: false,
130                    trailing_comments: Vec::new(),
131                    span: None,
132                });
133                Ok(Expression::Function(Box::new(Function::new(
134                    "DATEADD".to_string(),
135                    vec![unit, f.interval, f.this],
136                ))))
137            }
138
139            // Generic function transformations
140            Expression::Function(f) => self.transform_function(*f),
141
142            // Generic aggregate function transformations
143            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
144
145            // Cast transformations
146            Expression::Cast(c) => self.transform_cast(*c),
147
148            // CONVERT -> CAST in Redshift
149            Expression::Convert(c) => Ok(Expression::Cast(Box::new(Cast {
150                this: c.this,
151                to: c.to,
152                trailing_comments: Vec::new(),
153                double_colon_syntax: false,
154                format: None,
155                default: None,
156                inferred_type: None,
157            }))),
158
159            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
160            Expression::Select(mut select) => {
161                if let Some(top) = select.top.take() {
162                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
163                    if !top.percent && !top.with_ties {
164                        // Convert TOP to LIMIT
165                        select.limit = Some(Limit {
166                            this: top.this,
167                            percent: false,
168                            comments: Vec::new(),
169                        });
170                    } else {
171                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
172                        select.top = Some(top);
173                    }
174                }
175                Ok(Expression::Select(select))
176            }
177
178            // Pass through everything else
179            _ => Ok(expr),
180        }
181    }
182}
183
184impl RedshiftDialect {
185    fn transform_function(&self, f: Function) -> Result<Expression> {
186        let name_upper = f.name.to_uppercase();
187        match name_upper.as_str() {
188            // IFNULL -> COALESCE
189            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
190                original_name: None,
191                expressions: f.args,
192                inferred_type: None,
193            }))),
194
195            // NVL -> COALESCE (supports 2+ args)
196            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
197                original_name: None,
198                expressions: f.args,
199                inferred_type: None,
200            }))),
201
202            // ISNULL -> COALESCE
203            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
204                original_name: None,
205                expressions: f.args,
206                inferred_type: None,
207            }))),
208
209            // GETDATE is native to Redshift
210            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
211                "GETDATE".to_string(),
212                vec![],
213            )))),
214
215            // NOW -> GETDATE in Redshift
216            "NOW" => Ok(Expression::Function(Box::new(Function::new(
217                "GETDATE".to_string(),
218                vec![],
219            )))),
220
221            // RAND -> RANDOM in Redshift
222            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
223
224            // GROUP_CONCAT -> LISTAGG in Redshift
225            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
226                Function::new("LISTAGG".to_string(), f.args),
227            ))),
228
229            // STRING_AGG -> LISTAGG in Redshift
230            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
231                Function::new("LISTAGG".to_string(), f.args),
232            ))),
233
234            // LISTAGG is native in Redshift
235            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
236
237            // SUBSTR -> SUBSTRING
238            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
239                "SUBSTRING".to_string(),
240                f.args,
241            )))),
242
243            // LEN is native in Redshift
244            "LEN" => Ok(Expression::Function(Box::new(f))),
245
246            // LENGTH -> LEN in Redshift
247            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
248                "LEN".to_string(),
249                f.args,
250            )))),
251
252            // CHARINDEX is native in Redshift
253            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
254
255            // POSITION -> CHARINDEX in Redshift (with swapped args)
256            "POSITION" if f.args.len() == 2 => {
257                let mut args = f.args;
258                let substring = args.remove(0);
259                let string = args.remove(0);
260                // CHARINDEX(substring, string)
261                Ok(Expression::Function(Box::new(Function::new(
262                    "CHARINDEX".to_string(),
263                    vec![substring, string],
264                ))))
265            }
266
267            // STRPOS -> CHARINDEX in Redshift
268            "STRPOS" if f.args.len() == 2 => {
269                let args = f.args;
270                // STRPOS(string, substring) -> CHARINDEX(substring, string)
271                let string = args[0].clone();
272                let substring = args[1].clone();
273                Ok(Expression::Function(Box::new(Function::new(
274                    "CHARINDEX".to_string(),
275                    vec![substring, string],
276                ))))
277            }
278
279            // INSTR -> CHARINDEX in Redshift
280            "INSTR" if f.args.len() >= 2 => {
281                let mut args = f.args;
282                let string = args.remove(0);
283                let substring = args.remove(0);
284                // INSTR(string, substring) -> CHARINDEX(substring, string)
285                Ok(Expression::Function(Box::new(Function::new(
286                    "CHARINDEX".to_string(),
287                    vec![substring, string],
288                ))))
289            }
290
291            // LOCATE -> CHARINDEX in Redshift
292            "LOCATE" if f.args.len() >= 2 => {
293                let mut args = f.args;
294                let substring = args.remove(0);
295                let string = args.remove(0);
296                // LOCATE(substring, string) -> CHARINDEX(substring, string)
297                Ok(Expression::Function(Box::new(Function::new(
298                    "CHARINDEX".to_string(),
299                    vec![substring, string],
300                ))))
301            }
302
303            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
304            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
305            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
306
307            // SIZE -> not directly supported
308            "SIZE" => Ok(Expression::Function(Box::new(f))),
309
310            // TO_DATE -> TO_DATE (native in Redshift)
311            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
312
313            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
314            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
315
316            // DATE_FORMAT -> TO_CHAR in Redshift
317            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
318                Function::new("TO_CHAR".to_string(), f.args),
319            ))),
320
321            // strftime -> TO_CHAR in Redshift
322            "STRFTIME" if f.args.len() >= 2 => {
323                let mut args = f.args;
324                let format = args.remove(0);
325                let date = args.remove(0);
326                Ok(Expression::Function(Box::new(Function::new(
327                    "TO_CHAR".to_string(),
328                    vec![date, format],
329                ))))
330            }
331
332            // TO_CHAR is native in Redshift
333            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
334
335            // LEVENSHTEIN -> not directly supported
336            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
337
338            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
339            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
340                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
341            ))),
342
343            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
344            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
345                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
346            ))),
347
348            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
349            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
350                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
351            ))),
352
353            // COLLECT_LIST -> not directly supported (limited array support)
354            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
355
356            // COLLECT_SET -> not directly supported
357            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
358
359            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
360            "RLIKE" if f.args.len() == 2 => {
361                // Redshift uses ~ for regex matching
362                let mut args = f.args;
363                let string = args.remove(0);
364                let pattern = args.remove(0);
365                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
366                    this: string,
367                    pattern,
368                    flags: None,
369                })))
370            }
371
372            // REGEXP -> RegexpLike
373            "REGEXP" if f.args.len() == 2 => {
374                let mut args = f.args;
375                let string = args.remove(0);
376                let pattern = args.remove(0);
377                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
378                    this: string,
379                    pattern,
380                    flags: None,
381                })))
382            }
383
384            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
385            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
386
387            // ADD_MONTHS -> DATEADD in Redshift
388            "ADD_MONTHS" if f.args.len() == 2 => {
389                let mut args = f.args;
390                let date = args.remove(0);
391                let months = args.remove(0);
392                // DATEADD(month, num_months, date)
393                Ok(Expression::Function(Box::new(Function::new(
394                    "DATEADD".to_string(),
395                    vec![Expression::identifier("month"), months, date],
396                ))))
397            }
398
399            // DATEDIFF is native in Redshift
400            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
401
402            // DATE_DIFF -> DATEDIFF in Redshift
403            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
404                "DATEDIFF".to_string(),
405                f.args,
406            )))),
407
408            // DATEADD is native in Redshift
409            "DATEADD" => Ok(Expression::Function(Box::new(f))),
410
411            // DATE_ADD -> DATEADD in Redshift
412            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
413                "DATEADD".to_string(),
414                f.args,
415            )))),
416
417            // SPLIT_TO_ARRAY is native in Redshift
418            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
419
420            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
421            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
422                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
423            ))),
424
425            // SPLIT -> SPLIT_TO_ARRAY in Redshift
426            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
427                "SPLIT_TO_ARRAY".to_string(),
428                f.args,
429            )))),
430
431            // STRTOL is native in Redshift (string to long/base conversion)
432            "STRTOL" => Ok(Expression::Function(Box::new(f))),
433
434            // FROM_BASE -> STRTOL in Redshift
435            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
436                "STRTOL".to_string(),
437                f.args,
438            )))),
439
440            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
441            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
442                let mut new_args = vec![Expression::string("UTC")];
443                new_args.extend(f.args);
444                Ok(Expression::Function(Box::new(Function::new(
445                    "CONVERT_TIMEZONE".to_string(),
446                    new_args,
447                ))))
448            }
449            // 3-arg form stays as-is
450            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
451
452            // CONVERT(type, expr) -> CAST(expr AS type)
453            "CONVERT" if f.args.len() == 2 => {
454                let type_expr = &f.args[0];
455                let value_expr = f.args[1].clone();
456
457                // Extract type name from the first argument (it's likely a Column or Identifier)
458                let type_name = match type_expr {
459                    Expression::Column(c) => c.name.name.clone(),
460                    Expression::Identifier(i) => i.name.clone(),
461                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
462                };
463
464                // Map type name to DataType
465                let data_type = match type_name.to_uppercase().as_str() {
466                    "INT" | "INTEGER" => DataType::Int {
467                        length: None,
468                        integer_spelling: false,
469                    },
470                    "BIGINT" => DataType::BigInt { length: None },
471                    "SMALLINT" => DataType::SmallInt { length: None },
472                    "TINYINT" => DataType::TinyInt { length: None },
473                    "VARCHAR" => DataType::VarChar {
474                        length: None,
475                        parenthesized_length: false,
476                    },
477                    "CHAR" => DataType::Char { length: None },
478                    "FLOAT" | "REAL" => DataType::Float {
479                        precision: None,
480                        scale: None,
481                        real_spelling: false,
482                    },
483                    "DOUBLE" => DataType::Double {
484                        precision: None,
485                        scale: None,
486                    },
487                    "BOOLEAN" | "BOOL" => DataType::Boolean,
488                    "DATE" => DataType::Date,
489                    "TIMESTAMP" => DataType::Timestamp {
490                        precision: None,
491                        timezone: false,
492                    },
493                    "TEXT" => DataType::Text,
494                    "DECIMAL" | "NUMERIC" => DataType::Decimal {
495                        precision: None,
496                        scale: None,
497                    },
498                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
499                };
500
501                Ok(Expression::Cast(Box::new(Cast {
502                    this: value_expr,
503                    to: data_type,
504                    trailing_comments: Vec::new(),
505                    double_colon_syntax: false,
506                    format: None,
507                    default: None,
508                    inferred_type: None,
509                })))
510            }
511
512            // Pass through everything else
513            _ => Ok(Expression::Function(Box::new(f))),
514        }
515    }
516
517    fn transform_aggregate_function(
518        &self,
519        f: Box<crate::expressions::AggregateFunction>,
520    ) -> Result<Expression> {
521        let name_upper = f.name.to_uppercase();
522        match name_upper.as_str() {
523            // COUNT_IF -> SUM(CASE WHEN...)
524            "COUNT_IF" if !f.args.is_empty() => {
525                let condition = f.args.into_iter().next().unwrap();
526                let case_expr = Expression::Case(Box::new(Case {
527                    operand: None,
528                    whens: vec![(condition, Expression::number(1))],
529                    else_: Some(Expression::number(0)),
530                    comments: Vec::new(),
531                    inferred_type: None,
532                }));
533                Ok(Expression::Sum(Box::new(AggFunc {
534                    ignore_nulls: None,
535                    having_max: None,
536                    this: case_expr,
537                    distinct: f.distinct,
538                    filter: f.filter,
539                    order_by: Vec::new(),
540                    name: None,
541                    limit: None,
542                    inferred_type: None,
543                })))
544            }
545
546            // ANY_VALUE is native in Redshift
547            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
548
549            // GROUP_CONCAT -> LISTAGG in Redshift
550            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
551                Function::new("LISTAGG".to_string(), f.args),
552            ))),
553
554            // STRING_AGG -> LISTAGG in Redshift
555            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
556                Function::new("LISTAGG".to_string(), f.args),
557            ))),
558
559            // LISTAGG is native in Redshift
560            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
561
562            // STDDEV is native in Redshift
563            "STDDEV" => Ok(Expression::AggregateFunction(f)),
564
565            // VARIANCE is native in Redshift
566            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
567
568            // MEDIAN is native in Redshift
569            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
570
571            // Pass through everything else
572            _ => Ok(Expression::AggregateFunction(f)),
573        }
574    }
575
576    fn transform_cast(&self, c: Cast) -> Result<Expression> {
577        // Redshift type mappings are handled in the generator
578        Ok(Expression::Cast(Box::new(c)))
579    }
580}