Skip to main content

polyglot_sql/dialects/
redshift.rs

1//! Redshift Dialect
2//!
3//! Redshift-specific transformations based on sqlglot patterns.
4//! Redshift is based on PostgreSQL but has some differences.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, DataType, Expression, Function, Limit, RegexpFunc, VarArgFunc,
10};
11#[cfg(feature = "generate")]
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
15/// Redshift dialect
16pub struct RedshiftDialect;
17
18impl DialectImpl for RedshiftDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Redshift
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        use crate::tokens::TokenType;
25        let mut config = TokenizerConfig::default();
26        // Redshift uses double quotes for identifiers (like PostgreSQL)
27        config.identifiers.insert('"', '"');
28        // Redshift does NOT support nested comments
29        config.nested_comments = false;
30        // MINUS is an alias for EXCEPT in Redshift
31        config
32            .keywords
33            .insert("MINUS".to_string(), TokenType::Except);
34        config
35    }
36
37    #[cfg(feature = "generate")]
38
39    fn generator_config(&self) -> GeneratorConfig {
40        use crate::generator::IdentifierQuoteStyle;
41        GeneratorConfig {
42            identifier_quote: '"',
43            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
44            dialect: Some(DialectType::Redshift),
45            supports_column_join_marks: true,
46            locking_reads_supported: false,
47            tz_to_with_time_zone: true,
48            single_string_interval: true,
49            ..Default::default()
50        }
51    }
52
53    #[cfg(feature = "transpile")]
54
55    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
56        match expr {
57            // IFNULL -> COALESCE in Redshift
58            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
59                original_name: None,
60                expressions: vec![f.this, f.expression],
61                inferred_type: None,
62            }))),
63
64            // NVL is native in Redshift, but we standardize to COALESCE for consistency
65            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
66                original_name: None,
67                expressions: vec![f.this, f.expression],
68                inferred_type: None,
69            }))),
70
71            // TryCast -> TRY_CAST (Redshift supports TRY_CAST via CONVERT)
72            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
73
74            // SafeCast -> TRY_CAST in Redshift
75            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
76
77            // ILIKE is native in Redshift (inherited from PostgreSQL)
78            Expression::ILike(op) => Ok(Expression::ILike(op)),
79
80            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
81            Expression::CountIf(f) => {
82                let case_expr = Expression::Case(Box::new(Case {
83                    operand: None,
84                    whens: vec![(f.this.clone(), Expression::number(1))],
85                    else_: Some(Expression::number(0)),
86                    comments: Vec::new(),
87                    inferred_type: None,
88                }));
89                Ok(Expression::Sum(Box::new(AggFunc {
90                    ignore_nulls: None,
91                    having_max: None,
92                    this: case_expr,
93                    distinct: f.distinct,
94                    filter: f.filter,
95                    order_by: Vec::new(),
96                    name: None,
97                    limit: None,
98                    inferred_type: None,
99                })))
100            }
101
102            // EXPLODE is not supported in Redshift
103            Expression::Explode(_) => Ok(expr),
104
105            // ExplodeOuter is not supported in Redshift
106            Expression::ExplodeOuter(_) => Ok(expr),
107
108            // UNNEST is supported in Redshift (but limited)
109            Expression::Unnest(_) => Ok(expr),
110
111            // RAND -> RANDOM in Redshift (like PostgreSQL)
112            Expression::Rand(r) => {
113                let _ = r.seed;
114                Ok(Expression::Random(crate::expressions::Random))
115            }
116
117            // DateAdd -> DATEADD(unit, count, date) in Redshift
118            Expression::DateAdd(f) => {
119                let unit_str = match f.unit {
120                    crate::expressions::IntervalUnit::Year => "YEAR",
121                    crate::expressions::IntervalUnit::Quarter => "QUARTER",
122                    crate::expressions::IntervalUnit::Month => "MONTH",
123                    crate::expressions::IntervalUnit::Week => "WEEK",
124                    crate::expressions::IntervalUnit::Day => "DAY",
125                    crate::expressions::IntervalUnit::Hour => "HOUR",
126                    crate::expressions::IntervalUnit::Minute => "MINUTE",
127                    crate::expressions::IntervalUnit::Second => "SECOND",
128                    crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
129                    crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
130                    crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
131                };
132                let unit = Expression::Identifier(crate::expressions::Identifier {
133                    name: unit_str.to_string(),
134                    quoted: false,
135                    trailing_comments: Vec::new(),
136                    span: None,
137                });
138                Ok(Expression::Function(Box::new(Function::new(
139                    "DATEADD".to_string(),
140                    vec![unit, f.interval, f.this],
141                ))))
142            }
143
144            // Generic function transformations
145            Expression::Function(f) => self.transform_function(*f),
146
147            // Generic aggregate function transformations
148            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
149
150            // Cast transformations
151            Expression::Cast(c) => self.transform_cast(*c),
152
153            // CONVERT -> CAST in Redshift
154            Expression::Convert(c) => Ok(Expression::Cast(Box::new(Cast {
155                this: c.this,
156                to: c.to,
157                trailing_comments: Vec::new(),
158                double_colon_syntax: false,
159                format: None,
160                default: None,
161                inferred_type: None,
162            }))),
163
164            // SELECT TOP n -> SELECT ... LIMIT n in Redshift (PostgreSQL-style)
165            Expression::Select(mut select) => {
166                if let Some(top) = select.top.take() {
167                    // Only convert simple TOP (not TOP PERCENT or WITH TIES)
168                    if !top.percent && !top.with_ties {
169                        // Convert TOP to LIMIT
170                        select.limit = Some(Limit {
171                            this: top.this,
172                            percent: false,
173                            comments: Vec::new(),
174                        });
175                    } else {
176                        // Restore TOP if it has PERCENT or WITH TIES (not supported as LIMIT)
177                        select.top = Some(top);
178                    }
179                }
180                Ok(Expression::Select(select))
181            }
182
183            // Pass through everything else
184            _ => Ok(expr),
185        }
186    }
187}
188
189#[cfg(feature = "transpile")]
190impl RedshiftDialect {
191    fn transform_function(&self, f: Function) -> Result<Expression> {
192        let name_upper = f.name.to_uppercase();
193        match name_upper.as_str() {
194            // IFNULL -> COALESCE
195            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
196                original_name: None,
197                expressions: f.args,
198                inferred_type: None,
199            }))),
200
201            // NVL -> COALESCE (supports 2+ args)
202            "NVL" if f.args.len() >= 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
203                original_name: None,
204                expressions: f.args,
205                inferred_type: None,
206            }))),
207
208            // ISNULL -> COALESCE
209            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
210                original_name: None,
211                expressions: f.args,
212                inferred_type: None,
213            }))),
214
215            // GETDATE is native to Redshift
216            "GETDATE" => Ok(Expression::Function(Box::new(Function::new(
217                "GETDATE".to_string(),
218                vec![],
219            )))),
220
221            // NOW -> GETDATE in Redshift
222            "NOW" => Ok(Expression::Function(Box::new(Function::new(
223                "GETDATE".to_string(),
224                vec![],
225            )))),
226
227            // RAND -> RANDOM in Redshift
228            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
229
230            // GROUP_CONCAT -> LISTAGG in Redshift
231            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
232                Function::new("LISTAGG".to_string(), f.args),
233            ))),
234
235            // STRING_AGG -> LISTAGG in Redshift
236            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
237                Function::new("LISTAGG".to_string(), f.args),
238            ))),
239
240            // LISTAGG is native in Redshift
241            "LISTAGG" => Ok(Expression::Function(Box::new(f))),
242
243            // SUBSTR -> SUBSTRING
244            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
245                "SUBSTRING".to_string(),
246                f.args,
247            )))),
248
249            // LEN is native in Redshift
250            "LEN" => Ok(Expression::Function(Box::new(f))),
251
252            // LENGTH -> LEN in Redshift
253            "LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
254                "LEN".to_string(),
255                f.args,
256            )))),
257
258            // CHARINDEX is native in Redshift
259            "CHARINDEX" => Ok(Expression::Function(Box::new(f))),
260
261            // POSITION -> CHARINDEX in Redshift (with swapped args)
262            "POSITION" if f.args.len() == 2 => {
263                let mut args = f.args;
264                let substring = args.remove(0);
265                let string = args.remove(0);
266                // CHARINDEX(substring, string)
267                Ok(Expression::Function(Box::new(Function::new(
268                    "CHARINDEX".to_string(),
269                    vec![substring, string],
270                ))))
271            }
272
273            // STRPOS -> CHARINDEX in Redshift
274            "STRPOS" if f.args.len() == 2 => {
275                let args = f.args;
276                // STRPOS(string, substring) -> CHARINDEX(substring, string)
277                let string = args[0].clone();
278                let substring = args[1].clone();
279                Ok(Expression::Function(Box::new(Function::new(
280                    "CHARINDEX".to_string(),
281                    vec![substring, string],
282                ))))
283            }
284
285            // INSTR -> CHARINDEX in Redshift
286            "INSTR" if f.args.len() >= 2 => {
287                let mut args = f.args;
288                let string = args.remove(0);
289                let substring = args.remove(0);
290                // INSTR(string, substring) -> CHARINDEX(substring, string)
291                Ok(Expression::Function(Box::new(Function::new(
292                    "CHARINDEX".to_string(),
293                    vec![substring, string],
294                ))))
295            }
296
297            // LOCATE -> CHARINDEX in Redshift
298            "LOCATE" if f.args.len() >= 2 => {
299                let mut args = f.args;
300                let substring = args.remove(0);
301                let string = args.remove(0);
302                // LOCATE(substring, string) -> CHARINDEX(substring, string)
303                Ok(Expression::Function(Box::new(Function::new(
304                    "CHARINDEX".to_string(),
305                    vec![substring, string],
306                ))))
307            }
308
309            // ARRAY_LENGTH -> ARRAY_UPPER / custom in Redshift
310            // Redshift doesn't have ARRAY_LENGTH, arrays are limited
311            "ARRAY_LENGTH" => Ok(Expression::Function(Box::new(f))),
312
313            // SIZE -> not directly supported
314            "SIZE" => Ok(Expression::Function(Box::new(f))),
315
316            // TO_DATE -> TO_DATE (native in Redshift)
317            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
318
319            // TO_TIMESTAMP -> TO_TIMESTAMP (native in Redshift)
320            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
321
322            // DATE_FORMAT -> TO_CHAR in Redshift
323            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
324                Function::new("TO_CHAR".to_string(), f.args),
325            ))),
326
327            // strftime -> TO_CHAR in Redshift
328            "STRFTIME" if f.args.len() >= 2 => {
329                let mut args = f.args;
330                let format = args.remove(0);
331                let date = args.remove(0);
332                Ok(Expression::Function(Box::new(Function::new(
333                    "TO_CHAR".to_string(),
334                    vec![date, format],
335                ))))
336            }
337
338            // TO_CHAR is native in Redshift
339            "TO_CHAR" => Ok(Expression::Function(Box::new(f))),
340
341            // LEVENSHTEIN -> not directly supported
342            "LEVENSHTEIN" => Ok(Expression::Function(Box::new(f))),
343
344            // JSON_EXTRACT -> JSON_EXTRACT_PATH_TEXT in Redshift
345            "JSON_EXTRACT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
346                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
347            ))),
348
349            // JSON_EXTRACT_SCALAR -> JSON_EXTRACT_PATH_TEXT in Redshift
350            "JSON_EXTRACT_SCALAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
351                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
352            ))),
353
354            // GET_JSON_OBJECT -> JSON_EXTRACT_PATH_TEXT in Redshift
355            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
356                Function::new("JSON_EXTRACT_PATH_TEXT".to_string(), f.args),
357            ))),
358
359            // COLLECT_LIST -> not directly supported (limited array support)
360            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
361
362            // COLLECT_SET -> not directly supported
363            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
364
365            // RLIKE -> REGEXP_MATCHES in Redshift (or SIMILAR TO)
366            "RLIKE" if f.args.len() == 2 => {
367                // Redshift uses ~ for regex matching
368                let mut args = f.args;
369                let string = args.remove(0);
370                let pattern = args.remove(0);
371                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
372                    this: string,
373                    pattern,
374                    flags: None,
375                })))
376            }
377
378            // REGEXP -> RegexpLike
379            "REGEXP" if f.args.len() == 2 => {
380                let mut args = f.args;
381                let string = args.remove(0);
382                let pattern = args.remove(0);
383                Ok(Expression::RegexpLike(Box::new(RegexpFunc {
384                    this: string,
385                    pattern,
386                    flags: None,
387                })))
388            }
389
390            // REGEXP_LIKE -> native in Redshift (PostgreSQL-compatible)
391            "REGEXP_LIKE" => Ok(Expression::Function(Box::new(f))),
392
393            // ADD_MONTHS -> DATEADD in Redshift
394            "ADD_MONTHS" if f.args.len() == 2 => {
395                let mut args = f.args;
396                let date = args.remove(0);
397                let months = args.remove(0);
398                // DATEADD(month, num_months, date)
399                Ok(Expression::Function(Box::new(Function::new(
400                    "DATEADD".to_string(),
401                    vec![Expression::identifier("month"), months, date],
402                ))))
403            }
404
405            // DATEDIFF is native in Redshift
406            "DATEDIFF" => Ok(Expression::Function(Box::new(f))),
407
408            // DATE_DIFF -> DATEDIFF in Redshift
409            "DATE_DIFF" => Ok(Expression::Function(Box::new(Function::new(
410                "DATEDIFF".to_string(),
411                f.args,
412            )))),
413
414            // DATEADD is native in Redshift
415            "DATEADD" => Ok(Expression::Function(Box::new(f))),
416
417            // DATE_ADD -> DATEADD in Redshift
418            "DATE_ADD" => Ok(Expression::Function(Box::new(Function::new(
419                "DATEADD".to_string(),
420                f.args,
421            )))),
422
423            // SPLIT_TO_ARRAY is native in Redshift
424            "SPLIT_TO_ARRAY" => Ok(Expression::Function(Box::new(f))),
425
426            // STRING_TO_ARRAY -> SPLIT_TO_ARRAY in Redshift
427            "STRING_TO_ARRAY" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(
428                Function::new("SPLIT_TO_ARRAY".to_string(), f.args),
429            ))),
430
431            // SPLIT -> SPLIT_TO_ARRAY in Redshift
432            "SPLIT" if f.args.len() >= 1 => Ok(Expression::Function(Box::new(Function::new(
433                "SPLIT_TO_ARRAY".to_string(),
434                f.args,
435            )))),
436
437            // STRTOL is native in Redshift (string to long/base conversion)
438            "STRTOL" => Ok(Expression::Function(Box::new(f))),
439
440            // FROM_BASE -> STRTOL in Redshift
441            "FROM_BASE" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
442                "STRTOL".to_string(),
443                f.args,
444            )))),
445
446            // CONVERT_TIMEZONE(target_tz, timestamp) -> CONVERT_TIMEZONE('UTC', target_tz, timestamp)
447            "CONVERT_TIMEZONE" if f.args.len() == 2 => {
448                let mut new_args = vec![Expression::string("UTC")];
449                new_args.extend(f.args);
450                Ok(Expression::Function(Box::new(Function::new(
451                    "CONVERT_TIMEZONE".to_string(),
452                    new_args,
453                ))))
454            }
455            // 3-arg form stays as-is
456            "CONVERT_TIMEZONE" => Ok(Expression::Function(Box::new(f))),
457
458            // CONVERT(type, expr) -> CAST(expr AS type)
459            "CONVERT" if f.args.len() == 2 => {
460                let type_expr = &f.args[0];
461                let value_expr = f.args[1].clone();
462
463                // Extract type name from the first argument (it's likely a Column or Identifier)
464                let type_name = match type_expr {
465                    Expression::Column(c) => c.name.name.clone(),
466                    Expression::Identifier(i) => i.name.clone(),
467                    _ => return Ok(Expression::Function(Box::new(f))), // Can't handle, pass through
468                };
469
470                // Map type name to DataType
471                let data_type = match type_name.to_uppercase().as_str() {
472                    "INT" | "INTEGER" => DataType::Int {
473                        length: None,
474                        integer_spelling: false,
475                    },
476                    "BIGINT" => DataType::BigInt { length: None },
477                    "SMALLINT" => DataType::SmallInt { length: None },
478                    "TINYINT" => DataType::TinyInt { length: None },
479                    "VARCHAR" => DataType::VarChar {
480                        length: None,
481                        parenthesized_length: false,
482                    },
483                    "CHAR" => DataType::Char { length: None },
484                    "FLOAT" | "REAL" => DataType::Float {
485                        precision: None,
486                        scale: None,
487                        real_spelling: false,
488                    },
489                    "DOUBLE" => DataType::Double {
490                        precision: None,
491                        scale: None,
492                    },
493                    "BOOLEAN" | "BOOL" => DataType::Boolean,
494                    "DATE" => DataType::Date,
495                    "TIMESTAMP" => DataType::Timestamp {
496                        precision: None,
497                        timezone: false,
498                    },
499                    "TEXT" => DataType::Text,
500                    "DECIMAL" | "NUMERIC" => DataType::Decimal {
501                        precision: None,
502                        scale: None,
503                    },
504                    _ => return Ok(Expression::Function(Box::new(f))), // Unknown type, pass through
505                };
506
507                Ok(Expression::Cast(Box::new(Cast {
508                    this: value_expr,
509                    to: data_type,
510                    trailing_comments: Vec::new(),
511                    double_colon_syntax: false,
512                    format: None,
513                    default: None,
514                    inferred_type: None,
515                })))
516            }
517
518            // Pass through everything else
519            _ => Ok(Expression::Function(Box::new(f))),
520        }
521    }
522
523    fn transform_aggregate_function(
524        &self,
525        f: Box<crate::expressions::AggregateFunction>,
526    ) -> Result<Expression> {
527        let name_upper = f.name.to_uppercase();
528        match name_upper.as_str() {
529            // COUNT_IF -> SUM(CASE WHEN...)
530            "COUNT_IF" if !f.args.is_empty() => {
531                let condition = f.args.into_iter().next().unwrap();
532                let case_expr = Expression::Case(Box::new(Case {
533                    operand: None,
534                    whens: vec![(condition, Expression::number(1))],
535                    else_: Some(Expression::number(0)),
536                    comments: Vec::new(),
537                    inferred_type: None,
538                }));
539                Ok(Expression::Sum(Box::new(AggFunc {
540                    ignore_nulls: None,
541                    having_max: None,
542                    this: case_expr,
543                    distinct: f.distinct,
544                    filter: f.filter,
545                    order_by: Vec::new(),
546                    name: None,
547                    limit: None,
548                    inferred_type: None,
549                })))
550            }
551
552            // ANY_VALUE is native in Redshift
553            "ANY_VALUE" => Ok(Expression::AggregateFunction(f)),
554
555            // GROUP_CONCAT -> LISTAGG in Redshift
556            "GROUP_CONCAT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
557                Function::new("LISTAGG".to_string(), f.args),
558            ))),
559
560            // STRING_AGG -> LISTAGG in Redshift
561            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
562                Function::new("LISTAGG".to_string(), f.args),
563            ))),
564
565            // LISTAGG is native in Redshift
566            "LISTAGG" => Ok(Expression::AggregateFunction(f)),
567
568            // STDDEV is native in Redshift
569            "STDDEV" => Ok(Expression::AggregateFunction(f)),
570
571            // VARIANCE is native in Redshift
572            "VARIANCE" => Ok(Expression::AggregateFunction(f)),
573
574            // MEDIAN is native in Redshift
575            "MEDIAN" => Ok(Expression::AggregateFunction(f)),
576
577            // Pass through everything else
578            _ => Ok(Expression::AggregateFunction(f)),
579        }
580    }
581
582    fn transform_cast(&self, c: Cast) -> Result<Expression> {
583        // Redshift type mappings are handled in the generator
584        Ok(Expression::Cast(Box::new(c)))
585    }
586}