Skip to main content

polyglot_sql/dialects/
teradata.rs

1//! Teradata Dialect
2//!
3//! Teradata-specific transformations based on sqlglot patterns.
4//! Teradata has unique syntax including ** for exponentiation,
5//! TOP instead of LIMIT, and TRYCAST for safe casting.
6
7use super::{DialectImpl, DialectType};
8use crate::error::Result;
9use crate::expressions::{AggFunc, Case, Cast, Expression, Function, UnaryFunc, VarArgFunc};
10#[cfg(feature = "generate")]
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
14/// Teradata dialect
15pub struct TeradataDialect;
16
17impl DialectImpl for TeradataDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Teradata
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        let mut config = TokenizerConfig::default();
24        // Teradata uses double quotes for identifiers
25        config.identifiers.insert('"', '"');
26        // Teradata does NOT support nested comments
27        config.nested_comments = false;
28        // Teradata-specific keywords and operators
29        config
30            .keywords
31            .insert("SEL".to_string(), crate::tokens::TokenType::Select);
32        config
33            .keywords
34            .insert("UPD".to_string(), crate::tokens::TokenType::Update);
35        config
36            .keywords
37            .insert("DEL".to_string(), crate::tokens::TokenType::Delete);
38        config
39            .keywords
40            .insert("INS".to_string(), crate::tokens::TokenType::Insert);
41        config
42            .keywords
43            .insert("SAMPLE".to_string(), crate::tokens::TokenType::Sample);
44        config
45            .keywords
46            .insert("LOCKING".to_string(), crate::tokens::TokenType::Lock);
47        config
48            .keywords
49            .insert("HELP".to_string(), crate::tokens::TokenType::Command);
50        config
51            .keywords
52            .insert("COLLECT".to_string(), crate::tokens::TokenType::Command);
53        config
54            .keywords
55            .insert("EQ".to_string(), crate::tokens::TokenType::Eq);
56        config
57            .keywords
58            .insert("NE".to_string(), crate::tokens::TokenType::Neq);
59        config
60            .keywords
61            .insert("GE".to_string(), crate::tokens::TokenType::Gte);
62        config
63            .keywords
64            .insert("GT".to_string(), crate::tokens::TokenType::Gt);
65        config
66            .keywords
67            .insert("LE".to_string(), crate::tokens::TokenType::Lte);
68        config
69            .keywords
70            .insert("LT".to_string(), crate::tokens::TokenType::Lt);
71        config
72            .keywords
73            .insert("MOD".to_string(), crate::tokens::TokenType::Mod);
74        config
75            .keywords
76            .insert("BYTEINT".to_string(), crate::tokens::TokenType::SmallInt);
77        config.keywords.insert(
78            "ST_GEOMETRY".to_string(),
79            crate::tokens::TokenType::Geometry,
80        );
81        // Teradata does not support % as modulo operator
82        config.single_tokens.remove(&'%');
83        // Teradata treats 0x prefix as hex string literals
84        config.hex_number_strings = true;
85        config
86    }
87
88    #[cfg(feature = "generate")]
89
90    fn generator_config(&self) -> GeneratorConfig {
91        use crate::generator::IdentifierQuoteStyle;
92        GeneratorConfig {
93            identifier_quote: '"',
94            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
95            dialect: Some(DialectType::Teradata),
96            tablesample_keywords: "SAMPLE",
97            tablesample_requires_parens: false,
98            tz_to_with_time_zone: true,
99            ..Default::default()
100        }
101    }
102
103    #[cfg(feature = "transpile")]
104
105    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
106        match expr {
107            // IFNULL -> COALESCE in Teradata
108            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
109                original_name: None,
110                expressions: vec![f.this, f.expression],
111                inferred_type: None,
112            }))),
113
114            // NVL -> COALESCE in Teradata
115            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
116                original_name: None,
117                expressions: vec![f.this, f.expression],
118                inferred_type: None,
119            }))),
120
121            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
122            Expression::Coalesce(mut f) => {
123                f.original_name = None;
124                Ok(Expression::Coalesce(f))
125            }
126
127            // TryCast -> TRYCAST in Teradata (native)
128            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
129
130            // SafeCast -> TRYCAST in Teradata
131            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
132
133            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
134            Expression::CountIf(f) => {
135                let case_expr = Expression::Case(Box::new(Case {
136                    operand: None,
137                    whens: vec![(f.this.clone(), Expression::number(1))],
138                    else_: Some(Expression::number(0)),
139                    comments: Vec::new(),
140                    inferred_type: None,
141                }));
142                Ok(Expression::Sum(Box::new(AggFunc {
143                    ignore_nulls: None,
144                    having_max: None,
145                    this: case_expr,
146                    distinct: f.distinct,
147                    filter: f.filter,
148                    order_by: Vec::new(),
149                    name: None,
150                    limit: None,
151                    inferred_type: None,
152                })))
153            }
154
155            // RAND -> RANDOM in Teradata (but preserve lower/upper for RANDOM(l, u))
156            Expression::Rand(r) => {
157                if r.lower.is_some() || r.upper.is_some() {
158                    // Keep as Rand with lower/upper for Teradata RANDOM(l, u)
159                    Ok(Expression::Rand(r))
160                } else {
161                    Ok(Expression::Random(crate::expressions::Random))
162                }
163            }
164
165            // Generic function transformations
166            Expression::Function(f) => self.transform_function(*f),
167
168            // Generic aggregate function transformations
169            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
170
171            // Cast transformations
172            Expression::Cast(c) => self.transform_cast(*c),
173
174            // Pass through everything else
175            _ => Ok(expr),
176        }
177    }
178}
179
180#[cfg(feature = "transpile")]
181impl TeradataDialect {
182    fn transform_function(&self, f: Function) -> Result<Expression> {
183        let name_upper = f.name.to_uppercase();
184        match name_upper.as_str() {
185            // IFNULL -> COALESCE
186            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
187                original_name: None,
188                expressions: f.args,
189                inferred_type: None,
190            }))),
191
192            // NVL -> COALESCE
193            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
194                original_name: None,
195                expressions: f.args,
196                inferred_type: None,
197            }))),
198
199            // ISNULL -> COALESCE
200            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
201                original_name: None,
202                expressions: f.args,
203                inferred_type: None,
204            }))),
205
206            // NOW -> CURRENT_TIMESTAMP
207            "NOW" => Ok(Expression::CurrentTimestamp(
208                crate::expressions::CurrentTimestamp {
209                    precision: None,
210                    sysdate: false,
211                },
212            )),
213
214            // GETDATE -> CURRENT_TIMESTAMP
215            "GETDATE" => Ok(Expression::CurrentTimestamp(
216                crate::expressions::CurrentTimestamp {
217                    precision: None,
218                    sysdate: false,
219                },
220            )),
221
222            // RAND -> RANDOM in Teradata
223            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
224
225            // LEN -> CHARACTER_LENGTH in Teradata
226            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
227                f.args.into_iter().next().unwrap(),
228            )))),
229
230            // LENGTH -> CHARACTER_LENGTH in Teradata
231            "LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
232                f.args.into_iter().next().unwrap(),
233            )))),
234
235            // CHARINDEX -> INSTR in Teradata (with swapped args)
236            "CHARINDEX" if f.args.len() >= 2 => {
237                let mut args = f.args;
238                let substring = args.remove(0);
239                let string = args.remove(0);
240                Ok(Expression::Function(Box::new(Function::new(
241                    "INSTR".to_string(),
242                    vec![string, substring],
243                ))))
244            }
245
246            // STRPOS -> INSTR in Teradata
247            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
248                "INSTR".to_string(),
249                f.args,
250            )))),
251
252            // LOCATE -> INSTR in Teradata (with swapped args)
253            "LOCATE" if f.args.len() >= 2 => {
254                let mut args = f.args;
255                let substring = args.remove(0);
256                let string = args.remove(0);
257                Ok(Expression::Function(Box::new(Function::new(
258                    "INSTR".to_string(),
259                    vec![string, substring],
260                ))))
261            }
262
263            // ARRAY_LENGTH -> CARDINALITY in Teradata
264            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
265                Function::new("CARDINALITY".to_string(), f.args),
266            ))),
267
268            // SIZE -> CARDINALITY in Teradata
269            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
270                "CARDINALITY".to_string(),
271                f.args,
272            )))),
273
274            // SUBSTR -> SUBSTRING
275            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
276                "SUBSTRING".to_string(),
277                f.args,
278            )))),
279
280            // DATE_FORMAT -> TO_CHAR in Teradata
281            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
282                Function::new("TO_CHAR".to_string(), f.args),
283            ))),
284
285            // strftime -> TO_CHAR in Teradata
286            "STRFTIME" if f.args.len() >= 2 => {
287                let mut args = f.args;
288                let format = args.remove(0);
289                let date = args.remove(0);
290                Ok(Expression::Function(Box::new(Function::new(
291                    "TO_CHAR".to_string(),
292                    vec![date, format],
293                ))))
294            }
295
296            // GREATEST is native in Teradata
297            "GREATEST" => Ok(Expression::Function(Box::new(f))),
298
299            // LEAST is native in Teradata
300            "LEAST" => Ok(Expression::Function(Box::new(f))),
301
302            // Pass through everything else
303            _ => Ok(Expression::Function(Box::new(f))),
304        }
305    }
306
307    fn transform_aggregate_function(
308        &self,
309        f: Box<crate::expressions::AggregateFunction>,
310    ) -> Result<Expression> {
311        let name_upper = f.name.to_uppercase();
312        match name_upper.as_str() {
313            // COUNT_IF -> SUM(CASE WHEN...)
314            "COUNT_IF" if !f.args.is_empty() => {
315                let condition = f.args.into_iter().next().unwrap();
316                let case_expr = Expression::Case(Box::new(Case {
317                    operand: None,
318                    whens: vec![(condition, Expression::number(1))],
319                    else_: Some(Expression::number(0)),
320                    comments: Vec::new(),
321                    inferred_type: None,
322                }));
323                Ok(Expression::Sum(Box::new(AggFunc {
324                    ignore_nulls: None,
325                    having_max: None,
326                    this: case_expr,
327                    distinct: f.distinct,
328                    filter: f.filter,
329                    order_by: Vec::new(),
330                    name: None,
331                    limit: None,
332                    inferred_type: None,
333                })))
334            }
335
336            // MAX_BY is native in Teradata
337            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
338
339            // MIN_BY is native in Teradata
340            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
341
342            // Pass through everything else
343            _ => Ok(Expression::AggregateFunction(f)),
344        }
345    }
346
347    fn transform_cast(&self, c: Cast) -> Result<Expression> {
348        // Teradata CAST(x AS DATE FORMAT 'fmt') -> StrToDate(x, converted_fmt)
349        // Teradata CAST(x AS TIMESTAMP FORMAT 'fmt') -> StrToTime(x, converted_fmt)
350        if let Some(format_expr) = &c.format {
351            let is_date = matches!(c.to, crate::expressions::DataType::Date);
352            let is_timestamp = matches!(c.to, crate::expressions::DataType::Timestamp { .. });
353
354            if is_date || is_timestamp {
355                // Extract the format string from the expression
356                let fmt_str = match format_expr.as_ref() {
357                    Expression::Literal(lit)
358                        if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) =>
359                    {
360                        let crate::expressions::Literal::String(s) = lit.as_ref() else {
361                            unreachable!()
362                        };
363                        Some(s.clone())
364                    }
365                    _ => None,
366                };
367
368                if let Some(teradata_fmt) = fmt_str {
369                    // Convert Teradata format to strftime format
370                    let strftime_fmt = Self::teradata_to_strftime(&teradata_fmt);
371
372                    if is_date {
373                        return Ok(Expression::StrToDate(Box::new(
374                            crate::expressions::StrToDate {
375                                this: Box::new(c.this),
376                                format: Some(strftime_fmt),
377                                safe: None,
378                            },
379                        )));
380                    } else {
381                        return Ok(Expression::StrToTime(Box::new(
382                            crate::expressions::StrToTime {
383                                this: Box::new(c.this),
384                                format: strftime_fmt,
385                                zone: None,
386                                safe: None,
387                                target_type: None,
388                            },
389                        )));
390                    }
391                }
392            }
393        }
394        // Teradata type mappings are handled in the generator
395        Ok(Expression::Cast(Box::new(c)))
396    }
397
398    /// Convert Teradata date/time format string to strftime format
399    fn teradata_to_strftime(fmt: &str) -> String {
400        // Teradata TIME_MAPPING: longest tokens first to avoid partial matches
401        let mut result = fmt.to_string();
402        // Order matters: replace longer tokens first
403        result = result.replace("YYYY", "%Y");
404        result = result.replace("Y4", "%Y");
405        result = result.replace("YY", "%y");
406        result = result.replace("MMMM", "%B");
407        result = result.replace("MMM", "%b");
408        result = result.replace("MM", "%m");
409        result = result.replace("M4", "%B");
410        result = result.replace("M3", "%b");
411        result = result.replace("EEEE", "%A");
412        result = result.replace("EEE", "%a");
413        result = result.replace("EE", "%a");
414        result = result.replace("E4", "%A");
415        result = result.replace("E3", "%a");
416        result = result.replace("DDD", "%j");
417        result = result.replace("DD", "%d");
418        result = result.replace("D3", "%j");
419        result = result.replace("HH24", "%H");
420        result = result.replace("HH", "%H");
421        result = result.replace("SSSSSS", "%f");
422        result = result.replace("SS", "%S");
423        result = result.replace("MI", "%M");
424        result
425    }
426}