Skip to main content

polyglot_sql/dialects/
teradata.rs

1//! Teradata Dialect
2//!
3//! Teradata-specific transformations based on sqlglot patterns.
4//! Teradata has unique syntax including ** for exponentiation,
5//! TOP instead of LIMIT, and TRYCAST for safe casting.
6
7use super::{DialectImpl, DialectType};
8use crate::error::Result;
9use crate::expressions::{AggFunc, Case, Cast, Expression, Function, UnaryFunc, VarArgFunc};
10use crate::generator::GeneratorConfig;
11use crate::tokens::TokenizerConfig;
12
13/// Teradata dialect
14pub struct TeradataDialect;
15
16impl DialectImpl for TeradataDialect {
17    fn dialect_type(&self) -> DialectType {
18        DialectType::Teradata
19    }
20
21    fn tokenizer_config(&self) -> TokenizerConfig {
22        let mut config = TokenizerConfig::default();
23        // Teradata uses double quotes for identifiers
24        config.identifiers.insert('"', '"');
25        // Teradata does NOT support nested comments
26        config.nested_comments = false;
27        // Teradata-specific keywords and operators
28        config
29            .keywords
30            .insert("SEL".to_string(), crate::tokens::TokenType::Select);
31        config
32            .keywords
33            .insert("UPD".to_string(), crate::tokens::TokenType::Update);
34        config
35            .keywords
36            .insert("DEL".to_string(), crate::tokens::TokenType::Delete);
37        config
38            .keywords
39            .insert("INS".to_string(), crate::tokens::TokenType::Insert);
40        config
41            .keywords
42            .insert("SAMPLE".to_string(), crate::tokens::TokenType::Sample);
43        config
44            .keywords
45            .insert("LOCKING".to_string(), crate::tokens::TokenType::Lock);
46        config
47            .keywords
48            .insert("HELP".to_string(), crate::tokens::TokenType::Command);
49        config
50            .keywords
51            .insert("COLLECT".to_string(), crate::tokens::TokenType::Command);
52        config
53            .keywords
54            .insert("EQ".to_string(), crate::tokens::TokenType::Eq);
55        config
56            .keywords
57            .insert("NE".to_string(), crate::tokens::TokenType::Neq);
58        config
59            .keywords
60            .insert("GE".to_string(), crate::tokens::TokenType::Gte);
61        config
62            .keywords
63            .insert("GT".to_string(), crate::tokens::TokenType::Gt);
64        config
65            .keywords
66            .insert("LE".to_string(), crate::tokens::TokenType::Lte);
67        config
68            .keywords
69            .insert("LT".to_string(), crate::tokens::TokenType::Lt);
70        config
71            .keywords
72            .insert("MOD".to_string(), crate::tokens::TokenType::Mod);
73        config
74            .keywords
75            .insert("BYTEINT".to_string(), crate::tokens::TokenType::SmallInt);
76        config.keywords.insert(
77            "ST_GEOMETRY".to_string(),
78            crate::tokens::TokenType::Geometry,
79        );
80        // Teradata does not support % as modulo operator
81        config.single_tokens.remove(&'%');
82        // Teradata treats 0x prefix as hex string literals
83        config.hex_number_strings = true;
84        config
85    }
86
87    fn generator_config(&self) -> GeneratorConfig {
88        use crate::generator::IdentifierQuoteStyle;
89        GeneratorConfig {
90            identifier_quote: '"',
91            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
92            dialect: Some(DialectType::Teradata),
93            tablesample_keywords: "SAMPLE",
94            tablesample_requires_parens: false,
95            tz_to_with_time_zone: true,
96            ..Default::default()
97        }
98    }
99
100    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
101        match expr {
102            // IFNULL -> COALESCE in Teradata
103            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
104                original_name: None,
105                expressions: vec![f.this, f.expression],
106            }))),
107
108            // NVL -> COALESCE in Teradata
109            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
110                original_name: None,
111                expressions: vec![f.this, f.expression],
112            }))),
113
114            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
115            Expression::Coalesce(mut f) => {
116                f.original_name = None;
117                Ok(Expression::Coalesce(f))
118            }
119
120            // TryCast -> TRYCAST in Teradata (native)
121            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
122
123            // SafeCast -> TRYCAST in Teradata
124            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
125
126            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
127            Expression::CountIf(f) => {
128                let case_expr = Expression::Case(Box::new(Case {
129                    operand: None,
130                    whens: vec![(f.this.clone(), Expression::number(1))],
131                    else_: Some(Expression::number(0)),
132                    comments: Vec::new(),
133                }));
134                Ok(Expression::Sum(Box::new(AggFunc {
135                    ignore_nulls: None,
136                    having_max: None,
137                    this: case_expr,
138                    distinct: f.distinct,
139                    filter: f.filter,
140                    order_by: Vec::new(),
141                    name: None,
142                    limit: None,
143                })))
144            }
145
146            // RAND -> RANDOM in Teradata (but preserve lower/upper for RANDOM(l, u))
147            Expression::Rand(r) => {
148                if r.lower.is_some() || r.upper.is_some() {
149                    // Keep as Rand with lower/upper for Teradata RANDOM(l, u)
150                    Ok(Expression::Rand(r))
151                } else {
152                    Ok(Expression::Random(crate::expressions::Random))
153                }
154            }
155
156            // Generic function transformations
157            Expression::Function(f) => self.transform_function(*f),
158
159            // Generic aggregate function transformations
160            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
161
162            // Cast transformations
163            Expression::Cast(c) => self.transform_cast(*c),
164
165            // Pass through everything else
166            _ => Ok(expr),
167        }
168    }
169}
170
171impl TeradataDialect {
172    fn transform_function(&self, f: Function) -> Result<Expression> {
173        let name_upper = f.name.to_uppercase();
174        match name_upper.as_str() {
175            // IFNULL -> COALESCE
176            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
177                original_name: None,
178                expressions: f.args,
179            }))),
180
181            // NVL -> COALESCE
182            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
183                original_name: None,
184                expressions: f.args,
185            }))),
186
187            // ISNULL -> COALESCE
188            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
189                original_name: None,
190                expressions: f.args,
191            }))),
192
193            // NOW -> CURRENT_TIMESTAMP
194            "NOW" => Ok(Expression::CurrentTimestamp(
195                crate::expressions::CurrentTimestamp {
196                    precision: None,
197                    sysdate: false,
198                },
199            )),
200
201            // GETDATE -> CURRENT_TIMESTAMP
202            "GETDATE" => Ok(Expression::CurrentTimestamp(
203                crate::expressions::CurrentTimestamp {
204                    precision: None,
205                    sysdate: false,
206                },
207            )),
208
209            // RAND -> RANDOM in Teradata
210            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
211
212            // LEN -> CHARACTER_LENGTH in Teradata
213            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
214                f.args.into_iter().next().unwrap(),
215            )))),
216
217            // LENGTH -> CHARACTER_LENGTH in Teradata
218            "LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
219                f.args.into_iter().next().unwrap(),
220            )))),
221
222            // CHARINDEX -> INSTR in Teradata (with swapped args)
223            "CHARINDEX" if f.args.len() >= 2 => {
224                let mut args = f.args;
225                let substring = args.remove(0);
226                let string = args.remove(0);
227                Ok(Expression::Function(Box::new(Function::new(
228                    "INSTR".to_string(),
229                    vec![string, substring],
230                ))))
231            }
232
233            // STRPOS -> INSTR in Teradata
234            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
235                "INSTR".to_string(),
236                f.args,
237            )))),
238
239            // LOCATE -> INSTR in Teradata (with swapped args)
240            "LOCATE" if f.args.len() >= 2 => {
241                let mut args = f.args;
242                let substring = args.remove(0);
243                let string = args.remove(0);
244                Ok(Expression::Function(Box::new(Function::new(
245                    "INSTR".to_string(),
246                    vec![string, substring],
247                ))))
248            }
249
250            // ARRAY_LENGTH -> CARDINALITY in Teradata
251            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
252                Function::new("CARDINALITY".to_string(), f.args),
253            ))),
254
255            // SIZE -> CARDINALITY in Teradata
256            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
257                "CARDINALITY".to_string(),
258                f.args,
259            )))),
260
261            // SUBSTR -> SUBSTRING
262            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
263                "SUBSTRING".to_string(),
264                f.args,
265            )))),
266
267            // DATE_FORMAT -> TO_CHAR in Teradata
268            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
269                Function::new("TO_CHAR".to_string(), f.args),
270            ))),
271
272            // strftime -> TO_CHAR in Teradata
273            "STRFTIME" if f.args.len() >= 2 => {
274                let mut args = f.args;
275                let format = args.remove(0);
276                let date = args.remove(0);
277                Ok(Expression::Function(Box::new(Function::new(
278                    "TO_CHAR".to_string(),
279                    vec![date, format],
280                ))))
281            }
282
283            // GREATEST is native in Teradata
284            "GREATEST" => Ok(Expression::Function(Box::new(f))),
285
286            // LEAST is native in Teradata
287            "LEAST" => Ok(Expression::Function(Box::new(f))),
288
289            // Pass through everything else
290            _ => Ok(Expression::Function(Box::new(f))),
291        }
292    }
293
294    fn transform_aggregate_function(
295        &self,
296        f: Box<crate::expressions::AggregateFunction>,
297    ) -> Result<Expression> {
298        let name_upper = f.name.to_uppercase();
299        match name_upper.as_str() {
300            // COUNT_IF -> SUM(CASE WHEN...)
301            "COUNT_IF" if !f.args.is_empty() => {
302                let condition = f.args.into_iter().next().unwrap();
303                let case_expr = Expression::Case(Box::new(Case {
304                    operand: None,
305                    whens: vec![(condition, Expression::number(1))],
306                    else_: Some(Expression::number(0)),
307                    comments: Vec::new(),
308                }));
309                Ok(Expression::Sum(Box::new(AggFunc {
310                    ignore_nulls: None,
311                    having_max: None,
312                    this: case_expr,
313                    distinct: f.distinct,
314                    filter: f.filter,
315                    order_by: Vec::new(),
316                    name: None,
317                    limit: None,
318                })))
319            }
320
321            // MAX_BY is native in Teradata
322            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
323
324            // MIN_BY is native in Teradata
325            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
326
327            // Pass through everything else
328            _ => Ok(Expression::AggregateFunction(f)),
329        }
330    }
331
332    fn transform_cast(&self, c: Cast) -> Result<Expression> {
333        // Teradata CAST(x AS DATE FORMAT 'fmt') -> StrToDate(x, converted_fmt)
334        // Teradata CAST(x AS TIMESTAMP FORMAT 'fmt') -> StrToTime(x, converted_fmt)
335        if let Some(format_expr) = &c.format {
336            let is_date = matches!(c.to, crate::expressions::DataType::Date);
337            let is_timestamp = matches!(c.to, crate::expressions::DataType::Timestamp { .. });
338
339            if is_date || is_timestamp {
340                // Extract the format string from the expression
341                let fmt_str = match format_expr.as_ref() {
342                    Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.clone()),
343                    _ => None,
344                };
345
346                if let Some(teradata_fmt) = fmt_str {
347                    // Convert Teradata format to strftime format
348                    let strftime_fmt = Self::teradata_to_strftime(&teradata_fmt);
349
350                    if is_date {
351                        return Ok(Expression::StrToDate(Box::new(
352                            crate::expressions::StrToDate {
353                                this: Box::new(c.this),
354                                format: Some(strftime_fmt),
355                                safe: None,
356                            },
357                        )));
358                    } else {
359                        return Ok(Expression::StrToTime(Box::new(
360                            crate::expressions::StrToTime {
361                                this: Box::new(c.this),
362                                format: strftime_fmt,
363                                zone: None,
364                                safe: None,
365                                target_type: None,
366                            },
367                        )));
368                    }
369                }
370            }
371        }
372        // Teradata type mappings are handled in the generator
373        Ok(Expression::Cast(Box::new(c)))
374    }
375
376    /// Convert Teradata date/time format string to strftime format
377    fn teradata_to_strftime(fmt: &str) -> String {
378        // Teradata TIME_MAPPING: longest tokens first to avoid partial matches
379        let mut result = fmt.to_string();
380        // Order matters: replace longer tokens first
381        result = result.replace("YYYY", "%Y");
382        result = result.replace("Y4", "%Y");
383        result = result.replace("YY", "%y");
384        result = result.replace("MMMM", "%B");
385        result = result.replace("MMM", "%b");
386        result = result.replace("MM", "%m");
387        result = result.replace("M4", "%B");
388        result = result.replace("M3", "%b");
389        result = result.replace("EEEE", "%A");
390        result = result.replace("EEE", "%a");
391        result = result.replace("EE", "%a");
392        result = result.replace("E4", "%A");
393        result = result.replace("E3", "%a");
394        result = result.replace("DDD", "%j");
395        result = result.replace("DD", "%d");
396        result = result.replace("D3", "%j");
397        result = result.replace("HH24", "%H");
398        result = result.replace("HH", "%H");
399        result = result.replace("SSSSSS", "%f");
400        result = result.replace("SS", "%S");
401        result = result.replace("MI", "%M");
402        result
403    }
404}