Skip to main content

polyglot_sql/dialects/
teradata.rs

1//! Teradata Dialect
2//!
3//! Teradata-specific transformations based on sqlglot patterns.
4//! Teradata has unique syntax including ** for exponentiation,
5//! TOP instead of LIMIT, and TRYCAST for safe casting.
6
7use super::{DialectImpl, DialectType};
8use crate::error::Result;
9use crate::expressions::{AggFunc, Case, Cast, Expression, Function, UnaryFunc, VarArgFunc};
10use crate::generator::GeneratorConfig;
11use crate::tokens::TokenizerConfig;
12
13/// Teradata dialect
14pub struct TeradataDialect;
15
16impl DialectImpl for TeradataDialect {
17    fn dialect_type(&self) -> DialectType {
18        DialectType::Teradata
19    }
20
21    fn tokenizer_config(&self) -> TokenizerConfig {
22        let mut config = TokenizerConfig::default();
23        // Teradata uses double quotes for identifiers
24        config.identifiers.insert('"', '"');
25        // Teradata does NOT support nested comments
26        config.nested_comments = false;
27        // Teradata-specific keywords and operators
28        config
29            .keywords
30            .insert("SEL".to_string(), crate::tokens::TokenType::Select);
31        config
32            .keywords
33            .insert("UPD".to_string(), crate::tokens::TokenType::Update);
34        config
35            .keywords
36            .insert("DEL".to_string(), crate::tokens::TokenType::Delete);
37        config
38            .keywords
39            .insert("INS".to_string(), crate::tokens::TokenType::Insert);
40        config
41            .keywords
42            .insert("SAMPLE".to_string(), crate::tokens::TokenType::Sample);
43        config
44            .keywords
45            .insert("LOCKING".to_string(), crate::tokens::TokenType::Lock);
46        config
47            .keywords
48            .insert("HELP".to_string(), crate::tokens::TokenType::Command);
49        config
50            .keywords
51            .insert("COLLECT".to_string(), crate::tokens::TokenType::Command);
52        config
53            .keywords
54            .insert("EQ".to_string(), crate::tokens::TokenType::Eq);
55        config
56            .keywords
57            .insert("NE".to_string(), crate::tokens::TokenType::Neq);
58        config
59            .keywords
60            .insert("GE".to_string(), crate::tokens::TokenType::Gte);
61        config
62            .keywords
63            .insert("GT".to_string(), crate::tokens::TokenType::Gt);
64        config
65            .keywords
66            .insert("LE".to_string(), crate::tokens::TokenType::Lte);
67        config
68            .keywords
69            .insert("LT".to_string(), crate::tokens::TokenType::Lt);
70        config
71            .keywords
72            .insert("MOD".to_string(), crate::tokens::TokenType::Mod);
73        config
74            .keywords
75            .insert("BYTEINT".to_string(), crate::tokens::TokenType::SmallInt);
76        config.keywords.insert(
77            "ST_GEOMETRY".to_string(),
78            crate::tokens::TokenType::Geometry,
79        );
80        // Teradata does not support % as modulo operator
81        config.single_tokens.remove(&'%');
82        // Teradata treats 0x prefix as hex string literals
83        config.hex_number_strings = true;
84        config
85    }
86
87    fn generator_config(&self) -> GeneratorConfig {
88        use crate::generator::IdentifierQuoteStyle;
89        GeneratorConfig {
90            identifier_quote: '"',
91            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
92            dialect: Some(DialectType::Teradata),
93            tablesample_keywords: "SAMPLE",
94            tablesample_requires_parens: false,
95            tz_to_with_time_zone: true,
96            ..Default::default()
97        }
98    }
99
100    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
101        match expr {
102            // IFNULL -> COALESCE in Teradata
103            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
104                original_name: None,
105                expressions: vec![f.this, f.expression],
106                inferred_type: None,
107            }))),
108
109            // NVL -> COALESCE in Teradata
110            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc {
111                original_name: None,
112                expressions: vec![f.this, f.expression],
113                inferred_type: None,
114            }))),
115
116            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
117            Expression::Coalesce(mut f) => {
118                f.original_name = None;
119                Ok(Expression::Coalesce(f))
120            }
121
122            // TryCast -> TRYCAST in Teradata (native)
123            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
124
125            // SafeCast -> TRYCAST in Teradata
126            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
127
128            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
129            Expression::CountIf(f) => {
130                let case_expr = Expression::Case(Box::new(Case {
131                    operand: None,
132                    whens: vec![(f.this.clone(), Expression::number(1))],
133                    else_: Some(Expression::number(0)),
134                    comments: Vec::new(),
135                    inferred_type: None,
136                }));
137                Ok(Expression::Sum(Box::new(AggFunc {
138                    ignore_nulls: None,
139                    having_max: None,
140                    this: case_expr,
141                    distinct: f.distinct,
142                    filter: f.filter,
143                    order_by: Vec::new(),
144                    name: None,
145                    limit: None,
146                    inferred_type: None,
147                })))
148            }
149
150            // RAND -> RANDOM in Teradata (but preserve lower/upper for RANDOM(l, u))
151            Expression::Rand(r) => {
152                if r.lower.is_some() || r.upper.is_some() {
153                    // Keep as Rand with lower/upper for Teradata RANDOM(l, u)
154                    Ok(Expression::Rand(r))
155                } else {
156                    Ok(Expression::Random(crate::expressions::Random))
157                }
158            }
159
160            // Generic function transformations
161            Expression::Function(f) => self.transform_function(*f),
162
163            // Generic aggregate function transformations
164            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
165
166            // Cast transformations
167            Expression::Cast(c) => self.transform_cast(*c),
168
169            // Pass through everything else
170            _ => Ok(expr),
171        }
172    }
173}
174
175impl TeradataDialect {
176    fn transform_function(&self, f: Function) -> Result<Expression> {
177        let name_upper = f.name.to_uppercase();
178        match name_upper.as_str() {
179            // IFNULL -> COALESCE
180            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
181                original_name: None,
182                expressions: f.args,
183                inferred_type: None,
184            }))),
185
186            // NVL -> COALESCE
187            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
188                original_name: None,
189                expressions: f.args,
190                inferred_type: None,
191            }))),
192
193            // ISNULL -> COALESCE
194            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc {
195                original_name: None,
196                expressions: f.args,
197                inferred_type: None,
198            }))),
199
200            // NOW -> CURRENT_TIMESTAMP
201            "NOW" => Ok(Expression::CurrentTimestamp(
202                crate::expressions::CurrentTimestamp {
203                    precision: None,
204                    sysdate: false,
205                },
206            )),
207
208            // GETDATE -> CURRENT_TIMESTAMP
209            "GETDATE" => Ok(Expression::CurrentTimestamp(
210                crate::expressions::CurrentTimestamp {
211                    precision: None,
212                    sysdate: false,
213                },
214            )),
215
216            // RAND -> RANDOM in Teradata
217            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
218
219            // LEN -> CHARACTER_LENGTH in Teradata
220            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
221                f.args.into_iter().next().unwrap(),
222            )))),
223
224            // LENGTH -> CHARACTER_LENGTH in Teradata
225            "LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
226                f.args.into_iter().next().unwrap(),
227            )))),
228
229            // CHARINDEX -> INSTR in Teradata (with swapped args)
230            "CHARINDEX" if f.args.len() >= 2 => {
231                let mut args = f.args;
232                let substring = args.remove(0);
233                let string = args.remove(0);
234                Ok(Expression::Function(Box::new(Function::new(
235                    "INSTR".to_string(),
236                    vec![string, substring],
237                ))))
238            }
239
240            // STRPOS -> INSTR in Teradata
241            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
242                "INSTR".to_string(),
243                f.args,
244            )))),
245
246            // LOCATE -> INSTR in Teradata (with swapped args)
247            "LOCATE" if f.args.len() >= 2 => {
248                let mut args = f.args;
249                let substring = args.remove(0);
250                let string = args.remove(0);
251                Ok(Expression::Function(Box::new(Function::new(
252                    "INSTR".to_string(),
253                    vec![string, substring],
254                ))))
255            }
256
257            // ARRAY_LENGTH -> CARDINALITY in Teradata
258            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
259                Function::new("CARDINALITY".to_string(), f.args),
260            ))),
261
262            // SIZE -> CARDINALITY in Teradata
263            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
264                "CARDINALITY".to_string(),
265                f.args,
266            )))),
267
268            // SUBSTR -> SUBSTRING
269            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
270                "SUBSTRING".to_string(),
271                f.args,
272            )))),
273
274            // DATE_FORMAT -> TO_CHAR in Teradata
275            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
276                Function::new("TO_CHAR".to_string(), f.args),
277            ))),
278
279            // strftime -> TO_CHAR in Teradata
280            "STRFTIME" if f.args.len() >= 2 => {
281                let mut args = f.args;
282                let format = args.remove(0);
283                let date = args.remove(0);
284                Ok(Expression::Function(Box::new(Function::new(
285                    "TO_CHAR".to_string(),
286                    vec![date, format],
287                ))))
288            }
289
290            // GREATEST is native in Teradata
291            "GREATEST" => Ok(Expression::Function(Box::new(f))),
292
293            // LEAST is native in Teradata
294            "LEAST" => Ok(Expression::Function(Box::new(f))),
295
296            // Pass through everything else
297            _ => Ok(Expression::Function(Box::new(f))),
298        }
299    }
300
301    fn transform_aggregate_function(
302        &self,
303        f: Box<crate::expressions::AggregateFunction>,
304    ) -> Result<Expression> {
305        let name_upper = f.name.to_uppercase();
306        match name_upper.as_str() {
307            // COUNT_IF -> SUM(CASE WHEN...)
308            "COUNT_IF" if !f.args.is_empty() => {
309                let condition = f.args.into_iter().next().unwrap();
310                let case_expr = Expression::Case(Box::new(Case {
311                    operand: None,
312                    whens: vec![(condition, Expression::number(1))],
313                    else_: Some(Expression::number(0)),
314                    comments: Vec::new(),
315                    inferred_type: None,
316                }));
317                Ok(Expression::Sum(Box::new(AggFunc {
318                    ignore_nulls: None,
319                    having_max: None,
320                    this: case_expr,
321                    distinct: f.distinct,
322                    filter: f.filter,
323                    order_by: Vec::new(),
324                    name: None,
325                    limit: None,
326                    inferred_type: None,
327                })))
328            }
329
330            // MAX_BY is native in Teradata
331            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
332
333            // MIN_BY is native in Teradata
334            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
335
336            // Pass through everything else
337            _ => Ok(Expression::AggregateFunction(f)),
338        }
339    }
340
341    fn transform_cast(&self, c: Cast) -> Result<Expression> {
342        // Teradata CAST(x AS DATE FORMAT 'fmt') -> StrToDate(x, converted_fmt)
343        // Teradata CAST(x AS TIMESTAMP FORMAT 'fmt') -> StrToTime(x, converted_fmt)
344        if let Some(format_expr) = &c.format {
345            let is_date = matches!(c.to, crate::expressions::DataType::Date);
346            let is_timestamp = matches!(c.to, crate::expressions::DataType::Timestamp { .. });
347
348            if is_date || is_timestamp {
349                // Extract the format string from the expression
350                let fmt_str = match format_expr.as_ref() {
351                    Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.clone()),
352                    _ => None,
353                };
354
355                if let Some(teradata_fmt) = fmt_str {
356                    // Convert Teradata format to strftime format
357                    let strftime_fmt = Self::teradata_to_strftime(&teradata_fmt);
358
359                    if is_date {
360                        return Ok(Expression::StrToDate(Box::new(
361                            crate::expressions::StrToDate {
362                                this: Box::new(c.this),
363                                format: Some(strftime_fmt),
364                                safe: None,
365                            },
366                        )));
367                    } else {
368                        return Ok(Expression::StrToTime(Box::new(
369                            crate::expressions::StrToTime {
370                                this: Box::new(c.this),
371                                format: strftime_fmt,
372                                zone: None,
373                                safe: None,
374                                target_type: None,
375                            },
376                        )));
377                    }
378                }
379            }
380        }
381        // Teradata type mappings are handled in the generator
382        Ok(Expression::Cast(Box::new(c)))
383    }
384
385    /// Convert Teradata date/time format string to strftime format
386    fn teradata_to_strftime(fmt: &str) -> String {
387        // Teradata TIME_MAPPING: longest tokens first to avoid partial matches
388        let mut result = fmt.to_string();
389        // Order matters: replace longer tokens first
390        result = result.replace("YYYY", "%Y");
391        result = result.replace("Y4", "%Y");
392        result = result.replace("YY", "%y");
393        result = result.replace("MMMM", "%B");
394        result = result.replace("MMM", "%b");
395        result = result.replace("MM", "%m");
396        result = result.replace("M4", "%B");
397        result = result.replace("M3", "%b");
398        result = result.replace("EEEE", "%A");
399        result = result.replace("EEE", "%a");
400        result = result.replace("EE", "%a");
401        result = result.replace("E4", "%A");
402        result = result.replace("E3", "%a");
403        result = result.replace("DDD", "%j");
404        result = result.replace("DD", "%d");
405        result = result.replace("D3", "%j");
406        result = result.replace("HH24", "%H");
407        result = result.replace("HH", "%H");
408        result = result.replace("SSSSSS", "%f");
409        result = result.replace("SS", "%S");
410        result = result.replace("MI", "%M");
411        result
412    }
413}