Skip to main content

polyglot_sql/dialects/
teradata.rs

//! Teradata Dialect
//!
//! Teradata-specific transformations based on sqlglot patterns.
//! Teradata has unique syntax including ** for exponentiation,
//! TOP instead of LIMIT, and TRYCAST for safe casting.
6
7use super::{DialectImpl, DialectType};
8use crate::error::Result;
9use crate::expressions::{AggFunc, Case, Cast, Expression, Function, UnaryFunc, VarArgFunc};
10use crate::generator::GeneratorConfig;
11use crate::tokens::TokenizerConfig;
12
/// Teradata dialect.
///
/// A stateless unit type: all behaviour lives in its `DialectImpl`
/// implementation and in its private helper methods. The derives make the
/// value printable in diagnostics and freely copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct TeradataDialect;
15
16impl DialectImpl for TeradataDialect {
17    fn dialect_type(&self) -> DialectType {
18        DialectType::Teradata
19    }
20
21    fn tokenizer_config(&self) -> TokenizerConfig {
22        let mut config = TokenizerConfig::default();
23        // Teradata uses double quotes for identifiers
24        config.identifiers.insert('"', '"');
25        // Teradata does NOT support nested comments
26        config.nested_comments = false;
27        // Teradata-specific keywords and operators
28        config.keywords.insert("SEL".to_string(), crate::tokens::TokenType::Select);
29        config.keywords.insert("UPD".to_string(), crate::tokens::TokenType::Update);
30        config.keywords.insert("DEL".to_string(), crate::tokens::TokenType::Delete);
31        config.keywords.insert("INS".to_string(), crate::tokens::TokenType::Insert);
32        config.keywords.insert("SAMPLE".to_string(), crate::tokens::TokenType::Sample);
33        config.keywords.insert("LOCKING".to_string(), crate::tokens::TokenType::Lock);
34        config.keywords.insert("HELP".to_string(), crate::tokens::TokenType::Command);
35        config.keywords.insert("COLLECT".to_string(), crate::tokens::TokenType::Command);
36        config.keywords.insert("EQ".to_string(), crate::tokens::TokenType::Eq);
37        config.keywords.insert("NE".to_string(), crate::tokens::TokenType::Neq);
38        config.keywords.insert("GE".to_string(), crate::tokens::TokenType::Gte);
39        config.keywords.insert("GT".to_string(), crate::tokens::TokenType::Gt);
40        config.keywords.insert("LE".to_string(), crate::tokens::TokenType::Lte);
41        config.keywords.insert("LT".to_string(), crate::tokens::TokenType::Lt);
42        config.keywords.insert("MOD".to_string(), crate::tokens::TokenType::Mod);
43        config.keywords.insert("BYTEINT".to_string(), crate::tokens::TokenType::SmallInt);
44        config.keywords.insert("ST_GEOMETRY".to_string(), crate::tokens::TokenType::Geometry);
45        // Teradata does not support % as modulo operator
46        config.single_tokens.remove(&'%');
47        // Teradata treats 0x prefix as hex string literals
48        config.hex_number_strings = true;
49        config
50    }
51
52    fn generator_config(&self) -> GeneratorConfig {
53        use crate::generator::IdentifierQuoteStyle;
54        GeneratorConfig {
55            identifier_quote: '"',
56            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
57            dialect: Some(DialectType::Teradata),
58            tablesample_keywords: "SAMPLE",
59            tablesample_requires_parens: false,
60            tz_to_with_time_zone: true,
61            ..Default::default()
62        }
63    }
64
65    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
66        match expr {
67            // IFNULL -> COALESCE in Teradata
68            Expression::IfNull(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
69                expressions: vec![f.this, f.expression],
70            }))),
71
72            // NVL -> COALESCE in Teradata
73            Expression::Nvl(f) => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
74                expressions: vec![f.this, f.expression],
75            }))),
76
77            // Coalesce with original_name (e.g., IFNULL parsed as Coalesce) -> clear original_name
78            Expression::Coalesce(mut f) => {
79                f.original_name = None;
80                Ok(Expression::Coalesce(f))
81            }
82
83            // TryCast -> TRYCAST in Teradata (native)
84            Expression::TryCast(c) => Ok(Expression::TryCast(c)),
85
86            // SafeCast -> TRYCAST in Teradata
87            Expression::SafeCast(c) => Ok(Expression::TryCast(c)),
88
89            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
90            Expression::CountIf(f) => {
91                let case_expr = Expression::Case(Box::new(Case {
92                    operand: None,
93                    whens: vec![(f.this.clone(), Expression::number(1))],
94                    else_: Some(Expression::number(0)),
95                }));
96                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
97                    this: case_expr,
98                    distinct: f.distinct,
99                    filter: f.filter,
100                    order_by: Vec::new(),
101                name: None,
102                limit: None,
103                })))
104            }
105
106            // RAND -> RANDOM in Teradata (but preserve lower/upper for RANDOM(l, u))
107            Expression::Rand(r) => {
108                if r.lower.is_some() || r.upper.is_some() {
109                    // Keep as Rand with lower/upper for Teradata RANDOM(l, u)
110                    Ok(Expression::Rand(r))
111                } else {
112                    Ok(Expression::Random(crate::expressions::Random))
113                }
114            }
115
116            // Generic function transformations
117            Expression::Function(f) => self.transform_function(*f),
118
119            // Generic aggregate function transformations
120            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
121
122            // Cast transformations
123            Expression::Cast(c) => self.transform_cast(*c),
124
125            // Pass through everything else
126            _ => Ok(expr),
127        }
128    }
129}
130
131impl TeradataDialect {
132    fn transform_function(&self, f: Function) -> Result<Expression> {
133        let name_upper = f.name.to_uppercase();
134        match name_upper.as_str() {
135            // IFNULL -> COALESCE
136            "IFNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
137                expressions: f.args,
138            }))),
139
140            // NVL -> COALESCE
141            "NVL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
142                expressions: f.args,
143            }))),
144
145            // ISNULL -> COALESCE
146            "ISNULL" if f.args.len() == 2 => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
147                expressions: f.args,
148            }))),
149
150            // NOW -> CURRENT_TIMESTAMP
151            "NOW" => Ok(Expression::CurrentTimestamp(
152                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
153            )),
154
155            // GETDATE -> CURRENT_TIMESTAMP
156            "GETDATE" => Ok(Expression::CurrentTimestamp(
157                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
158            )),
159
160            // RAND -> RANDOM in Teradata
161            "RAND" => Ok(Expression::Random(crate::expressions::Random)),
162
163            // LEN -> CHARACTER_LENGTH in Teradata
164            "LEN" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
165                f.args.into_iter().next().unwrap(),
166            )))),
167
168            // LENGTH -> CHARACTER_LENGTH in Teradata
169            "LENGTH" if f.args.len() == 1 => Ok(Expression::Length(Box::new(UnaryFunc::new(
170                f.args.into_iter().next().unwrap(),
171            )))),
172
173            // CHARINDEX -> INSTR in Teradata (with swapped args)
174            "CHARINDEX" if f.args.len() >= 2 => {
175                let mut args = f.args;
176                let substring = args.remove(0);
177                let string = args.remove(0);
178                Ok(Expression::Function(Box::new(Function::new(
179                    "INSTR".to_string(),
180                    vec![string, substring],
181                ))))
182            }
183
184            // STRPOS -> INSTR in Teradata
185            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
186                "INSTR".to_string(),
187                f.args,
188            )))),
189
190            // LOCATE -> INSTR in Teradata (with swapped args)
191            "LOCATE" if f.args.len() >= 2 => {
192                let mut args = f.args;
193                let substring = args.remove(0);
194                let string = args.remove(0);
195                Ok(Expression::Function(Box::new(Function::new(
196                    "INSTR".to_string(),
197                    vec![string, substring],
198                ))))
199            }
200
201            // ARRAY_LENGTH -> CARDINALITY in Teradata
202            "ARRAY_LENGTH" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
203                Function::new("CARDINALITY".to_string(), f.args),
204            ))),
205
206            // SIZE -> CARDINALITY in Teradata
207            "SIZE" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
208                "CARDINALITY".to_string(),
209                f.args,
210            )))),
211
212            // SUBSTR -> SUBSTRING
213            "SUBSTR" => Ok(Expression::Function(Box::new(Function::new(
214                "SUBSTRING".to_string(),
215                f.args,
216            )))),
217
218            // DATE_FORMAT -> TO_CHAR in Teradata
219            "DATE_FORMAT" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
220                Function::new("TO_CHAR".to_string(), f.args),
221            ))),
222
223            // strftime -> TO_CHAR in Teradata
224            "STRFTIME" if f.args.len() >= 2 => {
225                let mut args = f.args;
226                let format = args.remove(0);
227                let date = args.remove(0);
228                Ok(Expression::Function(Box::new(Function::new(
229                    "TO_CHAR".to_string(),
230                    vec![date, format],
231                ))))
232            }
233
234            // GREATEST is native in Teradata
235            "GREATEST" => Ok(Expression::Function(Box::new(f))),
236
237            // LEAST is native in Teradata
238            "LEAST" => Ok(Expression::Function(Box::new(f))),
239
240            // Pass through everything else
241            _ => Ok(Expression::Function(Box::new(f))),
242        }
243    }
244
245    fn transform_aggregate_function(
246        &self,
247        f: Box<crate::expressions::AggregateFunction>,
248    ) -> Result<Expression> {
249        let name_upper = f.name.to_uppercase();
250        match name_upper.as_str() {
251            // COUNT_IF -> SUM(CASE WHEN...)
252            "COUNT_IF" if !f.args.is_empty() => {
253                let condition = f.args.into_iter().next().unwrap();
254                let case_expr = Expression::Case(Box::new(Case {
255                    operand: None,
256                    whens: vec![(condition, Expression::number(1))],
257                    else_: Some(Expression::number(0)),
258                }));
259                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
260                    this: case_expr,
261                    distinct: f.distinct,
262                    filter: f.filter,
263                    order_by: Vec::new(),
264                name: None,
265                limit: None,
266                })))
267            }
268
269            // MAX_BY is native in Teradata
270            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
271
272            // MIN_BY is native in Teradata
273            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
274
275            // Pass through everything else
276            _ => Ok(Expression::AggregateFunction(f)),
277        }
278    }
279
280    fn transform_cast(&self, c: Cast) -> Result<Expression> {
281        // Teradata CAST(x AS DATE FORMAT 'fmt') -> StrToDate(x, converted_fmt)
282        // Teradata CAST(x AS TIMESTAMP FORMAT 'fmt') -> StrToTime(x, converted_fmt)
283        if let Some(format_expr) = &c.format {
284            let is_date = matches!(c.to, crate::expressions::DataType::Date);
285            let is_timestamp = matches!(c.to, crate::expressions::DataType::Timestamp { .. });
286
287            if is_date || is_timestamp {
288                // Extract the format string from the expression
289                let fmt_str = match format_expr.as_ref() {
290                    Expression::Literal(crate::expressions::Literal::String(s)) => Some(s.clone()),
291                    _ => None,
292                };
293
294                if let Some(teradata_fmt) = fmt_str {
295                    // Convert Teradata format to strftime format
296                    let strftime_fmt = Self::teradata_to_strftime(&teradata_fmt);
297
298                    if is_date {
299                        return Ok(Expression::StrToDate(Box::new(crate::expressions::StrToDate {
300                            this: Box::new(c.this),
301                            format: Some(strftime_fmt),
302                            safe: None,
303                        })));
304                    } else {
305                        return Ok(Expression::StrToTime(Box::new(crate::expressions::StrToTime {
306                            this: Box::new(c.this),
307                            format: strftime_fmt,
308                            zone: None,
309                            safe: None,
310                            target_type: None,
311                        })));
312                    }
313                }
314            }
315        }
316        // Teradata type mappings are handled in the generator
317        Ok(Expression::Cast(Box::new(c)))
318    }
319
/// Convert a Teradata date/time format string to strftime format.
///
/// The input is scanned once from left to right. At each position the
/// longest Teradata token that matches is replaced by its strftime
/// directive; any character that starts no token is copied through
/// unchanged. Because the output is never rescanned, a directive that has
/// already been emitted (for example `%Y`) cannot be rewritten a second
/// time by a later rule.
///
/// Recognised tokens: `YYYY`/`Y4`, `YY`, `MMMM`/`M4`, `MMM`/`M3`, `MM`,
/// `EEEE`/`E4`, `EEE`/`EE`/`E3`, `DDD`/`D3`, `DD`, `HH24`/`HH`, `MI`, `SS`
/// and `SSSSSS`.
fn teradata_to_strftime(fmt: &str) -> String {
    // Among tokens that share a prefix the longer one is listed first, so
    // taking the first hit yields the longest match.
    const TOKENS: &[(&str, &str)] = &[
        ("SSSSSS", "%f"),
        ("YYYY", "%Y"),
        ("MMMM", "%B"),
        ("EEEE", "%A"),
        ("HH24", "%H"),
        ("MMM", "%b"),
        ("EEE", "%a"),
        ("DDD", "%j"),
        ("YY", "%y"),
        ("Y4", "%Y"),
        ("MM", "%m"),
        ("M4", "%B"),
        ("M3", "%b"),
        ("MI", "%M"),
        ("EE", "%a"),
        ("E4", "%A"),
        ("E3", "%a"),
        ("DD", "%d"),
        ("D3", "%j"),
        ("HH", "%H"),
        ("SS", "%S"),
    ];

    let mut out = String::with_capacity(fmt.len());
    let mut rest = fmt;
    while let Some(ch) = rest.chars().next() {
        let hit = TOKENS
            .iter()
            .find_map(|&(token, directive)| rest.strip_prefix(token).map(|tail| (directive, tail)));
        match hit {
            Some((directive, tail)) => {
                out.push_str(directive);
                rest = tail;
            }
            None => {
                // Not the start of any token: keep the character as literal text.
                out.push(ch);
                rest = &rest[ch.len_utf8()..];
            }
        }
    }
    out
}
348}