Skip to main content

polyglot_sql/dialects/
doris.rs

1//! Doris Dialect
2//!
3//! Apache Doris-specific transformations based on sqlglot patterns.
4//! Doris is MySQL-compatible with some extensions for analytics.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, Expression, Function, Interval, IntervalUnit, IntervalUnitSpec, VarArgFunc,
10};
11use crate::generator::GeneratorConfig;
12use crate::tokens::TokenizerConfig;
13
/// Apache Doris dialect.
///
/// A unit struct (it carries no state) whose [`DialectImpl`] implementation supplies the
/// Doris tokenizer settings, generator settings, and expression rewrites.
pub struct DorisDialect;
16
17impl DialectImpl for DorisDialect {
18    fn dialect_type(&self) -> DialectType {
19        DialectType::Doris
20    }
21
22    fn tokenizer_config(&self) -> TokenizerConfig {
23        let mut config = TokenizerConfig::default();
24        // Doris uses backticks for identifiers (MySQL-style)
25        config.identifiers.insert('`', '`');
26        config.nested_comments = false;
27        config
28    }
29
30    fn generator_config(&self) -> GeneratorConfig {
31        use crate::generator::IdentifierQuoteStyle;
32        GeneratorConfig {
33            identifier_quote: '`',
34            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
35            dialect: Some(DialectType::Doris),
36            // Doris: COMMENT 'value' (naked property, no = sign)
37            schema_comment_with_eq: false,
38            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
39            with_properties_prefix: "PROPERTIES",
40            ..Default::default()
41        }
42    }
43
44    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
45        match expr {
46            // IFNULL is native in Doris (MySQL-style)
47            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
48
49            // NVL -> IFNULL in Doris
50            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
51
52            // TryCast -> not directly supported, use CAST
53            Expression::TryCast(c) => Ok(Expression::Cast(c)),
54
55            // SafeCast -> CAST in Doris
56            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
57
58            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
59            Expression::CountIf(f) => {
60                let case_expr = Expression::Case(Box::new(Case {
61                    operand: None,
62                    whens: vec![(f.this.clone(), Expression::number(1))],
63                    else_: Some(Expression::number(0)),
64                    comments: Vec::new(),
65                    inferred_type: None,
66                }));
67                Ok(Expression::Sum(Box::new(AggFunc {
68                    ignore_nulls: None,
69                    having_max: None,
70                    this: case_expr,
71                    distinct: f.distinct,
72                    filter: f.filter,
73                    order_by: Vec::new(),
74                    name: None,
75                    limit: None,
76                    inferred_type: None,
77                })))
78            }
79
80            // RAND is native in Doris
81            Expression::Rand(r) => Ok(Expression::Rand(r)),
82
83            // REGEXP_LIKE -> REGEXP in Doris
84            Expression::RegexpLike(r) => {
85                let mut args = vec![r.this, r.pattern];
86                if let Some(flags) = r.flags {
87                    args.push(flags);
88                }
89                Ok(Expression::Function(Box::new(Function::new(
90                    "REGEXP".to_string(),
91                    args,
92                ))))
93            }
94
95            // Generic function transformations
96            Expression::Function(f) => self.transform_function(*f),
97
98            // Generic aggregate function transformations
99            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
100
101            // Cast transformations
102            Expression::Cast(c) => self.transform_cast(*c),
103
104            // Pass through everything else
105            _ => Ok(expr),
106        }
107    }
108}
109
110impl DorisDialect {
111    fn wrap_day_interval(expr: Expression) -> Expression {
112        Expression::Interval(Box::new(Interval {
113            this: Some(expr),
114            unit: Some(IntervalUnitSpec::Simple {
115                unit: IntervalUnit::Day,
116                use_plural: false,
117            }),
118        }))
119    }
120
121    fn transform_function(&self, f: Function) -> Result<Expression> {
122        let name_upper = f.name.to_uppercase();
123        match name_upper.as_str() {
124            // NVL -> IFNULL
125            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
126                "IFNULL".to_string(),
127                f.args,
128            )))),
129
130            // ISNULL -> IFNULL
131            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
132                "IFNULL".to_string(),
133                f.args,
134            )))),
135
136            // COALESCE is native in Doris
137            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
138                original_name: None,
139                expressions: f.args,
140                inferred_type: None,
141            }))),
142
143            // NOW is native in Doris
144            "NOW" => Ok(Expression::CurrentTimestamp(
145                crate::expressions::CurrentTimestamp {
146                    precision: None,
147                    sysdate: false,
148                },
149            )),
150
151            // GETDATE -> NOW in Doris
152            "GETDATE" => Ok(Expression::CurrentTimestamp(
153                crate::expressions::CurrentTimestamp {
154                    precision: None,
155                    sysdate: false,
156                },
157            )),
158
159            // CURRENT_TIMESTAMP is native
160            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
161                crate::expressions::CurrentTimestamp {
162                    precision: None,
163                    sysdate: false,
164                },
165            )),
166
167            // GROUP_CONCAT is native in Doris
168            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
169
170            // STRING_AGG -> GROUP_CONCAT
171            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
172                Function::new("GROUP_CONCAT".to_string(), f.args),
173            ))),
174
175            // LISTAGG -> GROUP_CONCAT
176            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
177                "GROUP_CONCAT".to_string(),
178                f.args,
179            )))),
180
181            // SUBSTR is native in Doris
182            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
183
184            // SUBSTRING is native in Doris
185            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
186
187            // LENGTH is native in Doris
188            "LENGTH" => Ok(Expression::Function(Box::new(f))),
189
190            // LEN -> LENGTH
191            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
192                "LENGTH".to_string(),
193                f.args,
194            )))),
195
196            // CHARINDEX -> INSTR in Doris (with swapped args)
197            "CHARINDEX" if f.args.len() >= 2 => {
198                let mut args = f.args;
199                let substring = args.remove(0);
200                let string = args.remove(0);
201                Ok(Expression::Function(Box::new(Function::new(
202                    "INSTR".to_string(),
203                    vec![string, substring],
204                ))))
205            }
206
207            // STRPOS -> INSTR
208            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
209                "INSTR".to_string(),
210                f.args,
211            )))),
212
213            // LOCATE is native in Doris (keep as-is)
214            "LOCATE" => Ok(Expression::Function(Box::new(f))),
215
216            // INSTR is native in Doris
217            "INSTR" => Ok(Expression::Function(Box::new(f))),
218
219            // DATE_TRUNC is native in Doris
220            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
221
222            // Doris normalizes MySQL-style day shorthand to INTERVAL syntax.
223            "DATE_ADD" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
224                let mut args = f.args;
225                let date = args.remove(0);
226                let days = args.remove(0);
227                Ok(Expression::Function(Box::new(Function::new(
228                    "DATE_ADD".to_string(),
229                    vec![date, Self::wrap_day_interval(days)],
230                ))))
231            }
232            "DATE_SUB" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
233                let mut args = f.args;
234                let date = args.remove(0);
235                let days = args.remove(0);
236                Ok(Expression::Function(Box::new(Function::new(
237                    "DATE_SUB".to_string(),
238                    vec![date, Self::wrap_day_interval(days)],
239                ))))
240            }
241            "ADDDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
242                let mut args = f.args;
243                let date = args.remove(0);
244                let days = args.remove(0);
245                Ok(Expression::Function(Box::new(Function::new(
246                    "DATE_ADD".to_string(),
247                    vec![date, Self::wrap_day_interval(days)],
248                ))))
249            }
250            "SUBDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
251                let mut args = f.args;
252                let date = args.remove(0);
253                let days = args.remove(0);
254                Ok(Expression::Function(Box::new(Function::new(
255                    "DATE_SUB".to_string(),
256                    vec![date, Self::wrap_day_interval(days)],
257                ))))
258            }
259
260            // COLLECT_LIST is native in Doris
261            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
262
263            // COLLECT_SET is native in Doris
264            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
265
266            // ARRAY_AGG -> COLLECT_LIST
267            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
268                "COLLECT_LIST".to_string(),
269                f.args,
270            )))),
271
272            // TO_DATE is native in Doris
273            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
274
275            // TO_TIMESTAMP -> FROM_UNIXTIME or similar
276            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
277
278            // DATE_FORMAT is native in Doris (MySQL-style)
279            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
280
281            // strftime -> DATE_FORMAT
282            "STRFTIME" if f.args.len() >= 2 => {
283                let mut args = f.args;
284                let format = args.remove(0);
285                let date = args.remove(0);
286                Ok(Expression::Function(Box::new(Function::new(
287                    "DATE_FORMAT".to_string(),
288                    vec![date, format],
289                ))))
290            }
291
292            // TO_CHAR -> DATE_FORMAT
293            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
294                "DATE_FORMAT".to_string(),
295                f.args,
296            )))),
297
298            // JSON_EXTRACT is native in Doris
299            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
300
301            // GET_JSON_OBJECT -> JSON_EXTRACT
302            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
303                Function::new("JSON_EXTRACT".to_string(), f.args),
304            ))),
305
306            // REGEXP is native in Doris
307            "REGEXP" => Ok(Expression::Function(Box::new(f))),
308
309            // RLIKE is native in Doris
310            "RLIKE" => Ok(Expression::Function(Box::new(f))),
311
312            // REGEXP_LIKE -> REGEXP
313            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
314                Function::new("REGEXP".to_string(), f.args),
315            ))),
316
317            // MONTHS_ADD is native in Doris
318            "MONTHS_ADD" => Ok(Expression::Function(Box::new(f))),
319
320            // ADD_MONTHS -> MONTHS_ADD
321            "ADD_MONTHS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
322                "MONTHS_ADD".to_string(),
323                f.args,
324            )))),
325
326            // Pass through everything else
327            _ => Ok(Expression::Function(Box::new(f))),
328        }
329    }
330
331    fn transform_aggregate_function(
332        &self,
333        f: Box<crate::expressions::AggregateFunction>,
334    ) -> Result<Expression> {
335        let name_upper = f.name.to_uppercase();
336        match name_upper.as_str() {
337            // COUNT_IF -> SUM(CASE WHEN...)
338            "COUNT_IF" if !f.args.is_empty() => {
339                let condition = f.args.into_iter().next().unwrap();
340                let case_expr = Expression::Case(Box::new(Case {
341                    operand: None,
342                    whens: vec![(condition, Expression::number(1))],
343                    else_: Some(Expression::number(0)),
344                    comments: Vec::new(),
345                    inferred_type: None,
346                }));
347                Ok(Expression::Sum(Box::new(AggFunc {
348                    ignore_nulls: None,
349                    having_max: None,
350                    this: case_expr,
351                    distinct: f.distinct,
352                    filter: f.filter,
353                    order_by: Vec::new(),
354                    name: None,
355                    limit: None,
356                    inferred_type: None,
357                })))
358            }
359
360            // APPROX_COUNT_DISTINCT is native in Doris
361            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
362
363            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
364            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
365                Function::new("APPROX_COUNT_DISTINCT".to_string(), f.args),
366            ))),
367
368            // MAX_BY is native in Doris
369            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
370
371            // MIN_BY is native in Doris
372            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
373
374            // Pass through everything else
375            _ => Ok(Expression::AggregateFunction(f)),
376        }
377    }
378
379    fn transform_cast(&self, c: Cast) -> Result<Expression> {
380        // Doris type mappings are handled in the generator
381        Ok(Expression::Cast(Box::new(c)))
382    }
383}