Skip to main content

polyglot_sql/dialects/
doris.rs

1//! Doris Dialect
2//!
3//! Apache Doris-specific transformations based on sqlglot patterns.
4//! Doris is MySQL-compatible with some extensions for analytics.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, Expression, Function, Interval, IntervalUnit, IntervalUnitSpec, VarArgFunc,
10};
11#[cfg(feature = "generate")]
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// Dialect marker for Apache Doris.
///
/// This is a field-less unit struct; the Doris-specific behavior is provided by
/// its [`DialectImpl`] implementation.
pub struct DorisDialect;
17
18impl DialectImpl for DorisDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::Doris
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // Doris uses backticks for identifiers (MySQL-style)
26        config.identifiers.insert('`', '`');
27        config.nested_comments = false;
28        config
29    }
30
31    #[cfg(feature = "generate")]
32
33    fn generator_config(&self) -> GeneratorConfig {
34        use crate::generator::IdentifierQuoteStyle;
35        GeneratorConfig {
36            identifier_quote: '`',
37            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
38            dialect: Some(DialectType::Doris),
39            // Doris: COMMENT 'value' (naked property, no = sign)
40            schema_comment_with_eq: false,
41            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
42            with_properties_prefix: "PROPERTIES",
43            ..Default::default()
44        }
45    }
46
47    #[cfg(feature = "transpile")]
48
49    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
50        match expr {
51            // IFNULL is native in Doris (MySQL-style)
52            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
53
54            // NVL -> IFNULL in Doris
55            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
56
57            // TryCast -> not directly supported, use CAST
58            Expression::TryCast(c) => Ok(Expression::Cast(c)),
59
60            // SafeCast -> CAST in Doris
61            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
62
63            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
64            Expression::CountIf(f) => {
65                let case_expr = Expression::Case(Box::new(Case {
66                    operand: None,
67                    whens: vec![(f.this.clone(), Expression::number(1))],
68                    else_: Some(Expression::number(0)),
69                    comments: Vec::new(),
70                    inferred_type: None,
71                }));
72                Ok(Expression::Sum(Box::new(AggFunc {
73                    ignore_nulls: None,
74                    having_max: None,
75                    this: case_expr,
76                    distinct: f.distinct,
77                    filter: f.filter,
78                    order_by: Vec::new(),
79                    name: None,
80                    limit: None,
81                    inferred_type: None,
82                })))
83            }
84
85            // RAND is native in Doris
86            Expression::Rand(r) => Ok(Expression::Rand(r)),
87
88            // REGEXP_LIKE -> REGEXP in Doris
89            Expression::RegexpLike(r) => {
90                let mut args = vec![r.this, r.pattern];
91                if let Some(flags) = r.flags {
92                    args.push(flags);
93                }
94                Ok(Expression::Function(Box::new(Function::new(
95                    "REGEXP".to_string(),
96                    args,
97                ))))
98            }
99
100            // Generic function transformations
101            Expression::Function(f) => self.transform_function(*f),
102
103            // Generic aggregate function transformations
104            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
105
106            // Cast transformations
107            Expression::Cast(c) => self.transform_cast(*c),
108
109            // Pass through everything else
110            _ => Ok(expr),
111        }
112    }
113}
114
115#[cfg(feature = "transpile")]
116impl DorisDialect {
117    fn wrap_day_interval(expr: Expression) -> Expression {
118        Expression::Interval(Box::new(Interval {
119            this: Some(expr),
120            unit: Some(IntervalUnitSpec::Simple {
121                unit: IntervalUnit::Day,
122                use_plural: false,
123            }),
124        }))
125    }
126
127    fn transform_function(&self, f: Function) -> Result<Expression> {
128        let name_upper = f.name.to_uppercase();
129        match name_upper.as_str() {
130            // NVL -> IFNULL
131            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
132                "IFNULL".to_string(),
133                f.args,
134            )))),
135
136            // ISNULL -> IFNULL
137            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
138                "IFNULL".to_string(),
139                f.args,
140            )))),
141
142            // COALESCE is native in Doris
143            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
144                original_name: None,
145                expressions: f.args,
146                inferred_type: None,
147            }))),
148
149            // NOW is native in Doris
150            "NOW" => Ok(Expression::CurrentTimestamp(
151                crate::expressions::CurrentTimestamp {
152                    precision: None,
153                    sysdate: false,
154                },
155            )),
156
157            // GETDATE -> NOW in Doris
158            "GETDATE" => Ok(Expression::CurrentTimestamp(
159                crate::expressions::CurrentTimestamp {
160                    precision: None,
161                    sysdate: false,
162                },
163            )),
164
165            // CURRENT_TIMESTAMP is native
166            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
167                crate::expressions::CurrentTimestamp {
168                    precision: None,
169                    sysdate: false,
170                },
171            )),
172
173            // GROUP_CONCAT is native in Doris
174            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
175
176            // STRING_AGG -> GROUP_CONCAT
177            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
178                Function::new("GROUP_CONCAT".to_string(), f.args),
179            ))),
180
181            // LISTAGG -> GROUP_CONCAT
182            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
183                "GROUP_CONCAT".to_string(),
184                f.args,
185            )))),
186
187            // SUBSTR is native in Doris
188            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
189
190            // SUBSTRING is native in Doris
191            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
192
193            // LENGTH is native in Doris
194            "LENGTH" => Ok(Expression::Function(Box::new(f))),
195
196            // LEN -> LENGTH
197            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
198                "LENGTH".to_string(),
199                f.args,
200            )))),
201
202            // CHARINDEX -> INSTR in Doris (with swapped args)
203            "CHARINDEX" if f.args.len() >= 2 => {
204                let mut args = f.args;
205                let substring = args.remove(0);
206                let string = args.remove(0);
207                Ok(Expression::Function(Box::new(Function::new(
208                    "INSTR".to_string(),
209                    vec![string, substring],
210                ))))
211            }
212
213            // STRPOS -> INSTR
214            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
215                "INSTR".to_string(),
216                f.args,
217            )))),
218
219            // LOCATE is native in Doris (keep as-is)
220            "LOCATE" => Ok(Expression::Function(Box::new(f))),
221
222            // INSTR is native in Doris
223            "INSTR" => Ok(Expression::Function(Box::new(f))),
224
225            // DATE_TRUNC is native in Doris
226            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
227
228            // Doris normalizes MySQL-style day shorthand to INTERVAL syntax.
229            "DATE_ADD" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
230                let mut args = f.args;
231                let date = args.remove(0);
232                let days = args.remove(0);
233                Ok(Expression::Function(Box::new(Function::new(
234                    "DATE_ADD".to_string(),
235                    vec![date, Self::wrap_day_interval(days)],
236                ))))
237            }
238            "DATE_SUB" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
239                let mut args = f.args;
240                let date = args.remove(0);
241                let days = args.remove(0);
242                Ok(Expression::Function(Box::new(Function::new(
243                    "DATE_SUB".to_string(),
244                    vec![date, Self::wrap_day_interval(days)],
245                ))))
246            }
247            "ADDDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
248                let mut args = f.args;
249                let date = args.remove(0);
250                let days = args.remove(0);
251                Ok(Expression::Function(Box::new(Function::new(
252                    "DATE_ADD".to_string(),
253                    vec![date, Self::wrap_day_interval(days)],
254                ))))
255            }
256            "SUBDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
257                let mut args = f.args;
258                let date = args.remove(0);
259                let days = args.remove(0);
260                Ok(Expression::Function(Box::new(Function::new(
261                    "DATE_SUB".to_string(),
262                    vec![date, Self::wrap_day_interval(days)],
263                ))))
264            }
265
266            // COLLECT_LIST is native in Doris
267            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
268
269            // COLLECT_SET is native in Doris
270            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
271
272            // ARRAY_AGG -> COLLECT_LIST
273            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
274                "COLLECT_LIST".to_string(),
275                f.args,
276            )))),
277
278            // TO_DATE is native in Doris
279            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
280
281            // TO_TIMESTAMP -> FROM_UNIXTIME or similar
282            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
283
284            // DATE_FORMAT is native in Doris (MySQL-style)
285            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
286
287            // strftime -> DATE_FORMAT
288            "STRFTIME" if f.args.len() >= 2 => {
289                let mut args = f.args;
290                let format = args.remove(0);
291                let date = args.remove(0);
292                Ok(Expression::Function(Box::new(Function::new(
293                    "DATE_FORMAT".to_string(),
294                    vec![date, format],
295                ))))
296            }
297
298            // TO_CHAR -> DATE_FORMAT
299            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
300                "DATE_FORMAT".to_string(),
301                f.args,
302            )))),
303
304            // JSON_EXTRACT is native in Doris
305            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
306
307            // GET_JSON_OBJECT -> JSON_EXTRACT
308            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
309                Function::new("JSON_EXTRACT".to_string(), f.args),
310            ))),
311
312            // REGEXP is native in Doris
313            "REGEXP" => Ok(Expression::Function(Box::new(f))),
314
315            // RLIKE is native in Doris
316            "RLIKE" => Ok(Expression::Function(Box::new(f))),
317
318            // REGEXP_LIKE -> REGEXP
319            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
320                Function::new("REGEXP".to_string(), f.args),
321            ))),
322
323            // MONTHS_ADD is native in Doris
324            "MONTHS_ADD" => Ok(Expression::Function(Box::new(f))),
325
326            // ADD_MONTHS -> MONTHS_ADD
327            "ADD_MONTHS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
328                "MONTHS_ADD".to_string(),
329                f.args,
330            )))),
331
332            // Pass through everything else
333            _ => Ok(Expression::Function(Box::new(f))),
334        }
335    }
336
337    fn transform_aggregate_function(
338        &self,
339        f: Box<crate::expressions::AggregateFunction>,
340    ) -> Result<Expression> {
341        let name_upper = f.name.to_uppercase();
342        match name_upper.as_str() {
343            // COUNT_IF -> SUM(CASE WHEN...)
344            "COUNT_IF" if !f.args.is_empty() => {
345                let condition = f.args.into_iter().next().unwrap();
346                let case_expr = Expression::Case(Box::new(Case {
347                    operand: None,
348                    whens: vec![(condition, Expression::number(1))],
349                    else_: Some(Expression::number(0)),
350                    comments: Vec::new(),
351                    inferred_type: None,
352                }));
353                Ok(Expression::Sum(Box::new(AggFunc {
354                    ignore_nulls: None,
355                    having_max: None,
356                    this: case_expr,
357                    distinct: f.distinct,
358                    filter: f.filter,
359                    order_by: Vec::new(),
360                    name: None,
361                    limit: None,
362                    inferred_type: None,
363                })))
364            }
365
366            // APPROX_COUNT_DISTINCT is native in Doris
367            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
368
369            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
370            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
371                Function::new("APPROX_COUNT_DISTINCT".to_string(), f.args),
372            ))),
373
374            // MAX_BY is native in Doris
375            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
376
377            // MIN_BY is native in Doris
378            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
379
380            // Pass through everything else
381            _ => Ok(Expression::AggregateFunction(f)),
382        }
383    }
384
385    fn transform_cast(&self, c: Cast) -> Result<Expression> {
386        // Doris type mappings are handled in the generator
387        Ok(Expression::Cast(Box::new(c)))
388    }
389}