Skip to main content

polyglot_sql/dialects/
doris.rs

//! Doris Dialect
//!
//! Apache Doris-specific transformations based on sqlglot patterns.
//! Doris is MySQL-compatible with some extensions for analytics.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{AggFunc, Case, Cast, Expression, Function, VarArgFunc};
9use crate::generator::GeneratorConfig;
10use crate::tokens::TokenizerConfig;
11
/// Doris dialect.
///
/// Zero-sized marker type; all behaviour lives in its [`DialectImpl`]
/// implementation and the private rewrite helpers on this type.
#[derive(Debug, Clone, Copy, Default)]
pub struct DorisDialect;
14
15impl DialectImpl for DorisDialect {
16    fn dialect_type(&self) -> DialectType {
17        DialectType::Doris
18    }
19
20    fn tokenizer_config(&self) -> TokenizerConfig {
21        let mut config = TokenizerConfig::default();
22        // Doris uses backticks for identifiers (MySQL-style)
23        config.identifiers.insert('`', '`');
24        config.nested_comments = false;
25        config
26    }
27
28    fn generator_config(&self) -> GeneratorConfig {
29        use crate::generator::IdentifierQuoteStyle;
30        GeneratorConfig {
31            identifier_quote: '`',
32            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
33            dialect: Some(DialectType::Doris),
34            // Doris: COMMENT 'value' (naked property, no = sign)
35            schema_comment_with_eq: false,
36            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
37            with_properties_prefix: "PROPERTIES",
38            ..Default::default()
39        }
40    }
41
42    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
43        match expr {
44            // IFNULL is native in Doris (MySQL-style)
45            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
46
47            // NVL -> IFNULL in Doris
48            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
49
50            // TryCast -> not directly supported, use CAST
51            Expression::TryCast(c) => Ok(Expression::Cast(c)),
52
53            // SafeCast -> CAST in Doris
54            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
55
56            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
57            Expression::CountIf(f) => {
58                let case_expr = Expression::Case(Box::new(Case {
59                    operand: None,
60                    whens: vec![(f.this.clone(), Expression::number(1))],
61                    else_: Some(Expression::number(0)),
62                    comments: Vec::new(),
63                    inferred_type: None,
64                }));
65                Ok(Expression::Sum(Box::new(AggFunc {
66                    ignore_nulls: None,
67                    having_max: None,
68                    this: case_expr,
69                    distinct: f.distinct,
70                    filter: f.filter,
71                    order_by: Vec::new(),
72                    name: None,
73                    limit: None,
74                    inferred_type: None,
75                })))
76            }
77
78            // RAND is native in Doris
79            Expression::Rand(r) => Ok(Expression::Rand(r)),
80
81            // REGEXP_LIKE -> REGEXP in Doris
82            Expression::RegexpLike(r) => {
83                let mut args = vec![r.this, r.pattern];
84                if let Some(flags) = r.flags {
85                    args.push(flags);
86                }
87                Ok(Expression::Function(Box::new(Function::new(
88                    "REGEXP".to_string(),
89                    args,
90                ))))
91            }
92
93            // Generic function transformations
94            Expression::Function(f) => self.transform_function(*f),
95
96            // Generic aggregate function transformations
97            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
98
99            // Cast transformations
100            Expression::Cast(c) => self.transform_cast(*c),
101
102            // Pass through everything else
103            _ => Ok(expr),
104        }
105    }
106}
107
108impl DorisDialect {
109    fn transform_function(&self, f: Function) -> Result<Expression> {
110        let name_upper = f.name.to_uppercase();
111        match name_upper.as_str() {
112            // NVL -> IFNULL
113            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
114                "IFNULL".to_string(),
115                f.args,
116            )))),
117
118            // ISNULL -> IFNULL
119            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
120                "IFNULL".to_string(),
121                f.args,
122            )))),
123
124            // COALESCE is native in Doris
125            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
126                original_name: None,
127                expressions: f.args,
128                inferred_type: None,
129            }))),
130
131            // NOW is native in Doris
132            "NOW" => Ok(Expression::CurrentTimestamp(
133                crate::expressions::CurrentTimestamp {
134                    precision: None,
135                    sysdate: false,
136                },
137            )),
138
139            // GETDATE -> NOW in Doris
140            "GETDATE" => Ok(Expression::CurrentTimestamp(
141                crate::expressions::CurrentTimestamp {
142                    precision: None,
143                    sysdate: false,
144                },
145            )),
146
147            // CURRENT_TIMESTAMP is native
148            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
149                crate::expressions::CurrentTimestamp {
150                    precision: None,
151                    sysdate: false,
152                },
153            )),
154
155            // GROUP_CONCAT is native in Doris
156            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
157
158            // STRING_AGG -> GROUP_CONCAT
159            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
160                Function::new("GROUP_CONCAT".to_string(), f.args),
161            ))),
162
163            // LISTAGG -> GROUP_CONCAT
164            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
165                "GROUP_CONCAT".to_string(),
166                f.args,
167            )))),
168
169            // SUBSTR is native in Doris
170            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
171
172            // SUBSTRING is native in Doris
173            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
174
175            // LENGTH is native in Doris
176            "LENGTH" => Ok(Expression::Function(Box::new(f))),
177
178            // LEN -> LENGTH
179            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
180                "LENGTH".to_string(),
181                f.args,
182            )))),
183
184            // CHARINDEX -> INSTR in Doris (with swapped args)
185            "CHARINDEX" if f.args.len() >= 2 => {
186                let mut args = f.args;
187                let substring = args.remove(0);
188                let string = args.remove(0);
189                Ok(Expression::Function(Box::new(Function::new(
190                    "INSTR".to_string(),
191                    vec![string, substring],
192                ))))
193            }
194
195            // STRPOS -> INSTR
196            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
197                "INSTR".to_string(),
198                f.args,
199            )))),
200
201            // LOCATE is native in Doris (keep as-is)
202            "LOCATE" => Ok(Expression::Function(Box::new(f))),
203
204            // INSTR is native in Doris
205            "INSTR" => Ok(Expression::Function(Box::new(f))),
206
207            // DATE_TRUNC is native in Doris
208            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
209
210            // COLLECT_LIST is native in Doris
211            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
212
213            // COLLECT_SET is native in Doris
214            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
215
216            // ARRAY_AGG -> COLLECT_LIST
217            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
218                "COLLECT_LIST".to_string(),
219                f.args,
220            )))),
221
222            // TO_DATE is native in Doris
223            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
224
225            // TO_TIMESTAMP -> FROM_UNIXTIME or similar
226            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
227
228            // DATE_FORMAT is native in Doris (MySQL-style)
229            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
230
231            // strftime -> DATE_FORMAT
232            "STRFTIME" if f.args.len() >= 2 => {
233                let mut args = f.args;
234                let format = args.remove(0);
235                let date = args.remove(0);
236                Ok(Expression::Function(Box::new(Function::new(
237                    "DATE_FORMAT".to_string(),
238                    vec![date, format],
239                ))))
240            }
241
242            // TO_CHAR -> DATE_FORMAT
243            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
244                "DATE_FORMAT".to_string(),
245                f.args,
246            )))),
247
248            // JSON_EXTRACT is native in Doris
249            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
250
251            // GET_JSON_OBJECT -> JSON_EXTRACT
252            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
253                Function::new("JSON_EXTRACT".to_string(), f.args),
254            ))),
255
256            // REGEXP is native in Doris
257            "REGEXP" => Ok(Expression::Function(Box::new(f))),
258
259            // RLIKE is native in Doris
260            "RLIKE" => Ok(Expression::Function(Box::new(f))),
261
262            // REGEXP_LIKE -> REGEXP
263            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
264                Function::new("REGEXP".to_string(), f.args),
265            ))),
266
267            // MONTHS_ADD is native in Doris
268            "MONTHS_ADD" => Ok(Expression::Function(Box::new(f))),
269
270            // ADD_MONTHS -> MONTHS_ADD
271            "ADD_MONTHS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
272                "MONTHS_ADD".to_string(),
273                f.args,
274            )))),
275
276            // Pass through everything else
277            _ => Ok(Expression::Function(Box::new(f))),
278        }
279    }
280
281    fn transform_aggregate_function(
282        &self,
283        f: Box<crate::expressions::AggregateFunction>,
284    ) -> Result<Expression> {
285        let name_upper = f.name.to_uppercase();
286        match name_upper.as_str() {
287            // COUNT_IF -> SUM(CASE WHEN...)
288            "COUNT_IF" if !f.args.is_empty() => {
289                let condition = f.args.into_iter().next().unwrap();
290                let case_expr = Expression::Case(Box::new(Case {
291                    operand: None,
292                    whens: vec![(condition, Expression::number(1))],
293                    else_: Some(Expression::number(0)),
294                    comments: Vec::new(),
295                    inferred_type: None,
296                }));
297                Ok(Expression::Sum(Box::new(AggFunc {
298                    ignore_nulls: None,
299                    having_max: None,
300                    this: case_expr,
301                    distinct: f.distinct,
302                    filter: f.filter,
303                    order_by: Vec::new(),
304                    name: None,
305                    limit: None,
306                    inferred_type: None,
307                })))
308            }
309
310            // APPROX_COUNT_DISTINCT is native in Doris
311            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
312
313            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
314            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
315                Function::new("APPROX_COUNT_DISTINCT".to_string(), f.args),
316            ))),
317
318            // MAX_BY is native in Doris
319            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
320
321            // MIN_BY is native in Doris
322            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
323
324            // Pass through everything else
325            _ => Ok(Expression::AggregateFunction(f)),
326        }
327    }
328
329    fn transform_cast(&self, c: Cast) -> Result<Expression> {
330        // Doris type mappings are handled in the generator
331        Ok(Expression::Cast(Box::new(c)))
332    }
333}