Skip to main content

polyglot_sql/dialects/
doris.rs

1//! Doris Dialect
2//!
3//! Apache Doris-specific transformations based on sqlglot patterns.
4//! Doris is MySQL-compatible with some extensions for analytics.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{AggFunc, Case, Cast, Expression, Function, VarArgFunc};
9use crate::generator::GeneratorConfig;
10use crate::tokens::TokenizerConfig;
11
/// Doris dialect.
///
/// Zero-sized marker type for Apache Doris. It carries no state; all behavior
/// lives in its `DialectImpl` implementation (tokenizer and generator
/// configuration plus expression rewriting) and in its private helper methods.
pub struct DorisDialect;
14
15impl DialectImpl for DorisDialect {
16    fn dialect_type(&self) -> DialectType {
17        DialectType::Doris
18    }
19
20    fn tokenizer_config(&self) -> TokenizerConfig {
21        let mut config = TokenizerConfig::default();
22        // Doris uses backticks for identifiers (MySQL-style)
23        config.identifiers.insert('`', '`');
24        config.nested_comments = false;
25        config
26    }
27
28    fn generator_config(&self) -> GeneratorConfig {
29        use crate::generator::IdentifierQuoteStyle;
30        GeneratorConfig {
31            identifier_quote: '`',
32            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
33            dialect: Some(DialectType::Doris),
34            // Doris: COMMENT 'value' (naked property, no = sign)
35            schema_comment_with_eq: false,
36            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
37            with_properties_prefix: "PROPERTIES",
38            ..Default::default()
39        }
40    }
41
42    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
43        match expr {
44            // IFNULL is native in Doris (MySQL-style)
45            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
46
47            // NVL -> IFNULL in Doris
48            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
49
50            // TryCast -> not directly supported, use CAST
51            Expression::TryCast(c) => Ok(Expression::Cast(c)),
52
53            // SafeCast -> CAST in Doris
54            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
55
56            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
57            Expression::CountIf(f) => {
58                let case_expr = Expression::Case(Box::new(Case {
59                    operand: None,
60                    whens: vec![(f.this.clone(), Expression::number(1))],
61                    else_: Some(Expression::number(0)),
62                }));
63                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
64                    this: case_expr,
65                    distinct: f.distinct,
66                    filter: f.filter,
67                    order_by: Vec::new(),
68                name: None,
69                limit: None,
70                })))
71            }
72
73            // RAND is native in Doris
74            Expression::Rand(r) => Ok(Expression::Rand(r)),
75
76            // REGEXP_LIKE -> REGEXP in Doris
77            Expression::RegexpLike(r) => {
78                let mut args = vec![r.this, r.pattern];
79                if let Some(flags) = r.flags {
80                    args.push(flags);
81                }
82                Ok(Expression::Function(Box::new(Function::new("REGEXP".to_string(), args))))
83            }
84
85            // Generic function transformations
86            Expression::Function(f) => self.transform_function(*f),
87
88            // Generic aggregate function transformations
89            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
90
91            // Cast transformations
92            Expression::Cast(c) => self.transform_cast(*c),
93
94            // Pass through everything else
95            _ => Ok(expr),
96        }
97    }
98}
99
100impl DorisDialect {
101    fn transform_function(&self, f: Function) -> Result<Expression> {
102        let name_upper = f.name.to_uppercase();
103        match name_upper.as_str() {
104            // NVL -> IFNULL
105            "NVL" if f.args.len() == 2 => {
106                Ok(Expression::Function(Box::new(Function::new(
107                    "IFNULL".to_string(),
108                    f.args,
109                ))))
110            }
111
112            // ISNULL -> IFNULL
113            "ISNULL" if f.args.len() == 2 => {
114                Ok(Expression::Function(Box::new(Function::new(
115                    "IFNULL".to_string(),
116                    f.args,
117                ))))
118            }
119
120            // COALESCE is native in Doris
121            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
122                expressions: f.args,
123            }))),
124
125            // NOW is native in Doris
126            "NOW" => Ok(Expression::CurrentTimestamp(
127                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
128            )),
129
130            // GETDATE -> NOW in Doris
131            "GETDATE" => Ok(Expression::CurrentTimestamp(
132                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
133            )),
134
135            // CURRENT_TIMESTAMP is native
136            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
137                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
138            )),
139
140            // GROUP_CONCAT is native in Doris
141            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
142
143            // STRING_AGG -> GROUP_CONCAT
144            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
145                "GROUP_CONCAT".to_string(),
146                f.args,
147            )))),
148
149            // LISTAGG -> GROUP_CONCAT
150            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
151                "GROUP_CONCAT".to_string(),
152                f.args,
153            )))),
154
155            // SUBSTR is native in Doris
156            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
157
158            // SUBSTRING is native in Doris
159            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
160
161            // LENGTH is native in Doris
162            "LENGTH" => Ok(Expression::Function(Box::new(f))),
163
164            // LEN -> LENGTH
165            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
166                "LENGTH".to_string(),
167                f.args,
168            )))),
169
170            // CHARINDEX -> INSTR in Doris (with swapped args)
171            "CHARINDEX" if f.args.len() >= 2 => {
172                let mut args = f.args;
173                let substring = args.remove(0);
174                let string = args.remove(0);
175                Ok(Expression::Function(Box::new(Function::new(
176                    "INSTR".to_string(),
177                    vec![string, substring],
178                ))))
179            }
180
181            // STRPOS -> INSTR
182            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
183                "INSTR".to_string(),
184                f.args,
185            )))),
186
187            // LOCATE -> INSTR (with swapped args)
188            "LOCATE" if f.args.len() >= 2 => {
189                let mut args = f.args;
190                let substring = args.remove(0);
191                let string = args.remove(0);
192                Ok(Expression::Function(Box::new(Function::new(
193                    "INSTR".to_string(),
194                    vec![string, substring],
195                ))))
196            }
197
198            // INSTR is native in Doris
199            "INSTR" => Ok(Expression::Function(Box::new(f))),
200
201            // DATE_TRUNC is native in Doris
202            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
203
204            // COLLECT_LIST is native in Doris
205            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
206
207            // COLLECT_SET is native in Doris
208            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
209
210            // ARRAY_AGG -> COLLECT_LIST
211            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
212                "COLLECT_LIST".to_string(),
213                f.args,
214            )))),
215
216            // TO_DATE is native in Doris
217            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
218
219            // TO_TIMESTAMP -> FROM_UNIXTIME or similar
220            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
221
222            // DATE_FORMAT is native in Doris (MySQL-style)
223            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
224
225            // strftime -> DATE_FORMAT
226            "STRFTIME" if f.args.len() >= 2 => {
227                let mut args = f.args;
228                let format = args.remove(0);
229                let date = args.remove(0);
230                Ok(Expression::Function(Box::new(Function::new(
231                    "DATE_FORMAT".to_string(),
232                    vec![date, format],
233                ))))
234            }
235
236            // TO_CHAR -> DATE_FORMAT
237            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
238                "DATE_FORMAT".to_string(),
239                f.args,
240            )))),
241
242            // JSON_EXTRACT is native in Doris
243            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
244
245            // GET_JSON_OBJECT -> JSON_EXTRACT
246            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
247                Function::new("JSON_EXTRACT".to_string(), f.args),
248            ))),
249
250            // REGEXP is native in Doris
251            "REGEXP" => Ok(Expression::Function(Box::new(f))),
252
253            // RLIKE is native in Doris
254            "RLIKE" => Ok(Expression::Function(Box::new(f))),
255
256            // REGEXP_LIKE -> REGEXP
257            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
258                "REGEXP".to_string(),
259                f.args,
260            )))),
261
262            // MONTHS_ADD is native in Doris
263            "MONTHS_ADD" => Ok(Expression::Function(Box::new(f))),
264
265            // ADD_MONTHS -> MONTHS_ADD
266            "ADD_MONTHS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
267                "MONTHS_ADD".to_string(),
268                f.args,
269            )))),
270
271            // Pass through everything else
272            _ => Ok(Expression::Function(Box::new(f))),
273        }
274    }
275
276    fn transform_aggregate_function(
277        &self,
278        f: Box<crate::expressions::AggregateFunction>,
279    ) -> Result<Expression> {
280        let name_upper = f.name.to_uppercase();
281        match name_upper.as_str() {
282            // COUNT_IF -> SUM(CASE WHEN...)
283            "COUNT_IF" if !f.args.is_empty() => {
284                let condition = f.args.into_iter().next().unwrap();
285                let case_expr = Expression::Case(Box::new(Case {
286                    operand: None,
287                    whens: vec![(condition, Expression::number(1))],
288                    else_: Some(Expression::number(0)),
289                }));
290                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
291                    this: case_expr,
292                    distinct: f.distinct,
293                    filter: f.filter,
294                    order_by: Vec::new(),
295                name: None,
296                limit: None,
297                })))
298            }
299
300            // APPROX_COUNT_DISTINCT is native in Doris
301            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
302
303            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
304            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
305                Function::new("APPROX_COUNT_DISTINCT".to_string(), f.args),
306            ))),
307
308            // MAX_BY is native in Doris
309            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
310
311            // MIN_BY is native in Doris
312            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
313
314            // Pass through everything else
315            _ => Ok(Expression::AggregateFunction(f)),
316        }
317    }
318
319    fn transform_cast(&self, c: Cast) -> Result<Expression> {
320        // Doris type mappings are handled in the generator
321        Ok(Expression::Cast(Box::new(c)))
322    }
323}