Skip to main content

polyglot_sql/dialects/
doris.rs

1//! Doris Dialect
2//!
3//! Apache Doris-specific transformations based on sqlglot patterns.
4//! Doris is MySQL-compatible with some extensions for analytics.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{AggFunc, Case, Cast, Expression, Function, VarArgFunc};
9use crate::generator::GeneratorConfig;
10use crate::tokens::TokenizerConfig;
11
/// Apache Doris dialect.
///
/// Stateless unit struct; all dialect behaviour lives in its
/// [`DialectImpl`] implementation (tokenizer/generator configuration and
/// expression rewrites) and in the private helper methods of its inherent
/// `impl` block.
pub struct DorisDialect;
14
15impl DialectImpl for DorisDialect {
16    fn dialect_type(&self) -> DialectType {
17        DialectType::Doris
18    }
19
20    fn tokenizer_config(&self) -> TokenizerConfig {
21        let mut config = TokenizerConfig::default();
22        // Doris uses backticks for identifiers (MySQL-style)
23        config.identifiers.insert('`', '`');
24        config.nested_comments = false;
25        config
26    }
27
28    fn generator_config(&self) -> GeneratorConfig {
29        use crate::generator::IdentifierQuoteStyle;
30        GeneratorConfig {
31            identifier_quote: '`',
32            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
33            dialect: Some(DialectType::Doris),
34            // Doris: COMMENT 'value' (naked property, no = sign)
35            schema_comment_with_eq: false,
36            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
37            with_properties_prefix: "PROPERTIES",
38            ..Default::default()
39        }
40    }
41
42    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
43        match expr {
44            // IFNULL is native in Doris (MySQL-style)
45            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
46
47            // NVL -> IFNULL in Doris
48            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
49
50            // TryCast -> not directly supported, use CAST
51            Expression::TryCast(c) => Ok(Expression::Cast(c)),
52
53            // SafeCast -> CAST in Doris
54            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
55
56            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
57            Expression::CountIf(f) => {
58                let case_expr = Expression::Case(Box::new(Case {
59                    operand: None,
60                    whens: vec![(f.this.clone(), Expression::number(1))],
61                    else_: Some(Expression::number(0)),
62                    comments: Vec::new(),
63                }));
64                Ok(Expression::Sum(Box::new(AggFunc {
65                    ignore_nulls: None,
66                    having_max: None,
67                    this: case_expr,
68                    distinct: f.distinct,
69                    filter: f.filter,
70                    order_by: Vec::new(),
71                    name: None,
72                    limit: None,
73                })))
74            }
75
76            // RAND is native in Doris
77            Expression::Rand(r) => Ok(Expression::Rand(r)),
78
79            // REGEXP_LIKE -> REGEXP in Doris
80            Expression::RegexpLike(r) => {
81                let mut args = vec![r.this, r.pattern];
82                if let Some(flags) = r.flags {
83                    args.push(flags);
84                }
85                Ok(Expression::Function(Box::new(Function::new(
86                    "REGEXP".to_string(),
87                    args,
88                ))))
89            }
90
91            // Generic function transformations
92            Expression::Function(f) => self.transform_function(*f),
93
94            // Generic aggregate function transformations
95            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
96
97            // Cast transformations
98            Expression::Cast(c) => self.transform_cast(*c),
99
100            // Pass through everything else
101            _ => Ok(expr),
102        }
103    }
104}
105
106impl DorisDialect {
107    fn transform_function(&self, f: Function) -> Result<Expression> {
108        let name_upper = f.name.to_uppercase();
109        match name_upper.as_str() {
110            // NVL -> IFNULL
111            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
112                "IFNULL".to_string(),
113                f.args,
114            )))),
115
116            // ISNULL -> IFNULL
117            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
118                "IFNULL".to_string(),
119                f.args,
120            )))),
121
122            // COALESCE is native in Doris
123            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
124                original_name: None,
125                expressions: f.args,
126            }))),
127
128            // NOW is native in Doris
129            "NOW" => Ok(Expression::CurrentTimestamp(
130                crate::expressions::CurrentTimestamp {
131                    precision: None,
132                    sysdate: false,
133                },
134            )),
135
136            // GETDATE -> NOW in Doris
137            "GETDATE" => Ok(Expression::CurrentTimestamp(
138                crate::expressions::CurrentTimestamp {
139                    precision: None,
140                    sysdate: false,
141                },
142            )),
143
144            // CURRENT_TIMESTAMP is native
145            "CURRENT_TIMESTAMP" => Ok(Expression::CurrentTimestamp(
146                crate::expressions::CurrentTimestamp {
147                    precision: None,
148                    sysdate: false,
149                },
150            )),
151
152            // GROUP_CONCAT is native in Doris
153            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
154
155            // STRING_AGG -> GROUP_CONCAT
156            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
157                Function::new("GROUP_CONCAT".to_string(), f.args),
158            ))),
159
160            // LISTAGG -> GROUP_CONCAT
161            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
162                "GROUP_CONCAT".to_string(),
163                f.args,
164            )))),
165
166            // SUBSTR is native in Doris
167            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
168
169            // SUBSTRING is native in Doris
170            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
171
172            // LENGTH is native in Doris
173            "LENGTH" => Ok(Expression::Function(Box::new(f))),
174
175            // LEN -> LENGTH
176            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
177                "LENGTH".to_string(),
178                f.args,
179            )))),
180
181            // CHARINDEX -> INSTR in Doris (with swapped args)
182            "CHARINDEX" if f.args.len() >= 2 => {
183                let mut args = f.args;
184                let substring = args.remove(0);
185                let string = args.remove(0);
186                Ok(Expression::Function(Box::new(Function::new(
187                    "INSTR".to_string(),
188                    vec![string, substring],
189                ))))
190            }
191
192            // STRPOS -> INSTR
193            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
194                "INSTR".to_string(),
195                f.args,
196            )))),
197
198            // LOCATE is native in Doris (keep as-is)
199            "LOCATE" => Ok(Expression::Function(Box::new(f))),
200
201            // INSTR is native in Doris
202            "INSTR" => Ok(Expression::Function(Box::new(f))),
203
204            // DATE_TRUNC is native in Doris
205            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
206
207            // COLLECT_LIST is native in Doris
208            "COLLECT_LIST" => Ok(Expression::Function(Box::new(f))),
209
210            // COLLECT_SET is native in Doris
211            "COLLECT_SET" => Ok(Expression::Function(Box::new(f))),
212
213            // ARRAY_AGG -> COLLECT_LIST
214            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
215                "COLLECT_LIST".to_string(),
216                f.args,
217            )))),
218
219            // TO_DATE is native in Doris
220            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
221
222            // TO_TIMESTAMP -> FROM_UNIXTIME or similar
223            "TO_TIMESTAMP" => Ok(Expression::Function(Box::new(f))),
224
225            // DATE_FORMAT is native in Doris (MySQL-style)
226            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
227
228            // strftime -> DATE_FORMAT
229            "STRFTIME" if f.args.len() >= 2 => {
230                let mut args = f.args;
231                let format = args.remove(0);
232                let date = args.remove(0);
233                Ok(Expression::Function(Box::new(Function::new(
234                    "DATE_FORMAT".to_string(),
235                    vec![date, format],
236                ))))
237            }
238
239            // TO_CHAR -> DATE_FORMAT
240            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
241                "DATE_FORMAT".to_string(),
242                f.args,
243            )))),
244
245            // JSON_EXTRACT is native in Doris
246            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
247
248            // GET_JSON_OBJECT -> JSON_EXTRACT
249            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
250                Function::new("JSON_EXTRACT".to_string(), f.args),
251            ))),
252
253            // REGEXP is native in Doris
254            "REGEXP" => Ok(Expression::Function(Box::new(f))),
255
256            // RLIKE is native in Doris
257            "RLIKE" => Ok(Expression::Function(Box::new(f))),
258
259            // REGEXP_LIKE -> REGEXP
260            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
261                Function::new("REGEXP".to_string(), f.args),
262            ))),
263
264            // MONTHS_ADD is native in Doris
265            "MONTHS_ADD" => Ok(Expression::Function(Box::new(f))),
266
267            // ADD_MONTHS -> MONTHS_ADD
268            "ADD_MONTHS" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
269                "MONTHS_ADD".to_string(),
270                f.args,
271            )))),
272
273            // Pass through everything else
274            _ => Ok(Expression::Function(Box::new(f))),
275        }
276    }
277
278    fn transform_aggregate_function(
279        &self,
280        f: Box<crate::expressions::AggregateFunction>,
281    ) -> Result<Expression> {
282        let name_upper = f.name.to_uppercase();
283        match name_upper.as_str() {
284            // COUNT_IF -> SUM(CASE WHEN...)
285            "COUNT_IF" if !f.args.is_empty() => {
286                let condition = f.args.into_iter().next().unwrap();
287                let case_expr = Expression::Case(Box::new(Case {
288                    operand: None,
289                    whens: vec![(condition, Expression::number(1))],
290                    else_: Some(Expression::number(0)),
291                    comments: Vec::new(),
292                }));
293                Ok(Expression::Sum(Box::new(AggFunc {
294                    ignore_nulls: None,
295                    having_max: None,
296                    this: case_expr,
297                    distinct: f.distinct,
298                    filter: f.filter,
299                    order_by: Vec::new(),
300                    name: None,
301                    limit: None,
302                })))
303            }
304
305            // APPROX_COUNT_DISTINCT is native in Doris
306            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
307
308            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
309            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
310                Function::new("APPROX_COUNT_DISTINCT".to_string(), f.args),
311            ))),
312
313            // MAX_BY is native in Doris
314            "MAX_BY" => Ok(Expression::AggregateFunction(f)),
315
316            // MIN_BY is native in Doris
317            "MIN_BY" => Ok(Expression::AggregateFunction(f)),
318
319            // Pass through everything else
320            _ => Ok(Expression::AggregateFunction(f)),
321        }
322    }
323
324    fn transform_cast(&self, c: Cast) -> Result<Expression> {
325        // Doris type mappings are handled in the generator
326        Ok(Expression::Cast(Box::new(c)))
327    }
328}