Skip to main content

polyglot_sql/dialects/
starrocks.rs

//! StarRocks Dialect
//!
//! StarRocks-specific transformations based on sqlglot patterns.
//! StarRocks is MySQL-compatible with OLAP extensions (similar to Doris).
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, Expression, Function, Interval, IntervalUnit, IntervalUnitSpec, Lateral,
10    VarArgFunc,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
/// StarRocks dialect.
///
/// Zero-sized marker type: it carries no state, so it is cheap to copy and
/// can be created with `StarRocksDialect` or `StarRocksDialect::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct StarRocksDialect;
17
18impl DialectImpl for StarRocksDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::StarRocks
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        use crate::tokens::TokenType;
25        let mut config = TokenizerConfig::default();
26        // StarRocks uses backticks for identifiers (MySQL-style)
27        config.identifiers.insert('`', '`');
28        // Remove double quotes from identifiers (MySQL-style)
29        config.identifiers.remove(&'"');
30        config.quotes.insert("\"".to_string(), "\"".to_string());
31        config.nested_comments = false;
32        // LARGEINT maps to INT128
33        config
34            .keywords
35            .insert("LARGEINT".to_string(), TokenType::Int128);
36        config
37    }
38
39    fn generator_config(&self) -> GeneratorConfig {
40        use crate::generator::IdentifierQuoteStyle;
41        GeneratorConfig {
42            identifier_quote: '`',
43            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
44            dialect: Some(DialectType::StarRocks),
45            // StarRocks: INSERT OVERWRITE (without TABLE keyword)
46            insert_overwrite: " OVERWRITE",
47            // StarRocks: PROPERTIES prefix for WITH properties
48            with_properties_prefix: "PROPERTIES",
49            // StarRocks uses MySQL-style settings
50            null_ordering_supported: false,
51            limit_only_literals: true,
52            semi_anti_join_with_side: false,
53            supports_table_alias_columns: false,
54            values_as_table: false,
55            tablesample_requires_parens: false,
56            tablesample_with_method: false,
57            aggregate_filter_supported: false,
58            try_supported: false,
59            supports_convert_timezone: false,
60            supports_uescape: false,
61            supports_between_flags: false,
62            query_hints: false,
63            parameter_token: "?",
64            supports_window_exclude: false,
65            supports_exploding_projections: false,
66            // StarRocks: COMMENT 'value' (naked property, no = sign)
67            schema_comment_with_eq: false,
68            ..Default::default()
69        }
70    }
71
72    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
73        match expr {
74            // IFNULL is native in StarRocks (MySQL-style)
75            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
76
77            // NVL -> IFNULL in StarRocks
78            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
79
80            // TryCast -> not directly supported, use CAST
81            Expression::TryCast(c) => Ok(Expression::Cast(c)),
82
83            // SafeCast -> CAST in StarRocks
84            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
85
86            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
87            Expression::CountIf(f) => {
88                let case_expr = Expression::Case(Box::new(Case {
89                    operand: None,
90                    whens: vec![(f.this.clone(), Expression::number(1))],
91                    else_: Some(Expression::number(0)),
92                    comments: Vec::new(),
93                    inferred_type: None,
94                }));
95                Ok(Expression::Sum(Box::new(AggFunc {
96                    ignore_nulls: None,
97                    having_max: None,
98                    this: case_expr,
99                    distinct: f.distinct,
100                    filter: f.filter,
101                    order_by: Vec::new(),
102                    name: None,
103                    limit: None,
104                    inferred_type: None,
105                })))
106            }
107
108            // RAND is native in StarRocks
109            Expression::Rand(r) => Ok(Expression::Rand(r)),
110
111            // JSON arrow syntax: preserve -> for StarRocks (arrow_json_extract_sql)
112            Expression::JsonExtract(mut f) => {
113                // Set arrow_syntax to true to preserve -> operator
114                f.arrow_syntax = true;
115                Ok(Expression::JsonExtract(f))
116            }
117
118            Expression::JsonExtractScalar(mut f) => {
119                // Set arrow_syntax to true to preserve ->> operator
120                f.arrow_syntax = true;
121                Ok(Expression::JsonExtractScalar(f))
122            }
123
124            // Generic function transformations
125            Expression::Function(f) => self.transform_function(*f),
126
127            // Generic aggregate function transformations
128            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
129
130            // Cast transformations
131            Expression::Cast(c) => self.transform_cast(*c),
132
133            // Handle LATERAL UNNEST - StarRocks requires column alias "unnest" by default
134            Expression::Lateral(mut l) => {
135                self.transform_lateral(&mut l)?;
136                Ok(Expression::Lateral(l))
137            }
138
139            // Pass through everything else
140            _ => Ok(expr),
141        }
142    }
143}
144
145impl StarRocksDialect {
146    fn wrap_day_interval(expr: Expression) -> Expression {
147        Expression::Interval(Box::new(Interval {
148            this: Some(expr),
149            unit: Some(IntervalUnitSpec::Simple {
150                unit: IntervalUnit::Day,
151                use_plural: false,
152            }),
153        }))
154    }
155
156    fn transform_function(&self, f: Function) -> Result<Expression> {
157        let name_upper = f.name.to_uppercase();
158        match name_upper.as_str() {
159            // NVL -> IFNULL
160            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
161                "IFNULL".to_string(),
162                f.args,
163            )))),
164
165            // ISNULL -> IFNULL
166            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
167                "IFNULL".to_string(),
168                f.args,
169            )))),
170
171            // COALESCE is native in StarRocks
172            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
173                original_name: None,
174                expressions: f.args,
175                inferred_type: None,
176            }))),
177
178            // NOW is native in StarRocks
179            "NOW" => Ok(Expression::CurrentTimestamp(
180                crate::expressions::CurrentTimestamp {
181                    precision: None,
182                    sysdate: false,
183                },
184            )),
185
186            // GETDATE -> NOW in StarRocks
187            "GETDATE" => Ok(Expression::CurrentTimestamp(
188                crate::expressions::CurrentTimestamp {
189                    precision: None,
190                    sysdate: false,
191                },
192            )),
193
194            // GROUP_CONCAT is native in StarRocks
195            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
196
197            // STRING_AGG -> GROUP_CONCAT
198            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
199                Function::new("GROUP_CONCAT".to_string(), f.args),
200            ))),
201
202            // LISTAGG -> GROUP_CONCAT
203            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
204                "GROUP_CONCAT".to_string(),
205                f.args,
206            )))),
207
208            // SUBSTR is native in StarRocks
209            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
210
211            // SUBSTRING is native in StarRocks
212            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
213
214            // LENGTH is native in StarRocks
215            "LENGTH" => Ok(Expression::Function(Box::new(f))),
216
217            // LEN -> LENGTH
218            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
219                "LENGTH".to_string(),
220                f.args,
221            )))),
222
223            // CHARINDEX -> INSTR in StarRocks (with swapped args)
224            "CHARINDEX" if f.args.len() >= 2 => {
225                let mut args = f.args;
226                let substring = args.remove(0);
227                let string = args.remove(0);
228                Ok(Expression::Function(Box::new(Function::new(
229                    "INSTR".to_string(),
230                    vec![string, substring],
231                ))))
232            }
233
234            // STRPOS -> INSTR
235            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
236                "INSTR".to_string(),
237                f.args,
238            )))),
239
240            // DATE_TRUNC is native in StarRocks
241            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
242
243            // StarRocks normalizes MySQL-style day shorthand to INTERVAL syntax.
244            "DATE_ADD" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
245                let mut args = f.args;
246                let date = args.remove(0);
247                let days = args.remove(0);
248                Ok(Expression::Function(Box::new(Function::new(
249                    "DATE_ADD".to_string(),
250                    vec![date, Self::wrap_day_interval(days)],
251                ))))
252            }
253            "DATE_SUB" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
254                let mut args = f.args;
255                let date = args.remove(0);
256                let days = args.remove(0);
257                Ok(Expression::Function(Box::new(Function::new(
258                    "DATE_SUB".to_string(),
259                    vec![date, Self::wrap_day_interval(days)],
260                ))))
261            }
262            "ADDDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
263                let mut args = f.args;
264                let date = args.remove(0);
265                let days = args.remove(0);
266                Ok(Expression::Function(Box::new(Function::new(
267                    "DATE_ADD".to_string(),
268                    vec![date, Self::wrap_day_interval(days)],
269                ))))
270            }
271            "SUBDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
272                let mut args = f.args;
273                let date = args.remove(0);
274                let days = args.remove(0);
275                Ok(Expression::Function(Box::new(Function::new(
276                    "DATE_SUB".to_string(),
277                    vec![date, Self::wrap_day_interval(days)],
278                ))))
279            }
280
281            // ARRAY_AGG is native in StarRocks
282            "ARRAY_AGG" => Ok(Expression::Function(Box::new(f))),
283
284            // COLLECT_LIST -> ARRAY_AGG
285            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
286                Function::new("ARRAY_AGG".to_string(), f.args),
287            ))),
288
289            // ARRAY_JOIN is native in StarRocks
290            "ARRAY_JOIN" => Ok(Expression::Function(Box::new(f))),
291
292            // ARRAY_FLATTEN is native in StarRocks
293            "ARRAY_FLATTEN" => Ok(Expression::Function(Box::new(f))),
294
295            // FLATTEN -> ARRAY_FLATTEN
296            "FLATTEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
297                "ARRAY_FLATTEN".to_string(),
298                f.args,
299            )))),
300
301            // TO_DATE is native in StarRocks
302            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
303
304            // DATE_FORMAT is native in StarRocks
305            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
306
307            // strftime -> DATE_FORMAT
308            "STRFTIME" if f.args.len() >= 2 => {
309                let mut args = f.args;
310                let format = args.remove(0);
311                let date = args.remove(0);
312                Ok(Expression::Function(Box::new(Function::new(
313                    "DATE_FORMAT".to_string(),
314                    vec![date, format],
315                ))))
316            }
317
318            // TO_CHAR -> DATE_FORMAT
319            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
320                "DATE_FORMAT".to_string(),
321                f.args,
322            )))),
323
324            // JSON_EXTRACT -> arrow operator in StarRocks
325            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
326
327            // GET_JSON_OBJECT -> JSON_EXTRACT
328            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
329                Function::new("JSON_EXTRACT".to_string(), f.args),
330            ))),
331
332            // REGEXP is native in StarRocks
333            "REGEXP" => Ok(Expression::Function(Box::new(f))),
334
335            // RLIKE is native in StarRocks
336            "RLIKE" => Ok(Expression::Function(Box::new(f))),
337
338            // REGEXP_LIKE -> REGEXP
339            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
340                Function::new("REGEXP".to_string(), f.args),
341            ))),
342
343            // ARRAY_INTERSECTION -> ARRAY_INTERSECT
344            "ARRAY_INTERSECTION" => Ok(Expression::Function(Box::new(Function::new(
345                "ARRAY_INTERSECT".to_string(),
346                f.args,
347            )))),
348
349            // ST_MAKEPOINT -> ST_POINT
350            "ST_MAKEPOINT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
351                Function::new("ST_POINT".to_string(), f.args),
352            ))),
353
354            // ST_DISTANCE(a, b) -> ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))
355            "ST_DISTANCE" if f.args.len() == 2 => {
356                let a = f.args[0].clone();
357                let b = f.args[1].clone();
358                Ok(Expression::Function(Box::new(Function::new(
359                    "ST_DISTANCE_SPHERE".to_string(),
360                    vec![
361                        Expression::Function(Box::new(Function::new(
362                            "ST_X".to_string(),
363                            vec![a.clone()],
364                        ))),
365                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![a]))),
366                        Expression::Function(Box::new(Function::new(
367                            "ST_X".to_string(),
368                            vec![b.clone()],
369                        ))),
370                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![b]))),
371                    ],
372                ))))
373            }
374
375            // Pass through everything else
376            _ => Ok(Expression::Function(Box::new(f))),
377        }
378    }
379
380    fn transform_aggregate_function(
381        &self,
382        f: Box<crate::expressions::AggregateFunction>,
383    ) -> Result<Expression> {
384        let name_upper = f.name.to_uppercase();
385        match name_upper.as_str() {
386            // COUNT_IF -> SUM(CASE WHEN...)
387            "COUNT_IF" if !f.args.is_empty() => {
388                let condition = f.args.into_iter().next().unwrap();
389                let case_expr = Expression::Case(Box::new(Case {
390                    operand: None,
391                    whens: vec![(condition, Expression::number(1))],
392                    else_: Some(Expression::number(0)),
393                    comments: Vec::new(),
394                    inferred_type: None,
395                }));
396                Ok(Expression::Sum(Box::new(AggFunc {
397                    ignore_nulls: None,
398                    having_max: None,
399                    this: case_expr,
400                    distinct: f.distinct,
401                    filter: f.filter,
402                    order_by: Vec::new(),
403                    name: None,
404                    limit: None,
405                    inferred_type: None,
406                })))
407            }
408
409            // APPROX_COUNT_DISTINCT is native in StarRocks
410            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
411
412            // Pass through everything else
413            _ => Ok(Expression::AggregateFunction(f)),
414        }
415    }
416
417    fn transform_cast(&self, c: Cast) -> Result<Expression> {
418        // StarRocks: CAST(x AS TIMESTAMP/TIMESTAMPTZ) -> TIMESTAMP(x) function
419        // Similar to MySQL behavior
420        match &c.to {
421            crate::expressions::DataType::Timestamp { .. } => Ok(Expression::Function(Box::new(
422                Function::new("TIMESTAMP".to_string(), vec![c.this]),
423            ))),
424            crate::expressions::DataType::Custom { name }
425                if name.to_uppercase() == "TIMESTAMPTZ"
426                    || name.to_uppercase() == "TIMESTAMPLTZ" =>
427            {
428                Ok(Expression::Function(Box::new(Function::new(
429                    "TIMESTAMP".to_string(),
430                    vec![c.this],
431                ))))
432            }
433            // StarRocks type mappings are handled in the generator
434            _ => Ok(Expression::Cast(Box::new(c))),
435        }
436    }
437
438    /// Transform LATERAL UNNEST for StarRocks
439    /// StarRocks requires UNNEST to have a default column alias of "unnest" if not specified.
440    /// Python reference: starrocks.py _parse_unnest
441    fn transform_lateral(&self, l: &mut Box<Lateral>) -> Result<()> {
442        // Check if the lateral expression contains UNNEST
443        if let Expression::Unnest(_) = &*l.this {
444            // If there's a table alias but no column aliases, add "unnest" as default column
445            if l.alias.is_some() && l.column_aliases.is_empty() {
446                l.column_aliases.push("unnest".to_string());
447            }
448            // If there's no alias at all, add both table alias "unnest" and column alias "unnest"
449            else if l.alias.is_none() {
450                l.alias = Some("unnest".to_string());
451                l.column_aliases.push("unnest".to_string());
452            }
453        }
454        Ok(())
455    }
456}