Skip to main content

polyglot_sql/dialects/
starrocks.rs

1//! StarRocks Dialect
2//!
3//! StarRocks-specific transformations based on sqlglot patterns.
4//! StarRocks is MySQL-compatible with OLAP extensions (similar to Doris).
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggFunc, Case, Cast, Expression, Function, Interval, IntervalUnit, IntervalUnitSpec, Lateral,
10    VarArgFunc,
11};
12#[cfg(feature = "generate")]
13use crate::generator::GeneratorConfig;
14use crate::tokens::TokenizerConfig;
15
/// StarRocks dialect.
///
/// Field-less marker type; the StarRocks-specific behavior is supplied by the
/// `DialectImpl` implementation for this type.
pub struct StarRocksDialect;
18
19impl DialectImpl for StarRocksDialect {
    /// Returns the dialect tag for this implementation, always
    /// `DialectType::StarRocks`.
    fn dialect_type(&self) -> DialectType {
        DialectType::StarRocks
    }
23
24    fn tokenizer_config(&self) -> TokenizerConfig {
25        use crate::tokens::TokenType;
26        let mut config = TokenizerConfig::default();
27        // StarRocks uses backticks for identifiers (MySQL-style)
28        config.identifiers.insert('`', '`');
29        // Remove double quotes from identifiers (MySQL-style)
30        config.identifiers.remove(&'"');
31        config.quotes.insert("\"".to_string(), "\"".to_string());
32        config.nested_comments = false;
33        // LARGEINT maps to INT128
34        config
35            .keywords
36            .insert("LARGEINT".to_string(), TokenType::Int128);
37        config
38    }
39
40    #[cfg(feature = "generate")]
41
42    fn generator_config(&self) -> GeneratorConfig {
43        use crate::generator::IdentifierQuoteStyle;
44        GeneratorConfig {
45            identifier_quote: '`',
46            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
47            dialect: Some(DialectType::StarRocks),
48            // StarRocks: INSERT OVERWRITE (without TABLE keyword)
49            insert_overwrite: " OVERWRITE",
50            // StarRocks: PROPERTIES prefix for WITH properties
51            with_properties_prefix: "PROPERTIES",
52            // StarRocks uses MySQL-style settings
53            null_ordering_supported: false,
54            limit_only_literals: true,
55            semi_anti_join_with_side: false,
56            supports_table_alias_columns: false,
57            values_as_table: false,
58            tablesample_requires_parens: false,
59            tablesample_with_method: false,
60            aggregate_filter_supported: false,
61            try_supported: false,
62            supports_convert_timezone: false,
63            supports_uescape: false,
64            supports_between_flags: false,
65            query_hints: false,
66            parameter_token: "?",
67            supports_window_exclude: false,
68            supports_exploding_projections: false,
69            // StarRocks: COMMENT 'value' (naked property, no = sign)
70            schema_comment_with_eq: false,
71            ..Default::default()
72        }
73    }
74
75    #[cfg(feature = "transpile")]
76
77    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
78        match expr {
79            // IFNULL is native in StarRocks (MySQL-style)
80            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
81
82            // NVL -> IFNULL in StarRocks
83            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
84
85            // TryCast -> not directly supported, use CAST
86            Expression::TryCast(c) => Ok(Expression::Cast(c)),
87
88            // SafeCast -> CAST in StarRocks
89            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
90
91            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
92            Expression::CountIf(f) => {
93                let case_expr = Expression::Case(Box::new(Case {
94                    operand: None,
95                    whens: vec![(f.this.clone(), Expression::number(1))],
96                    else_: Some(Expression::number(0)),
97                    comments: Vec::new(),
98                    inferred_type: None,
99                }));
100                Ok(Expression::Sum(Box::new(AggFunc {
101                    ignore_nulls: None,
102                    having_max: None,
103                    this: case_expr,
104                    distinct: f.distinct,
105                    filter: f.filter,
106                    order_by: Vec::new(),
107                    name: None,
108                    limit: None,
109                    inferred_type: None,
110                })))
111            }
112
113            // RAND is native in StarRocks
114            Expression::Rand(r) => Ok(Expression::Rand(r)),
115
116            // JSON arrow syntax: preserve -> for StarRocks (arrow_json_extract_sql)
117            Expression::JsonExtract(mut f) => {
118                // Set arrow_syntax to true to preserve -> operator
119                f.arrow_syntax = true;
120                Ok(Expression::JsonExtract(f))
121            }
122
123            Expression::JsonExtractScalar(mut f) => {
124                // Set arrow_syntax to true to preserve ->> operator
125                f.arrow_syntax = true;
126                Ok(Expression::JsonExtractScalar(f))
127            }
128
129            // Generic function transformations
130            Expression::Function(f) => self.transform_function(*f),
131
132            // Generic aggregate function transformations
133            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
134
135            // Cast transformations
136            Expression::Cast(c) => self.transform_cast(*c),
137
138            // Handle LATERAL UNNEST - StarRocks requires column alias "unnest" by default
139            Expression::Lateral(mut l) => {
140                self.transform_lateral(&mut l)?;
141                Ok(Expression::Lateral(l))
142            }
143
144            // Pass through everything else
145            _ => Ok(expr),
146        }
147    }
148}
149
150#[cfg(feature = "transpile")]
151impl StarRocksDialect {
152    fn wrap_day_interval(expr: Expression) -> Expression {
153        Expression::Interval(Box::new(Interval {
154            this: Some(expr),
155            unit: Some(IntervalUnitSpec::Simple {
156                unit: IntervalUnit::Day,
157                use_plural: false,
158            }),
159        }))
160    }
161
162    fn transform_function(&self, f: Function) -> Result<Expression> {
163        let name_upper = f.name.to_uppercase();
164        match name_upper.as_str() {
165            // NVL -> IFNULL
166            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
167                "IFNULL".to_string(),
168                f.args,
169            )))),
170
171            // ISNULL -> IFNULL
172            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
173                "IFNULL".to_string(),
174                f.args,
175            )))),
176
177            // COALESCE is native in StarRocks
178            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc {
179                original_name: None,
180                expressions: f.args,
181                inferred_type: None,
182            }))),
183
184            // NOW is native in StarRocks
185            "NOW" => Ok(Expression::CurrentTimestamp(
186                crate::expressions::CurrentTimestamp {
187                    precision: None,
188                    sysdate: false,
189                },
190            )),
191
192            // GETDATE -> NOW in StarRocks
193            "GETDATE" => Ok(Expression::CurrentTimestamp(
194                crate::expressions::CurrentTimestamp {
195                    precision: None,
196                    sysdate: false,
197                },
198            )),
199
200            // GROUP_CONCAT is native in StarRocks
201            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
202
203            // STRING_AGG -> GROUP_CONCAT
204            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
205                Function::new("GROUP_CONCAT".to_string(), f.args),
206            ))),
207
208            // LISTAGG -> GROUP_CONCAT
209            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
210                "GROUP_CONCAT".to_string(),
211                f.args,
212            )))),
213
214            // SUBSTR is native in StarRocks
215            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
216
217            // SUBSTRING is native in StarRocks
218            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
219
220            // LENGTH is native in StarRocks
221            "LENGTH" => Ok(Expression::Function(Box::new(f))),
222
223            // LEN -> LENGTH
224            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
225                "LENGTH".to_string(),
226                f.args,
227            )))),
228
229            // CHARINDEX -> INSTR in StarRocks (with swapped args)
230            "CHARINDEX" if f.args.len() >= 2 => {
231                let mut args = f.args;
232                let substring = args.remove(0);
233                let string = args.remove(0);
234                Ok(Expression::Function(Box::new(Function::new(
235                    "INSTR".to_string(),
236                    vec![string, substring],
237                ))))
238            }
239
240            // STRPOS -> INSTR
241            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
242                "INSTR".to_string(),
243                f.args,
244            )))),
245
246            // DATE_TRUNC is native in StarRocks
247            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
248
249            // StarRocks normalizes MySQL-style day shorthand to INTERVAL syntax.
250            "DATE_ADD" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
251                let mut args = f.args;
252                let date = args.remove(0);
253                let days = args.remove(0);
254                Ok(Expression::Function(Box::new(Function::new(
255                    "DATE_ADD".to_string(),
256                    vec![date, Self::wrap_day_interval(days)],
257                ))))
258            }
259            "DATE_SUB" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
260                let mut args = f.args;
261                let date = args.remove(0);
262                let days = args.remove(0);
263                Ok(Expression::Function(Box::new(Function::new(
264                    "DATE_SUB".to_string(),
265                    vec![date, Self::wrap_day_interval(days)],
266                ))))
267            }
268            "ADDDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
269                let mut args = f.args;
270                let date = args.remove(0);
271                let days = args.remove(0);
272                Ok(Expression::Function(Box::new(Function::new(
273                    "DATE_ADD".to_string(),
274                    vec![date, Self::wrap_day_interval(days)],
275                ))))
276            }
277            "SUBDATE" if f.args.len() == 2 && !matches!(f.args[1], Expression::Interval(_)) => {
278                let mut args = f.args;
279                let date = args.remove(0);
280                let days = args.remove(0);
281                Ok(Expression::Function(Box::new(Function::new(
282                    "DATE_SUB".to_string(),
283                    vec![date, Self::wrap_day_interval(days)],
284                ))))
285            }
286
287            // ARRAY_AGG is native in StarRocks
288            "ARRAY_AGG" => Ok(Expression::Function(Box::new(f))),
289
290            // COLLECT_LIST -> ARRAY_AGG
291            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
292                Function::new("ARRAY_AGG".to_string(), f.args),
293            ))),
294
295            // ARRAY_JOIN is native in StarRocks
296            "ARRAY_JOIN" => Ok(Expression::Function(Box::new(f))),
297
298            // ARRAY_FLATTEN is native in StarRocks
299            "ARRAY_FLATTEN" => Ok(Expression::Function(Box::new(f))),
300
301            // FLATTEN -> ARRAY_FLATTEN
302            "FLATTEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
303                "ARRAY_FLATTEN".to_string(),
304                f.args,
305            )))),
306
307            // TO_DATE is native in StarRocks
308            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
309
310            // DATE_FORMAT is native in StarRocks
311            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
312
313            // strftime -> DATE_FORMAT
314            "STRFTIME" if f.args.len() >= 2 => {
315                let mut args = f.args;
316                let format = args.remove(0);
317                let date = args.remove(0);
318                Ok(Expression::Function(Box::new(Function::new(
319                    "DATE_FORMAT".to_string(),
320                    vec![date, format],
321                ))))
322            }
323
324            // TO_CHAR -> DATE_FORMAT
325            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
326                "DATE_FORMAT".to_string(),
327                f.args,
328            )))),
329
330            // JSON_EXTRACT -> arrow operator in StarRocks
331            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
332
333            // GET_JSON_OBJECT -> JSON_EXTRACT
334            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
335                Function::new("JSON_EXTRACT".to_string(), f.args),
336            ))),
337
338            // REGEXP is native in StarRocks
339            "REGEXP" => Ok(Expression::Function(Box::new(f))),
340
341            // RLIKE is native in StarRocks
342            "RLIKE" => Ok(Expression::Function(Box::new(f))),
343
344            // REGEXP_LIKE -> REGEXP
345            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(
346                Function::new("REGEXP".to_string(), f.args),
347            ))),
348
349            // ARRAY_INTERSECTION -> ARRAY_INTERSECT
350            "ARRAY_INTERSECTION" => Ok(Expression::Function(Box::new(Function::new(
351                "ARRAY_INTERSECT".to_string(),
352                f.args,
353            )))),
354
355            // ST_MAKEPOINT -> ST_POINT
356            "ST_MAKEPOINT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
357                Function::new("ST_POINT".to_string(), f.args),
358            ))),
359
360            // ST_DISTANCE(a, b) -> ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))
361            "ST_DISTANCE" if f.args.len() == 2 => {
362                let a = f.args[0].clone();
363                let b = f.args[1].clone();
364                Ok(Expression::Function(Box::new(Function::new(
365                    "ST_DISTANCE_SPHERE".to_string(),
366                    vec![
367                        Expression::Function(Box::new(Function::new(
368                            "ST_X".to_string(),
369                            vec![a.clone()],
370                        ))),
371                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![a]))),
372                        Expression::Function(Box::new(Function::new(
373                            "ST_X".to_string(),
374                            vec![b.clone()],
375                        ))),
376                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![b]))),
377                    ],
378                ))))
379            }
380
381            // Pass through everything else
382            _ => Ok(Expression::Function(Box::new(f))),
383        }
384    }
385
386    fn transform_aggregate_function(
387        &self,
388        f: Box<crate::expressions::AggregateFunction>,
389    ) -> Result<Expression> {
390        let name_upper = f.name.to_uppercase();
391        match name_upper.as_str() {
392            // COUNT_IF -> SUM(CASE WHEN...)
393            "COUNT_IF" if !f.args.is_empty() => {
394                let condition = f.args.into_iter().next().unwrap();
395                let case_expr = Expression::Case(Box::new(Case {
396                    operand: None,
397                    whens: vec![(condition, Expression::number(1))],
398                    else_: Some(Expression::number(0)),
399                    comments: Vec::new(),
400                    inferred_type: None,
401                }));
402                Ok(Expression::Sum(Box::new(AggFunc {
403                    ignore_nulls: None,
404                    having_max: None,
405                    this: case_expr,
406                    distinct: f.distinct,
407                    filter: f.filter,
408                    order_by: Vec::new(),
409                    name: None,
410                    limit: None,
411                    inferred_type: None,
412                })))
413            }
414
415            // APPROX_COUNT_DISTINCT is native in StarRocks
416            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
417
418            // Pass through everything else
419            _ => Ok(Expression::AggregateFunction(f)),
420        }
421    }
422
423    fn transform_cast(&self, c: Cast) -> Result<Expression> {
424        // StarRocks: CAST(x AS TIMESTAMP/TIMESTAMPTZ) -> TIMESTAMP(x) function
425        // Similar to MySQL behavior
426        match &c.to {
427            crate::expressions::DataType::Timestamp { .. } => Ok(Expression::Function(Box::new(
428                Function::new("TIMESTAMP".to_string(), vec![c.this]),
429            ))),
430            crate::expressions::DataType::Custom { name }
431                if name.to_uppercase() == "TIMESTAMPTZ"
432                    || name.to_uppercase() == "TIMESTAMPLTZ" =>
433            {
434                Ok(Expression::Function(Box::new(Function::new(
435                    "TIMESTAMP".to_string(),
436                    vec![c.this],
437                ))))
438            }
439            // StarRocks type mappings are handled in the generator
440            _ => Ok(Expression::Cast(Box::new(c))),
441        }
442    }
443
444    /// Transform LATERAL UNNEST for StarRocks
445    /// StarRocks requires UNNEST to have a default column alias of "unnest" if not specified.
446    /// Python reference: starrocks.py _parse_unnest
447    fn transform_lateral(&self, l: &mut Box<Lateral>) -> Result<()> {
448        // Check if the lateral expression contains UNNEST
449        if let Expression::Unnest(_) = &*l.this {
450            // If there's a table alias but no column aliases, add "unnest" as default column
451            if l.alias.is_some() && l.column_aliases.is_empty() {
452                l.column_aliases.push("unnest".to_string());
453            }
454            // If there's no alias at all, add both table alias "unnest" and column alias "unnest"
455            else if l.alias.is_none() {
456                l.alias = Some("unnest".to_string());
457                l.column_aliases.push("unnest".to_string());
458            }
459        }
460        Ok(())
461    }
462}