Skip to main content

polyglot_sql/dialects/
starrocks.rs

1//! StarRocks Dialect
2//!
3//! StarRocks-specific transformations based on sqlglot patterns.
4//! StarRocks is MySQL-compatible with OLAP extensions (similar to Doris).
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{AggFunc, Case, Cast, Expression, Function, Lateral, VarArgFunc};
9use crate::generator::GeneratorConfig;
10use crate::tokens::TokenizerConfig;
11
/// StarRocks dialect
///
/// Zero-sized marker type: it carries no state, and all StarRocks-specific behaviour
/// (tokenizer setup, generator setup, expression rewriting) is provided through its
/// [`DialectImpl`] implementation.
pub struct StarRocksDialect;
14
15impl DialectImpl for StarRocksDialect {
16    fn dialect_type(&self) -> DialectType {
17        DialectType::StarRocks
18    }
19
20    fn tokenizer_config(&self) -> TokenizerConfig {
21        use crate::tokens::TokenType;
22        let mut config = TokenizerConfig::default();
23        // StarRocks uses backticks for identifiers (MySQL-style)
24        config.identifiers.insert('`', '`');
25        // Remove double quotes from identifiers (MySQL-style)
26        config.identifiers.remove(&'"');
27        config.quotes.insert("\"".to_string(), "\"".to_string());
28        config.nested_comments = false;
29        // LARGEINT maps to INT128
30        config.keywords.insert("LARGEINT".to_string(), TokenType::Int128);
31        config
32    }
33
34    fn generator_config(&self) -> GeneratorConfig {
35        use crate::generator::IdentifierQuoteStyle;
36        GeneratorConfig {
37            identifier_quote: '`',
38            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
39            dialect: Some(DialectType::StarRocks),
40            // StarRocks: INSERT OVERWRITE (without TABLE keyword)
41            insert_overwrite: " OVERWRITE",
42            // StarRocks: PROPERTIES prefix for WITH properties
43            with_properties_prefix: "PROPERTIES",
44            // StarRocks uses MySQL-style settings
45            null_ordering_supported: false,
46            limit_only_literals: true,
47            semi_anti_join_with_side: false,
48            supports_table_alias_columns: false,
49            values_as_table: false,
50            tablesample_requires_parens: false,
51            tablesample_with_method: false,
52            aggregate_filter_supported: false,
53            try_supported: false,
54            supports_convert_timezone: false,
55            supports_uescape: false,
56            supports_between_flags: false,
57            query_hints: false,
58            parameter_token: "?",
59            supports_window_exclude: false,
60            supports_exploding_projections: false,
61            // StarRocks: COMMENT 'value' (naked property, no = sign)
62            schema_comment_with_eq: false,
63            ..Default::default()
64        }
65    }
66
67    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
68        match expr {
69            // IFNULL is native in StarRocks (MySQL-style)
70            Expression::IfNull(f) => Ok(Expression::IfNull(f)),
71
72            // NVL -> IFNULL in StarRocks
73            Expression::Nvl(f) => Ok(Expression::IfNull(f)),
74
75            // TryCast -> not directly supported, use CAST
76            Expression::TryCast(c) => Ok(Expression::Cast(c)),
77
78            // SafeCast -> CAST in StarRocks
79            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
80
81            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
82            Expression::CountIf(f) => {
83                let case_expr = Expression::Case(Box::new(Case {
84                    operand: None,
85                    whens: vec![(f.this.clone(), Expression::number(1))],
86                    else_: Some(Expression::number(0)),
87                }));
88                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
89                    this: case_expr,
90                    distinct: f.distinct,
91                    filter: f.filter,
92                    order_by: Vec::new(),
93                name: None,
94                limit: None,
95                })))
96            }
97
98            // RAND is native in StarRocks
99            Expression::Rand(r) => Ok(Expression::Rand(r)),
100
101            // JSON arrow syntax: preserve -> for StarRocks (arrow_json_extract_sql)
102            Expression::JsonExtract(mut f) => {
103                // Set arrow_syntax to true to preserve -> operator
104                f.arrow_syntax = true;
105                Ok(Expression::JsonExtract(f))
106            }
107
108            Expression::JsonExtractScalar(mut f) => {
109                // Set arrow_syntax to true to preserve ->> operator
110                f.arrow_syntax = true;
111                Ok(Expression::JsonExtractScalar(f))
112            }
113
114            // Generic function transformations
115            Expression::Function(f) => self.transform_function(*f),
116
117            // Generic aggregate function transformations
118            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
119
120            // Cast transformations
121            Expression::Cast(c) => self.transform_cast(*c),
122
123            // Handle LATERAL UNNEST - StarRocks requires column alias "unnest" by default
124            Expression::Lateral(mut l) => {
125                self.transform_lateral(&mut l)?;
126                Ok(Expression::Lateral(l))
127            }
128
129            // Pass through everything else
130            _ => Ok(expr),
131        }
132    }
133}
134
135impl StarRocksDialect {
136    fn transform_function(&self, f: Function) -> Result<Expression> {
137        let name_upper = f.name.to_uppercase();
138        match name_upper.as_str() {
139            // NVL -> IFNULL
140            "NVL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
141                "IFNULL".to_string(),
142                f.args,
143            )))),
144
145            // ISNULL -> IFNULL
146            "ISNULL" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
147                "IFNULL".to_string(),
148                f.args,
149            )))),
150
151            // COALESCE is native in StarRocks
152            "COALESCE" => Ok(Expression::Coalesce(Box::new(VarArgFunc { original_name: None,
153                expressions: f.args,
154            }))),
155
156            // NOW is native in StarRocks
157            "NOW" => Ok(Expression::CurrentTimestamp(
158                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
159            )),
160
161            // GETDATE -> NOW in StarRocks
162            "GETDATE" => Ok(Expression::CurrentTimestamp(
163                crate::expressions::CurrentTimestamp { precision: None, sysdate: false },
164            )),
165
166            // GROUP_CONCAT is native in StarRocks
167            "GROUP_CONCAT" => Ok(Expression::Function(Box::new(f))),
168
169            // STRING_AGG -> GROUP_CONCAT
170            "STRING_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
171                "GROUP_CONCAT".to_string(),
172                f.args,
173            )))),
174
175            // LISTAGG -> GROUP_CONCAT
176            "LISTAGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
177                "GROUP_CONCAT".to_string(),
178                f.args,
179            )))),
180
181            // SUBSTR is native in StarRocks
182            "SUBSTR" => Ok(Expression::Function(Box::new(f))),
183
184            // SUBSTRING is native in StarRocks
185            "SUBSTRING" => Ok(Expression::Function(Box::new(f))),
186
187            // LENGTH is native in StarRocks
188            "LENGTH" => Ok(Expression::Function(Box::new(f))),
189
190            // LEN -> LENGTH
191            "LEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
192                "LENGTH".to_string(),
193                f.args,
194            )))),
195
196            // CHARINDEX -> INSTR in StarRocks (with swapped args)
197            "CHARINDEX" if f.args.len() >= 2 => {
198                let mut args = f.args;
199                let substring = args.remove(0);
200                let string = args.remove(0);
201                Ok(Expression::Function(Box::new(Function::new(
202                    "INSTR".to_string(),
203                    vec![string, substring],
204                ))))
205            }
206
207            // STRPOS -> INSTR
208            "STRPOS" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
209                "INSTR".to_string(),
210                f.args,
211            )))),
212
213            // DATE_TRUNC is native in StarRocks
214            "DATE_TRUNC" => Ok(Expression::Function(Box::new(f))),
215
216            // ARRAY_AGG is native in StarRocks
217            "ARRAY_AGG" => Ok(Expression::Function(Box::new(f))),
218
219            // COLLECT_LIST -> ARRAY_AGG
220            "COLLECT_LIST" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
221                Function::new("ARRAY_AGG".to_string(), f.args),
222            ))),
223
224            // ARRAY_JOIN is native in StarRocks
225            "ARRAY_JOIN" => Ok(Expression::Function(Box::new(f))),
226
227            // ARRAY_FLATTEN is native in StarRocks
228            "ARRAY_FLATTEN" => Ok(Expression::Function(Box::new(f))),
229
230            // FLATTEN -> ARRAY_FLATTEN
231            "FLATTEN" if f.args.len() == 1 => Ok(Expression::Function(Box::new(Function::new(
232                "ARRAY_FLATTEN".to_string(),
233                f.args,
234            )))),
235
236            // TO_DATE is native in StarRocks
237            "TO_DATE" => Ok(Expression::Function(Box::new(f))),
238
239            // DATE_FORMAT is native in StarRocks
240            "DATE_FORMAT" => Ok(Expression::Function(Box::new(f))),
241
242            // strftime -> DATE_FORMAT
243            "STRFTIME" if f.args.len() >= 2 => {
244                let mut args = f.args;
245                let format = args.remove(0);
246                let date = args.remove(0);
247                Ok(Expression::Function(Box::new(Function::new(
248                    "DATE_FORMAT".to_string(),
249                    vec![date, format],
250                ))))
251            }
252
253            // TO_CHAR -> DATE_FORMAT
254            "TO_CHAR" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
255                "DATE_FORMAT".to_string(),
256                f.args,
257            )))),
258
259            // JSON_EXTRACT -> arrow operator in StarRocks
260            "JSON_EXTRACT" => Ok(Expression::Function(Box::new(f))),
261
262            // GET_JSON_OBJECT -> JSON_EXTRACT
263            "GET_JSON_OBJECT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(
264                Function::new("JSON_EXTRACT".to_string(), f.args),
265            ))),
266
267            // REGEXP is native in StarRocks
268            "REGEXP" => Ok(Expression::Function(Box::new(f))),
269
270            // RLIKE is native in StarRocks
271            "RLIKE" => Ok(Expression::Function(Box::new(f))),
272
273            // REGEXP_LIKE -> REGEXP
274            "REGEXP_LIKE" if f.args.len() >= 2 => Ok(Expression::Function(Box::new(Function::new(
275                "REGEXP".to_string(),
276                f.args,
277            )))),
278
279            // ARRAY_INTERSECTION -> ARRAY_INTERSECT
280            "ARRAY_INTERSECTION" => Ok(Expression::Function(Box::new(Function::new(
281                "ARRAY_INTERSECT".to_string(),
282                f.args,
283            )))),
284
285            // ST_MAKEPOINT -> ST_POINT
286            "ST_MAKEPOINT" if f.args.len() == 2 => Ok(Expression::Function(Box::new(Function::new(
287                "ST_POINT".to_string(),
288                f.args,
289            )))),
290
291            // ST_DISTANCE(a, b) -> ST_DISTANCE_SPHERE(ST_X(a), ST_Y(a), ST_X(b), ST_Y(b))
292            "ST_DISTANCE" if f.args.len() == 2 => {
293                let a = f.args[0].clone();
294                let b = f.args[1].clone();
295                Ok(Expression::Function(Box::new(Function::new(
296                    "ST_DISTANCE_SPHERE".to_string(),
297                    vec![
298                        Expression::Function(Box::new(Function::new("ST_X".to_string(), vec![a.clone()]))),
299                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![a]))),
300                        Expression::Function(Box::new(Function::new("ST_X".to_string(), vec![b.clone()]))),
301                        Expression::Function(Box::new(Function::new("ST_Y".to_string(), vec![b]))),
302                    ],
303                ))))
304            }
305
306            // Pass through everything else
307            _ => Ok(Expression::Function(Box::new(f))),
308        }
309    }
310
311    fn transform_aggregate_function(
312        &self,
313        f: Box<crate::expressions::AggregateFunction>,
314    ) -> Result<Expression> {
315        let name_upper = f.name.to_uppercase();
316        match name_upper.as_str() {
317            // COUNT_IF -> SUM(CASE WHEN...)
318            "COUNT_IF" if !f.args.is_empty() => {
319                let condition = f.args.into_iter().next().unwrap();
320                let case_expr = Expression::Case(Box::new(Case {
321                    operand: None,
322                    whens: vec![(condition, Expression::number(1))],
323                    else_: Some(Expression::number(0)),
324                }));
325                Ok(Expression::Sum(Box::new(AggFunc { ignore_nulls: None, having_max: None,
326                    this: case_expr,
327                    distinct: f.distinct,
328                    filter: f.filter,
329                    order_by: Vec::new(),
330                name: None,
331                limit: None,
332                })))
333            }
334
335            // APPROX_COUNT_DISTINCT is native in StarRocks
336            "APPROX_COUNT_DISTINCT" => Ok(Expression::AggregateFunction(f)),
337
338            // Pass through everything else
339            _ => Ok(Expression::AggregateFunction(f)),
340        }
341    }
342
343    fn transform_cast(&self, c: Cast) -> Result<Expression> {
344        // StarRocks type mappings are handled in the generator
345        Ok(Expression::Cast(Box::new(c)))
346    }
347
348    /// Transform LATERAL UNNEST for StarRocks
349    /// StarRocks requires UNNEST to have a default column alias of "unnest" if not specified.
350    /// Python reference: starrocks.py _parse_unnest
351    fn transform_lateral(&self, l: &mut Box<Lateral>) -> Result<()> {
352        // Check if the lateral expression contains UNNEST
353        if let Expression::Unnest(_) = &*l.this {
354            // If there's a table alias but no column aliases, add "unnest" as default column
355            if l.alias.is_some() && l.column_aliases.is_empty() {
356                l.column_aliases.push("unnest".to_string());
357            }
358            // If there's no alias at all, add both table alias "unnest" and column alias "unnest"
359            else if l.alias.is_none() {
360                l.alias = Some("unnest".to_string());
361                l.column_aliases.push("unnest".to_string());
362            }
363        }
364        Ok(())
365    }
366}