Skip to main content

polyglot_sql/dialects/
clickhouse.rs

1//! ClickHouse Dialect
2//!
3//! ClickHouse-specific transformations based on sqlglot patterns.
4//! ClickHouse is case-sensitive and has unique function naming conventions.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggregateFunction, BinaryOp, Case, Cast, Expression, Function, In, IsNull, LikeOp,
10    MapConstructor, Paren, UnaryOp,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
15/// ClickHouse dialect
16pub struct ClickHouseDialect;
17
18impl DialectImpl for ClickHouseDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::ClickHouse
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // ClickHouse uses double quotes and backticks for identifiers
26        config.identifiers.insert('"', '"');
27        config.identifiers.insert('`', '`');
28        // ClickHouse supports nested comments
29        config.nested_comments = true;
30        // ClickHouse allows identifiers to start with digits
31        config.identifiers_can_start_with_digit = true;
32        // ClickHouse uses backslash escaping in strings
33        config.string_escapes.push('\\');
34        // ClickHouse supports # as single-line comment
35        config.hash_comments = true;
36        // ClickHouse allows $ in identifiers
37        config.dollar_sign_is_identifier = true;
38        // ClickHouse: INSERT ... FORMAT <name> is followed by raw data
39        config.insert_format_raw_data = true;
40        // ClickHouse supports 0xDEADBEEF hex integer literals
41        config.hex_number_strings = true;
42        config.hex_string_is_integer_type = true;
43        // ClickHouse allows underscores as digit separators in numeric literals
44        config.numbers_can_be_underscore_separated = true;
45        config
46    }
47
48    fn generator_config(&self) -> GeneratorConfig {
49        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
50        GeneratorConfig {
51            identifier_quote: '"',
52            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
53            dialect: Some(DialectType::ClickHouse),
54            // ClickHouse uses uppercase keywords (matching Python SQLGlot behavior)
55            uppercase_keywords: true,
56            // ClickHouse function names are case-sensitive and typically camelCase
57            normalize_functions: NormalizeFunctions::None,
58            // ClickHouse identifiers are case-sensitive
59            case_sensitive_identifiers: true,
60            tablesample_keywords: "SAMPLE",
61            tablesample_requires_parens: false,
62            identifiers_can_start_with_digit: true,
63            // ClickHouse uses bracket-only notation for arrays: [1, 2, 3]
64            array_bracket_only: true,
65            ..Default::default()
66        }
67    }
68
69    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
70        let wrap_predicate_left = |expr: Expression| -> Expression {
71            let needs_parens = matches!(
72                expr,
73                Expression::Add(_)
74                    | Expression::Sub(_)
75                    | Expression::Mul(_)
76                    | Expression::Div(_)
77                    | Expression::Mod(_)
78                    | Expression::Concat(_)
79                    | Expression::And(_)
80                    | Expression::Or(_)
81                    | Expression::Not(_)
82                    | Expression::Case(_)
83            );
84
85            if needs_parens {
86                Expression::Paren(Box::new(Paren {
87                    this: expr,
88                    trailing_comments: Vec::new(),
89                }))
90            } else {
91                expr
92            }
93        };
94
95        let wrap_not_target = |expr: Expression| -> Expression {
96            match expr {
97                Expression::Paren(_) => expr,
98                Expression::In(_)
99                | Expression::Between(_)
100                | Expression::Is(_)
101                | Expression::IsNull(_)
102                | Expression::IsTrue(_)
103                | Expression::IsFalse(_)
104                | Expression::IsJson(_)
105                | Expression::Like(_)
106                | Expression::ILike(_)
107                | Expression::SimilarTo(_)
108                | Expression::Glob(_)
109                | Expression::RegexpLike(_)
110                | Expression::RegexpILike(_)
111                | Expression::MemberOf(_) => Expression::Paren(Box::new(Paren {
112                    this: expr,
113                    trailing_comments: Vec::new(),
114                })),
115                _ => expr,
116            }
117        };
118
119        let unwrap_in_array = |mut expressions: Vec<Expression>,
120                               query: &Option<Expression>,
121                               unnest: &Option<Box<Expression>>|
122         -> Vec<Expression> {
123            if query.is_none() && unnest.is_none() && expressions.len() == 1 {
124                if matches!(expressions[0], Expression::ArrayFunc(_)) {
125                    if let Expression::ArrayFunc(arr) = expressions.remove(0) {
126                        return arr.expressions;
127                    }
128                }
129            }
130            expressions
131        };
132
133        match expr {
134            // TryCast stays as TryCast (ClickHouse doesn't have TRY_CAST by default)
135            // But we can emulate with toXOrNull functions
136            Expression::TryCast(c) => {
137                // For simplicity, just use regular CAST
138                // ClickHouse has toXOrNull/toXOrZero functions for safe casts
139                Ok(Expression::Cast(c))
140            }
141
142            // SafeCast -> CAST in ClickHouse
143            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
144
145            // CountIf is native in ClickHouse (lowercase)
146            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
147                "countIf".to_string(),
148                vec![f.this],
149            )))),
150
151            // UNNEST -> arrayJoin in ClickHouse
152            Expression::Unnest(f) => Ok(Expression::Function(Box::new(Function::new(
153                "arrayJoin".to_string(),
154                vec![f.this],
155            )))),
156
157            // EXPLODE -> arrayJoin in ClickHouse
158            Expression::Explode(f) => Ok(Expression::Function(Box::new(Function::new(
159                "arrayJoin".to_string(),
160                vec![f.this],
161            )))),
162
163            // ExplodeOuter -> arrayJoin in ClickHouse
164            Expression::ExplodeOuter(f) => Ok(Expression::Function(Box::new(Function::new(
165                "arrayJoin".to_string(),
166                vec![f.this],
167            )))),
168
169            // RAND -> randCanonical() in ClickHouse
170            Expression::Rand(_) => Ok(Expression::Function(Box::new(Function::new(
171                "randCanonical".to_string(),
172                vec![],
173            )))),
174
175            // Random -> randCanonical() in ClickHouse
176            Expression::Random(_) => Ok(Expression::Function(Box::new(Function::new(
177                "randCanonical".to_string(),
178                vec![],
179            )))),
180
181            // startsWith -> startsWith
182            Expression::StartsWith(f) => Ok(Expression::Function(Box::new(Function::new(
183                "startsWith".to_string(),
184                vec![f.this, f.expression],
185            )))),
186
187            // endsWith -> endsWith
188            Expression::EndsWith(f) => Ok(Expression::Function(Box::new(Function::new(
189                "endsWith".to_string(),
190                vec![f.this, f.expression],
191            )))),
192
193            // ClickHouse prefers NOT (x IN (...)) over x NOT IN (...)
194            Expression::In(in_expr) if in_expr.not => {
195                if in_expr.global {
196                    return Ok(Expression::In(in_expr));
197                }
198                let In {
199                    this,
200                    expressions,
201                    query,
202                    unnest,
203                    global,
204                    is_field,
205                    ..
206                } = *in_expr;
207                let expressions = unwrap_in_array(expressions, &query, &unnest);
208                let base = Expression::In(Box::new(In {
209                    this: wrap_predicate_left(this),
210                    expressions,
211                    query,
212                    not: false,
213                    global,
214                    unnest,
215                    is_field,
216                }));
217                Ok(Expression::Not(Box::new(UnaryOp {
218                    this: wrap_not_target(base),
219                    inferred_type: None,
220                })))
221            }
222
223            // ClickHouse prefers NOT (x IS NULL) over x IS NOT NULL
224            Expression::IsNull(is_null) if is_null.not => {
225                let IsNull { this, .. } = *is_null;
226                let base = Expression::IsNull(Box::new(IsNull {
227                    this: wrap_predicate_left(this),
228                    not: false,
229                    postfix_form: false,
230                }));
231                Ok(Expression::Not(Box::new(UnaryOp {
232                    this: wrap_not_target(base),
233                    inferred_type: None,
234                })))
235            }
236
237            Expression::In(mut in_expr) => {
238                in_expr.expressions =
239                    unwrap_in_array(in_expr.expressions, &in_expr.query, &in_expr.unnest);
240                in_expr.this = wrap_predicate_left(in_expr.this);
241                Ok(Expression::In(in_expr))
242            }
243
244            Expression::IsNull(mut is_null) => {
245                is_null.this = wrap_predicate_left(is_null.this);
246                Ok(Expression::IsNull(is_null))
247            }
248
249            // IF(cond, true, false) -> CASE WHEN cond THEN true ELSE false END
250            Expression::IfFunc(f) => {
251                let f = *f;
252                Ok(Expression::Case(Box::new(Case {
253                    operand: None,
254                    whens: vec![(f.condition, f.true_value)],
255                    else_: f.false_value,
256                    comments: Vec::new(),
257                    inferred_type: None,
258                })))
259            }
260
261            Expression::Is(mut is_expr) => {
262                is_expr.left = wrap_predicate_left(is_expr.left);
263                Ok(Expression::Is(is_expr))
264            }
265
266            Expression::Or(op) => {
267                let BinaryOp {
268                    left,
269                    right,
270                    left_comments,
271                    operator_comments,
272                    trailing_comments,
273                    ..
274                } = *op;
275                let left = if matches!(left, Expression::And(_)) {
276                    Expression::Paren(Box::new(Paren {
277                        this: left,
278                        trailing_comments: Vec::new(),
279                    }))
280                } else {
281                    left
282                };
283                let right = if matches!(right, Expression::And(_)) {
284                    Expression::Paren(Box::new(Paren {
285                        this: right,
286                        trailing_comments: Vec::new(),
287                    }))
288                } else {
289                    right
290                };
291                Ok(Expression::Or(Box::new(BinaryOp {
292                    left,
293                    right,
294                    left_comments,
295                    operator_comments,
296                    trailing_comments,
297                    inferred_type: None,
298                })))
299            }
300
301            Expression::Not(op) => {
302                let inner = wrap_not_target(op.this);
303                Ok(Expression::Not(Box::new(UnaryOp {
304                    this: inner,
305                    inferred_type: None,
306                })))
307            }
308
309            Expression::MapFunc(map) if map.curly_brace_syntax => {
310                let MapConstructor { keys, values, .. } = *map;
311                let mut args = Vec::with_capacity(keys.len() * 2);
312                for (key, value) in keys.into_iter().zip(values.into_iter()) {
313                    args.push(key);
314                    args.push(value);
315                }
316                Ok(Expression::Function(Box::new(Function::new(
317                    "map".to_string(),
318                    args,
319                ))))
320            }
321
322            Expression::Insert(mut insert) => {
323                for row in insert.values.iter_mut() {
324                    for value in row.iter_mut() {
325                        if !matches!(value, Expression::Paren(_)) {
326                            let wrapped = Expression::Paren(Box::new(Paren {
327                                this: value.clone(),
328                                trailing_comments: Vec::new(),
329                            }));
330                            *value = wrapped;
331                        }
332                    }
333                }
334                Ok(Expression::Insert(insert))
335            }
336
337            // Generic function transformations
338            Expression::Function(f) => self.transform_function(*f),
339
340            // Generic aggregate function transformations
341            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
342
343            // Cast transformations
344            Expression::Cast(c) => self.transform_cast(*c),
345
346            // TYPEOF -> toTypeName in ClickHouse
347            Expression::Typeof(f) => Ok(Expression::Function(Box::new(Function::new(
348                "toTypeName".to_string(),
349                vec![f.this],
350            )))),
351
352            // Pass through everything else
353            _ => Ok(expr),
354        }
355    }
356}
357
358impl ClickHouseDialect {
359}
360
361impl ClickHouseDialect {
362    fn transform_function(&self, f: Function) -> Result<Expression> {
363        let name_upper = f.name.to_uppercase();
364        match name_upper.as_str() {
365            // UTCTimestamp() -> CURRENT_TIMESTAMP('UTC')
366            "UTCTIMESTAMP" => Ok(Expression::UtcTimestamp(Box::new(
367                crate::expressions::UtcTimestamp { this: None },
368            ))),
369
370            "CURRENTDATABASE" | "CURRENT_DATABASE" => Ok(Expression::Function(Box::new(
371                Function::new("CURRENT_DATABASE".to_string(), f.args),
372            ))),
373            "CURRENTSCHEMAS" | "CURRENT_SCHEMAS" => Ok(Expression::Function(Box::new(
374                Function::new("CURRENT_SCHEMAS".to_string(), f.args),
375            ))),
376            "LEVENSHTEIN" | "LEVENSHTEINDISTANCE" | "EDITDISTANCE" => Ok(Expression::Function(
377                Box::new(Function::new("editDistance".to_string(), f.args)),
378            )),
379            "CHAR" | "CHR" => Ok(Expression::Function(Box::new(Function::new(
380                "CHAR".to_string(),
381                f.args,
382            )))),
383            "STR_TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
384                "STR_TO_DATE".to_string(),
385                f.args,
386            )))),
387            "JSONEXTRACTSTRING" => Ok(Expression::Function(Box::new(Function::new(
388                "JSONExtractString".to_string(),
389                f.args,
390            )))),
391            "MATCH" => Ok(Expression::Function(Box::new(Function::new(
392                "match".to_string(),
393                f.args,
394            )))),
395            "LIKE" if f.args.len() == 2 => {
396                let left = f.args[0].clone();
397                let right = f.args[1].clone();
398                Ok(Expression::Like(Box::new(LikeOp::new(left, right))))
399            }
400            "NOTLIKE" if f.args.len() == 2 => {
401                let left = f.args[0].clone();
402                let right = f.args[1].clone();
403                let like = Expression::Like(Box::new(LikeOp::new(left, right)));
404                Ok(Expression::Not(Box::new(UnaryOp {
405                    this: like,
406                    inferred_type: None,
407                })))
408            }
409            "ILIKE" if f.args.len() == 2 => {
410                let left = f.args[0].clone();
411                let right = f.args[1].clone();
412                Ok(Expression::ILike(Box::new(LikeOp::new(left, right))))
413            }
414            "AND" if f.args.len() >= 2 => {
415                let mut iter = f.args.into_iter();
416                let mut expr = iter.next().unwrap();
417                for arg in iter {
418                    expr = Expression::And(Box::new(BinaryOp::new(expr, arg)));
419                }
420                Ok(expr)
421            }
422            "OR" if f.args.len() >= 2 => {
423                let mut iter = f.args.into_iter();
424                let mut expr = iter.next().unwrap();
425                for arg in iter {
426                    expr = Expression::Or(Box::new(BinaryOp::new(expr, arg)));
427                }
428                self.transform_expr(expr)
429            }
430            // TYPEOF -> toTypeName in ClickHouse
431            "TYPEOF" => Ok(Expression::Function(Box::new(Function::new(
432                "toTypeName".to_string(),
433                f.args,
434            )))),
435
436            // DATE_TRUNC: ClickHouse uses dateTrunc (camelCase)
437            "DATE_TRUNC" | "DATETRUNC" => Ok(Expression::Function(Box::new(Function::new(
438                "dateTrunc".to_string(),
439                f.args,
440            )))),
441            "TOSTARTOFDAY" if f.args.len() == 1 => {
442                Ok(Expression::Function(Box::new(Function::new(
443                    "dateTrunc".to_string(),
444                    vec![Expression::string("DAY"), f.args[0].clone()],
445                ))))
446            }
447
448            // SUBSTRING_INDEX: preserve original case (substringIndex in ClickHouse)
449            "SUBSTRING_INDEX" => Ok(Expression::Function(Box::new(Function::new(
450                f.name.clone(),
451                f.args,
452            )))),
453
454            // IS_NAN / ISNAN -> isNaN (ClickHouse camelCase)
455            "IS_NAN" | "ISNAN" => Ok(Expression::Function(Box::new(Function::new(
456                "isNaN".to_string(),
457                f.args,
458            )))),
459
460            _ => Ok(Expression::Function(Box::new(f))),
461        }
462    }
463
464    fn transform_aggregate_function(
465        &self,
466        f: Box<crate::expressions::AggregateFunction>,
467    ) -> Result<Expression> {
468        let name_upper = f.name.to_uppercase();
469        match name_upper.as_str() {
470            // COUNT_IF -> countIf
471            "COUNT_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
472                "countIf".to_string(),
473                f.args,
474            )))),
475
476            // SUM_IF -> sumIf
477            "SUM_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
478                "sumIf".to_string(),
479                f.args,
480            )))),
481
482            // AVG_IF -> avgIf
483            "AVG_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
484                "avgIf".to_string(),
485                f.args,
486            )))),
487
488            // ANY_VALUE -> any in ClickHouse
489            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
490                "any".to_string(),
491                f.args,
492            )))),
493
494            // GROUP_CONCAT -> groupArray + arrayStringConcat
495            "GROUP_CONCAT" if !f.args.is_empty() => {
496                let mut args = f.args;
497                let first = args.remove(0);
498                let separator = args.pop();
499                let group_array = Expression::Function(Box::new(Function::new(
500                    "groupArray".to_string(),
501                    vec![first],
502                )));
503                if let Some(sep) = separator {
504                    Ok(Expression::Function(Box::new(Function::new(
505                        "arrayStringConcat".to_string(),
506                        vec![group_array, sep],
507                    ))))
508                } else {
509                    Ok(Expression::Function(Box::new(Function::new(
510                        "arrayStringConcat".to_string(),
511                        vec![group_array],
512                    ))))
513                }
514            }
515
516            // STRING_AGG -> groupArray + arrayStringConcat
517            "STRING_AGG" if !f.args.is_empty() => {
518                let mut args = f.args;
519                let first = args.remove(0);
520                let separator = args.pop();
521                let group_array = Expression::Function(Box::new(Function::new(
522                    "groupArray".to_string(),
523                    vec![first],
524                )));
525                if let Some(sep) = separator {
526                    Ok(Expression::Function(Box::new(Function::new(
527                        "arrayStringConcat".to_string(),
528                        vec![group_array, sep],
529                    ))))
530                } else {
531                    Ok(Expression::Function(Box::new(Function::new(
532                        "arrayStringConcat".to_string(),
533                        vec![group_array],
534                    ))))
535                }
536            }
537
538            // LISTAGG -> groupArray + arrayStringConcat
539            "LISTAGG" if !f.args.is_empty() => {
540                let mut args = f.args;
541                let first = args.remove(0);
542                let separator = args.pop();
543                let group_array = Expression::Function(Box::new(Function::new(
544                    "groupArray".to_string(),
545                    vec![first],
546                )));
547                if let Some(sep) = separator {
548                    Ok(Expression::Function(Box::new(Function::new(
549                        "arrayStringConcat".to_string(),
550                        vec![group_array, sep],
551                    ))))
552                } else {
553                    Ok(Expression::Function(Box::new(Function::new(
554                        "arrayStringConcat".to_string(),
555                        vec![group_array],
556                    ))))
557                }
558            }
559
560            // ARRAY_AGG -> groupArray
561            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
562                "groupArray".to_string(),
563                f.args,
564            )))),
565
566            // STDDEV -> stddevSamp in ClickHouse (sample stddev)
567            "STDDEV" if !f.args.is_empty() => {
568                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
569                    name: "stddevSamp".to_string(),
570                    args: f.args,
571                    distinct: f.distinct,
572                    filter: f.filter,
573                    order_by: Vec::new(),
574                    limit: None,
575                    ignore_nulls: None,
576                    inferred_type: None,
577                })))
578            }
579
580            // STDDEV_POP -> stddevPop
581            "STDDEV_POP" if !f.args.is_empty() => {
582                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
583                    name: "stddevPop".to_string(),
584                    args: f.args,
585                    distinct: f.distinct,
586                    filter: f.filter,
587                    order_by: Vec::new(),
588                    limit: None,
589                    ignore_nulls: None,
590                    inferred_type: None,
591                })))
592            }
593
594            // VARIANCE -> varSamp in ClickHouse
595            "VARIANCE" if !f.args.is_empty() => {
596                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
597                    name: "varSamp".to_string(),
598                    args: f.args,
599                    distinct: f.distinct,
600                    filter: f.filter,
601                    order_by: Vec::new(),
602                    limit: None,
603                    ignore_nulls: None,
604                    inferred_type: None,
605                })))
606            }
607
608            // VAR_POP -> varPop
609            "VAR_POP" if !f.args.is_empty() => {
610                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
611                    name: "varPop".to_string(),
612                    args: f.args,
613                    distinct: f.distinct,
614                    filter: f.filter,
615                    order_by: Vec::new(),
616                    limit: None,
617                    ignore_nulls: None,
618                    inferred_type: None,
619                })))
620            }
621
622            // MEDIAN -> median
623            "MEDIAN" if !f.args.is_empty() => {
624                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
625                    name: "median".to_string(),
626                    args: f.args,
627                    distinct: f.distinct,
628                    filter: f.filter,
629                    order_by: Vec::new(),
630                    limit: None,
631                    ignore_nulls: None,
632                    inferred_type: None,
633                })))
634            }
635
636            // APPROX_COUNT_DISTINCT -> uniq in ClickHouse
637            "APPROX_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
638                Function::new("uniq".to_string(), f.args),
639            ))),
640
641            // APPROX_DISTINCT -> uniq
642            "APPROX_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
643                Function::new("uniq".to_string(), f.args),
644            ))),
645
646            _ => Ok(Expression::AggregateFunction(f)),
647        }
648    }
649
650    fn transform_cast(&self, c: Cast) -> Result<Expression> {
651        Ok(Expression::Cast(Box::new(c)))
652    }
653}