Skip to main content

polyglot_sql/dialects/
clickhouse.rs

1//! ClickHouse Dialect
2//!
3//! ClickHouse-specific transformations based on sqlglot patterns.
4//! ClickHouse is case-sensitive and has unique function naming conventions.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{AggregateFunction, BinaryOp, Case, Cast, Expression, Function, In, IsNull, LikeOp, MapConstructor, Paren, UnaryOp};
9use crate::generator::GeneratorConfig;
10use crate::tokens::TokenizerConfig;
11
12/// ClickHouse dialect
13pub struct ClickHouseDialect;
14
15impl DialectImpl for ClickHouseDialect {
16    fn dialect_type(&self) -> DialectType {
17        DialectType::ClickHouse
18    }
19
20    fn tokenizer_config(&self) -> TokenizerConfig {
21        let mut config = TokenizerConfig::default();
22        // ClickHouse uses double quotes and backticks for identifiers
23        config.identifiers.insert('"', '"');
24        config.identifiers.insert('`', '`');
25        // ClickHouse does NOT support nested comments
26        config.nested_comments = false;
27        // ClickHouse allows identifiers to start with digits
28        config.identifiers_can_start_with_digit = true;
29        // ClickHouse uses backslash escaping in strings
30        config.string_escapes.push('\\');
31        config
32    }
33
34    fn generator_config(&self) -> GeneratorConfig {
35        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
36        GeneratorConfig {
37            identifier_quote: '"',
38            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
39            dialect: Some(DialectType::ClickHouse),
40            // ClickHouse uses uppercase keywords (matching Python SQLGlot behavior)
41            uppercase_keywords: true,
42            // ClickHouse function names are case-sensitive and typically camelCase
43            normalize_functions: NormalizeFunctions::None,
44            // ClickHouse identifiers are case-sensitive
45            case_sensitive_identifiers: true,
46            tablesample_keywords: "SAMPLE",
47            tablesample_requires_parens: false,
48            identifiers_can_start_with_digit: true,
49            // ClickHouse uses bracket-only notation for arrays: [1, 2, 3]
50            array_bracket_only: true,
51            ..Default::default()
52        }
53    }
54
55    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
56        let wrap_predicate_left = |expr: Expression| -> Expression {
57            let needs_parens = matches!(
58                expr,
59                Expression::Add(_)
60                    | Expression::Sub(_)
61                    | Expression::Mul(_)
62                    | Expression::Div(_)
63                    | Expression::Mod(_)
64                    | Expression::Concat(_)
65                    | Expression::And(_)
66                    | Expression::Or(_)
67                    | Expression::Not(_)
68                    | Expression::Case(_)
69            );
70
71            if needs_parens {
72                Expression::Paren(Box::new(Paren {
73                    this: expr,
74                    trailing_comments: Vec::new(),
75                }))
76            } else {
77                expr
78            }
79        };
80
81        let wrap_not_target = |expr: Expression| -> Expression {
82            match expr {
83                Expression::Paren(_) => expr,
84                Expression::In(_)
85                | Expression::Between(_)
86                | Expression::Is(_)
87                | Expression::IsNull(_)
88                | Expression::IsTrue(_)
89                | Expression::IsFalse(_)
90                | Expression::IsJson(_)
91                | Expression::Like(_)
92                | Expression::ILike(_)
93                | Expression::SimilarTo(_)
94                | Expression::Glob(_)
95                | Expression::RegexpLike(_)
96                | Expression::RegexpILike(_)
97                | Expression::MemberOf(_) => Expression::Paren(Box::new(Paren {
98                    this: expr,
99                    trailing_comments: Vec::new(),
100                })),
101                _ => expr,
102            }
103        };
104
105        let unwrap_in_array = |mut expressions: Vec<Expression>,
106                               query: &Option<Expression>,
107                               unnest: &Option<Box<Expression>>| -> Vec<Expression> {
108            if query.is_none() && unnest.is_none() && expressions.len() == 1 {
109                if matches!(expressions[0], Expression::ArrayFunc(_)) {
110                    if let Expression::ArrayFunc(arr) = expressions.remove(0) {
111                        return arr.expressions;
112                    }
113                }
114            }
115            expressions
116        };
117
118        match expr {
119            // TryCast stays as TryCast (ClickHouse doesn't have TRY_CAST by default)
120            // But we can emulate with toXOrNull functions
121            Expression::TryCast(c) => {
122                // For simplicity, just use regular CAST
123                // ClickHouse has toXOrNull/toXOrZero functions for safe casts
124                Ok(Expression::Cast(c))
125            }
126
127            // SafeCast -> CAST in ClickHouse
128            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
129
130            // CountIf is native in ClickHouse (lowercase)
131            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
132                "countIf".to_string(),
133                vec![f.this],
134            )))),
135
136            // UNNEST -> arrayJoin in ClickHouse
137            Expression::Unnest(f) => Ok(Expression::Function(Box::new(Function::new(
138                "arrayJoin".to_string(),
139                vec![f.this],
140            )))),
141
142            // EXPLODE -> arrayJoin in ClickHouse
143            Expression::Explode(f) => Ok(Expression::Function(Box::new(Function::new(
144                "arrayJoin".to_string(),
145                vec![f.this],
146            )))),
147
148            // ExplodeOuter -> arrayJoin in ClickHouse
149            Expression::ExplodeOuter(f) => Ok(Expression::Function(Box::new(Function::new(
150                "arrayJoin".to_string(),
151                vec![f.this],
152            )))),
153
154            // RAND -> randCanonical() in ClickHouse
155            Expression::Rand(_) => Ok(Expression::Function(Box::new(Function::new(
156                "randCanonical".to_string(),
157                vec![],
158            )))),
159
160            // Random -> randCanonical() in ClickHouse
161            Expression::Random(_) => Ok(Expression::Function(Box::new(Function::new(
162                "randCanonical".to_string(),
163                vec![],
164            )))),
165
166            // startsWith -> startsWith
167            Expression::StartsWith(f) => Ok(Expression::Function(Box::new(Function::new(
168                "startsWith".to_string(),
169                vec![f.this, f.expression],
170            )))),
171
172            // endsWith -> endsWith
173            Expression::EndsWith(f) => Ok(Expression::Function(Box::new(Function::new(
174                "endsWith".to_string(),
175                vec![f.this, f.expression],
176            )))),
177
178            // ClickHouse prefers NOT (x IN (...)) over x NOT IN (...)
179            Expression::In(in_expr) if in_expr.not => {
180                if in_expr.global {
181                    return Ok(Expression::In(in_expr));
182                }
183                let In { this, expressions, query, unnest, global, .. } = *in_expr;
184                let expressions = unwrap_in_array(expressions, &query, &unnest);
185                let base = Expression::In(Box::new(In {
186                    this: wrap_predicate_left(this),
187                    expressions,
188                    query,
189                    not: false,
190                    global,
191                    unnest,
192                }));
193                Ok(Expression::Not(Box::new(UnaryOp {
194                    this: wrap_not_target(base),
195                })))
196            }
197
198            // ClickHouse prefers NOT (x IS NULL) over x IS NOT NULL
199            Expression::IsNull(is_null) if is_null.not => {
200                let IsNull { this, .. } = *is_null;
201                let base = Expression::IsNull(Box::new(IsNull {
202                    this: wrap_predicate_left(this),
203                    not: false,
204                    postfix_form: false,
205                }));
206                Ok(Expression::Not(Box::new(UnaryOp {
207                    this: wrap_not_target(base),
208                })))
209            }
210
211            Expression::In(mut in_expr) => {
212                in_expr.expressions = unwrap_in_array(in_expr.expressions, &in_expr.query, &in_expr.unnest);
213                in_expr.this = wrap_predicate_left(in_expr.this);
214                Ok(Expression::In(in_expr))
215            }
216
217            Expression::IsNull(mut is_null) => {
218                is_null.this = wrap_predicate_left(is_null.this);
219                Ok(Expression::IsNull(is_null))
220            }
221
222            // IF(cond, true, false) -> CASE WHEN cond THEN true ELSE false END
223            Expression::IfFunc(f) => {
224                let f = *f;
225                Ok(Expression::Case(Box::new(Case {
226                    operand: None,
227                    whens: vec![(f.condition, f.true_value)],
228                    else_: f.false_value,
229                })))
230            }
231
232            Expression::Is(mut is_expr) => {
233                is_expr.left = wrap_predicate_left(is_expr.left);
234                Ok(Expression::Is(is_expr))
235            }
236
237            Expression::Or(op) => {
238                let BinaryOp {
239                    left,
240                    right,
241                    left_comments,
242                    operator_comments,
243                    trailing_comments,
244                } = *op;
245                let left = if matches!(left, Expression::And(_)) {
246                    Expression::Paren(Box::new(Paren {
247                        this: left,
248                        trailing_comments: Vec::new(),
249                    }))
250                } else {
251                    left
252                };
253                let right = if matches!(right, Expression::And(_)) {
254                    Expression::Paren(Box::new(Paren {
255                        this: right,
256                        trailing_comments: Vec::new(),
257                    }))
258                } else {
259                    right
260                };
261                Ok(Expression::Or(Box::new(BinaryOp {
262                    left,
263                    right,
264                    left_comments,
265                    operator_comments,
266                    trailing_comments,
267                })))
268            }
269
270            Expression::Not(op) => {
271                let inner = wrap_not_target(op.this);
272                Ok(Expression::Not(Box::new(UnaryOp { this: inner })))
273            }
274
275            Expression::MapFunc(map) if map.curly_brace_syntax => {
276                let MapConstructor { keys, values, .. } = *map;
277                let mut args = Vec::with_capacity(keys.len() * 2);
278                for (key, value) in keys.into_iter().zip(values.into_iter()) {
279                    args.push(key);
280                    args.push(value);
281                }
282                Ok(Expression::Function(Box::new(Function::new(
283                    "map".to_string(),
284                    args,
285                ))))
286            }
287
288            Expression::Insert(mut insert) => {
289                for row in insert.values.iter_mut() {
290                    for value in row.iter_mut() {
291                        if !matches!(value, Expression::Paren(_)) {
292                            let wrapped = Expression::Paren(Box::new(Paren {
293                                this: value.clone(),
294                                trailing_comments: Vec::new(),
295                            }));
296                            *value = wrapped;
297                        }
298                    }
299                }
300                Ok(Expression::Insert(insert))
301            }
302
303            // Generic function transformations
304            Expression::Function(f) => self.transform_function(*f),
305
306            // Generic aggregate function transformations
307            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
308
309            // Cast transformations
310            Expression::Cast(c) => self.transform_cast(*c),
311
312            // TYPEOF -> toTypeName in ClickHouse
313            Expression::Typeof(f) => Ok(Expression::Function(Box::new(Function::new(
314                "toTypeName".to_string(),
315                vec![f.this],
316            )))),
317
318            // Pass through everything else
319            _ => Ok(expr),
320        }
321    }
322}
323
324impl ClickHouseDialect {
325    fn transform_function(&self, f: Function) -> Result<Expression> {
326        let name_upper = f.name.to_uppercase();
327        match name_upper.as_str() {
328            "CURRENTDATABASE" | "CURRENT_DATABASE" => Ok(Expression::Function(Box::new(Function::new(
329                "CURRENT_DATABASE".to_string(),
330                f.args,
331            )))),
332            "CURRENTSCHEMAS" | "CURRENT_SCHEMAS" => Ok(Expression::Function(Box::new(Function::new(
333                "CURRENT_SCHEMAS".to_string(),
334                f.args,
335            )))),
336            "LEVENSHTEIN" | "LEVENSHTEINDISTANCE" | "EDITDISTANCE" => Ok(Expression::Function(Box::new(
337                Function::new("editDistance".to_string(), f.args),
338            ))),
339            "CHAR" | "CHR" => Ok(Expression::Function(Box::new(Function::new(
340                "CHAR".to_string(),
341                f.args,
342            )))),
343            "STR_TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
344                "STR_TO_DATE".to_string(),
345                f.args,
346            )))),
347            "JSONEXTRACTSTRING" => Ok(Expression::Function(Box::new(Function::new(
348                "JSONExtractString".to_string(),
349                f.args,
350            )))),
351            "MATCH" => Ok(Expression::Function(Box::new(Function::new(
352                "match".to_string(),
353                f.args,
354            )))),
355            "LIKE" if f.args.len() == 2 => {
356                let left = f.args[0].clone();
357                let right = f.args[1].clone();
358                Ok(Expression::Like(Box::new(LikeOp::new(left, right))))
359            }
360            "NOTLIKE" if f.args.len() == 2 => {
361                let left = f.args[0].clone();
362                let right = f.args[1].clone();
363                let like = Expression::Like(Box::new(LikeOp::new(left, right)));
364                Ok(Expression::Not(Box::new(UnaryOp { this: like })))
365            }
366            "ILIKE" if f.args.len() == 2 => {
367                let left = f.args[0].clone();
368                let right = f.args[1].clone();
369                Ok(Expression::ILike(Box::new(LikeOp::new(left, right))))
370            }
371            "AND" if f.args.len() >= 2 => {
372                let mut iter = f.args.into_iter();
373                let mut expr = iter.next().unwrap();
374                for arg in iter {
375                    expr = Expression::And(Box::new(BinaryOp::new(expr, arg)));
376                }
377                Ok(expr)
378            }
379            "OR" if f.args.len() >= 2 => {
380                let mut iter = f.args.into_iter();
381                let mut expr = iter.next().unwrap();
382                for arg in iter {
383                    expr = Expression::Or(Box::new(BinaryOp::new(expr, arg)));
384                }
385                self.transform_expr(expr)
386            }
387            // TYPEOF -> toTypeName in ClickHouse
388            "TYPEOF" => Ok(Expression::Function(Box::new(Function::new(
389                "toTypeName".to_string(),
390                f.args,
391            )))),
392
393            // DATE_TRUNC: ClickHouse uses dateTrunc (camelCase)
394            "DATE_TRUNC" | "DATETRUNC" => Ok(Expression::Function(Box::new(Function::new(
395                "dateTrunc".to_string(),
396                f.args,
397            )))),
398            "TOSTARTOFDAY" if f.args.len() == 1 => Ok(Expression::Function(Box::new(
399                Function::new("dateTrunc".to_string(), vec![Expression::string("DAY"), f.args[0].clone()]),
400            ))),
401
402            // SUBSTRING_INDEX: preserve original case (substringIndex in ClickHouse)
403            "SUBSTRING_INDEX" => Ok(Expression::Function(Box::new(Function::new(
404                f.name.clone(),
405                f.args,
406            )))),
407
408            // IS_NAN / ISNAN -> isNaN (ClickHouse camelCase)
409            "IS_NAN" | "ISNAN" => Ok(Expression::Function(Box::new(Function::new(
410                "isNaN".to_string(),
411                f.args,
412            )))),
413
414            _ => Ok(Expression::Function(Box::new(f))),
415        }
416    }
417
418    fn transform_aggregate_function(
419        &self,
420        f: Box<crate::expressions::AggregateFunction>,
421    ) -> Result<Expression> {
422        let name_upper = f.name.to_uppercase();
423        match name_upper.as_str() {
424            // COUNT_IF -> countIf
425            "COUNT_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
426                "countIf".to_string(),
427                f.args,
428            )))),
429
430            // SUM_IF -> sumIf
431            "SUM_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
432                "sumIf".to_string(),
433                f.args,
434            )))),
435
436            // AVG_IF -> avgIf
437            "AVG_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
438                "avgIf".to_string(),
439                f.args,
440            )))),
441
442            // ANY_VALUE -> any in ClickHouse
443            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
444                Function::new("any".to_string(), f.args),
445            ))),
446
447            // GROUP_CONCAT -> groupArray + arrayStringConcat
448            "GROUP_CONCAT" if !f.args.is_empty() => {
449                let mut args = f.args;
450                let first = args.remove(0);
451                let separator = args.pop();
452                let group_array = Expression::Function(Box::new(Function::new(
453                    "groupArray".to_string(),
454                    vec![first],
455                )));
456                if let Some(sep) = separator {
457                    Ok(Expression::Function(Box::new(Function::new(
458                        "arrayStringConcat".to_string(),
459                        vec![group_array, sep],
460                    ))))
461                } else {
462                    Ok(Expression::Function(Box::new(Function::new(
463                        "arrayStringConcat".to_string(),
464                        vec![group_array],
465                    ))))
466                }
467            }
468
469            // STRING_AGG -> groupArray + arrayStringConcat
470            "STRING_AGG" if !f.args.is_empty() => {
471                let mut args = f.args;
472                let first = args.remove(0);
473                let separator = args.pop();
474                let group_array = Expression::Function(Box::new(Function::new(
475                    "groupArray".to_string(),
476                    vec![first],
477                )));
478                if let Some(sep) = separator {
479                    Ok(Expression::Function(Box::new(Function::new(
480                        "arrayStringConcat".to_string(),
481                        vec![group_array, sep],
482                    ))))
483                } else {
484                    Ok(Expression::Function(Box::new(Function::new(
485                        "arrayStringConcat".to_string(),
486                        vec![group_array],
487                    ))))
488                }
489            }
490
491            // LISTAGG -> groupArray + arrayStringConcat
492            "LISTAGG" if !f.args.is_empty() => {
493                let mut args = f.args;
494                let first = args.remove(0);
495                let separator = args.pop();
496                let group_array = Expression::Function(Box::new(Function::new(
497                    "groupArray".to_string(),
498                    vec![first],
499                )));
500                if let Some(sep) = separator {
501                    Ok(Expression::Function(Box::new(Function::new(
502                        "arrayStringConcat".to_string(),
503                        vec![group_array, sep],
504                    ))))
505                } else {
506                    Ok(Expression::Function(Box::new(Function::new(
507                        "arrayStringConcat".to_string(),
508                        vec![group_array],
509                    ))))
510                }
511            }
512
513            // ARRAY_AGG -> groupArray
514            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
515                Function::new("groupArray".to_string(), f.args),
516            ))),
517
518            // STDDEV -> stddevSamp in ClickHouse (sample stddev)
519            "STDDEV" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
520                AggregateFunction {
521                    name: "stddevSamp".to_string(),
522                    args: f.args,
523                    distinct: f.distinct,
524                    filter: f.filter,
525                    order_by: Vec::new(),
526                    limit: None,
527                    ignore_nulls: None,
528                },
529            ))),
530
531            // STDDEV_POP -> stddevPop
532            "STDDEV_POP" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
533                AggregateFunction {
534                    name: "stddevPop".to_string(),
535                    args: f.args,
536                    distinct: f.distinct,
537                    filter: f.filter,
538                    order_by: Vec::new(),
539                    limit: None,
540                    ignore_nulls: None,
541                },
542            ))),
543
544            // VARIANCE -> varSamp in ClickHouse
545            "VARIANCE" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
546                AggregateFunction {
547                    name: "varSamp".to_string(),
548                    args: f.args,
549                    distinct: f.distinct,
550                    filter: f.filter,
551                    order_by: Vec::new(),
552                    limit: None,
553                    ignore_nulls: None,
554                },
555            ))),
556
557            // VAR_POP -> varPop
558            "VAR_POP" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
559                AggregateFunction {
560                    name: "varPop".to_string(),
561                    args: f.args,
562                    distinct: f.distinct,
563                    filter: f.filter,
564                    order_by: Vec::new(),
565                    limit: None,
566                    ignore_nulls: None,
567                },
568            ))),
569
570            // MEDIAN -> median
571            "MEDIAN" if !f.args.is_empty() => Ok(Expression::AggregateFunction(Box::new(
572                AggregateFunction {
573                    name: "median".to_string(),
574                    args: f.args,
575                    distinct: f.distinct,
576                    filter: f.filter,
577                    order_by: Vec::new(),
578                    limit: None,
579                    ignore_nulls: None,
580                },
581            ))),
582
583            // APPROX_COUNT_DISTINCT -> uniq in ClickHouse
584            "APPROX_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
585                Function::new("uniq".to_string(), f.args),
586            ))),
587
588            // APPROX_DISTINCT -> uniq
589            "APPROX_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
590                Function::new("uniq".to_string(), f.args),
591            ))),
592
593            _ => Ok(Expression::AggregateFunction(f)),
594        }
595    }
596
597    fn transform_cast(&self, c: Cast) -> Result<Expression> {
598        // ClickHouse type mappings are handled in the generator
599        Ok(Expression::Cast(Box::new(c)))
600    }
601}