Skip to main content

polyglot_sql/dialects/
clickhouse.rs

1//! ClickHouse Dialect
2//!
3//! ClickHouse-specific transformations based on sqlglot patterns.
4//! ClickHouse is case-sensitive and has unique function naming conventions.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggregateFunction, BinaryOp, Case, Cast, Expression, Function, In, IsNull, LikeOp,
10    MapConstructor, Paren, UnaryOp,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
15/// ClickHouse dialect
16pub struct ClickHouseDialect;
17
18impl DialectImpl for ClickHouseDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::ClickHouse
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // ClickHouse uses double quotes and backticks for identifiers
26        config.identifiers.insert('"', '"');
27        config.identifiers.insert('`', '`');
28        // ClickHouse supports nested comments
29        config.nested_comments = true;
30        // ClickHouse allows identifiers to start with digits
31        config.identifiers_can_start_with_digit = true;
32        // ClickHouse uses backslash escaping in strings
33        config.string_escapes.push('\\');
34        // ClickHouse supports # as single-line comment
35        config.hash_comments = true;
36        // ClickHouse allows $ in identifiers
37        config.dollar_sign_is_identifier = true;
38        // ClickHouse: INSERT ... FORMAT <name> is followed by raw data
39        config.insert_format_raw_data = true;
40        // ClickHouse supports 0xDEADBEEF hex integer literals
41        config.hex_number_strings = true;
42        config.hex_string_is_integer_type = true;
43        // ClickHouse allows underscores as digit separators in numeric literals
44        config.numbers_can_be_underscore_separated = true;
45        config
46    }
47
48    fn generator_config(&self) -> GeneratorConfig {
49        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
50        GeneratorConfig {
51            identifier_quote: '"',
52            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
53            dialect: Some(DialectType::ClickHouse),
54            // ClickHouse uses uppercase keywords (matching Python SQLGlot behavior)
55            uppercase_keywords: true,
56            // ClickHouse function names are case-sensitive and typically camelCase
57            normalize_functions: NormalizeFunctions::None,
58            // ClickHouse identifiers are case-sensitive
59            case_sensitive_identifiers: true,
60            tablesample_keywords: "SAMPLE",
61            tablesample_requires_parens: false,
62            identifiers_can_start_with_digit: true,
63            // ClickHouse uses bracket-only notation for arrays: [1, 2, 3]
64            array_bracket_only: true,
65            ..Default::default()
66        }
67    }
68
69    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
70        let wrap_predicate_left = |expr: Expression| -> Expression {
71            let needs_parens = matches!(
72                expr,
73                Expression::Add(_)
74                    | Expression::Sub(_)
75                    | Expression::Mul(_)
76                    | Expression::Div(_)
77                    | Expression::Mod(_)
78                    | Expression::Concat(_)
79                    | Expression::And(_)
80                    | Expression::Or(_)
81                    | Expression::Not(_)
82                    | Expression::Case(_)
83            );
84
85            if needs_parens {
86                Expression::Paren(Box::new(Paren {
87                    this: expr,
88                    trailing_comments: Vec::new(),
89                }))
90            } else {
91                expr
92            }
93        };
94
95        let wrap_not_target = |expr: Expression| -> Expression {
96            match expr {
97                Expression::Paren(_) => expr,
98                Expression::In(_)
99                | Expression::Between(_)
100                | Expression::Is(_)
101                | Expression::IsNull(_)
102                | Expression::IsTrue(_)
103                | Expression::IsFalse(_)
104                | Expression::IsJson(_)
105                | Expression::Like(_)
106                | Expression::ILike(_)
107                | Expression::SimilarTo(_)
108                | Expression::Glob(_)
109                | Expression::RegexpLike(_)
110                | Expression::RegexpILike(_)
111                | Expression::MemberOf(_) => Expression::Paren(Box::new(Paren {
112                    this: expr,
113                    trailing_comments: Vec::new(),
114                })),
115                _ => expr,
116            }
117        };
118
119        let unwrap_in_array = |mut expressions: Vec<Expression>,
120                               query: &Option<Expression>,
121                               unnest: &Option<Box<Expression>>|
122         -> Vec<Expression> {
123            if query.is_none() && unnest.is_none() && expressions.len() == 1 {
124                if matches!(expressions[0], Expression::ArrayFunc(_)) {
125                    if let Expression::ArrayFunc(arr) = expressions.remove(0) {
126                        return arr.expressions;
127                    }
128                }
129            }
130            expressions
131        };
132
133        match expr {
134            // TryCast stays as TryCast (ClickHouse doesn't have TRY_CAST by default)
135            // But we can emulate with toXOrNull functions
136            Expression::TryCast(c) => {
137                // For simplicity, just use regular CAST
138                // ClickHouse has toXOrNull/toXOrZero functions for safe casts
139                Ok(Expression::Cast(c))
140            }
141
142            // SafeCast -> CAST in ClickHouse
143            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
144
145            // CountIf is native in ClickHouse (lowercase)
146            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
147                "countIf".to_string(),
148                vec![f.this],
149            )))),
150
151            // UNNEST -> arrayJoin in ClickHouse
152            Expression::Unnest(f) => Ok(Expression::Function(Box::new(Function::new(
153                "arrayJoin".to_string(),
154                vec![f.this],
155            )))),
156
157            // EXPLODE -> arrayJoin in ClickHouse
158            Expression::Explode(f) => Ok(Expression::Function(Box::new(Function::new(
159                "arrayJoin".to_string(),
160                vec![f.this],
161            )))),
162
163            // ExplodeOuter -> arrayJoin in ClickHouse
164            Expression::ExplodeOuter(f) => Ok(Expression::Function(Box::new(Function::new(
165                "arrayJoin".to_string(),
166                vec![f.this],
167            )))),
168
169            // RAND -> randCanonical() in ClickHouse
170            Expression::Rand(_) => Ok(Expression::Function(Box::new(Function::new(
171                "randCanonical".to_string(),
172                vec![],
173            )))),
174
175            // Random -> randCanonical() in ClickHouse
176            Expression::Random(_) => Ok(Expression::Function(Box::new(Function::new(
177                "randCanonical".to_string(),
178                vec![],
179            )))),
180
181            // startsWith -> startsWith
182            Expression::StartsWith(f) => Ok(Expression::Function(Box::new(Function::new(
183                "startsWith".to_string(),
184                vec![f.this, f.expression],
185            )))),
186
187            // endsWith -> endsWith
188            Expression::EndsWith(f) => Ok(Expression::Function(Box::new(Function::new(
189                "endsWith".to_string(),
190                vec![f.this, f.expression],
191            )))),
192
193            // ClickHouse prefers NOT (x IN (...)) over x NOT IN (...)
194            Expression::In(in_expr) if in_expr.not => {
195                if in_expr.global {
196                    return Ok(Expression::In(in_expr));
197                }
198                let In {
199                    this,
200                    expressions,
201                    query,
202                    unnest,
203                    global,
204                    is_field,
205                    ..
206                } = *in_expr;
207                let expressions = unwrap_in_array(expressions, &query, &unnest);
208                let base = Expression::In(Box::new(In {
209                    this: wrap_predicate_left(this),
210                    expressions,
211                    query,
212                    not: false,
213                    global,
214                    unnest,
215                    is_field,
216                }));
217                Ok(Expression::Not(Box::new(UnaryOp {
218                    this: wrap_not_target(base),
219                    inferred_type: None,
220                })))
221            }
222
223            // ClickHouse prefers NOT (x IS NULL) over x IS NOT NULL
224            Expression::IsNull(is_null) if is_null.not => {
225                let IsNull { this, .. } = *is_null;
226                let base = Expression::IsNull(Box::new(IsNull {
227                    this: wrap_predicate_left(this),
228                    not: false,
229                    postfix_form: false,
230                }));
231                Ok(Expression::Not(Box::new(UnaryOp {
232                    this: wrap_not_target(base),
233                    inferred_type: None,
234                })))
235            }
236
237            Expression::In(mut in_expr) => {
238                in_expr.expressions =
239                    unwrap_in_array(in_expr.expressions, &in_expr.query, &in_expr.unnest);
240                in_expr.this = wrap_predicate_left(in_expr.this);
241                Ok(Expression::In(in_expr))
242            }
243
244            Expression::IsNull(mut is_null) => {
245                is_null.this = wrap_predicate_left(is_null.this);
246                Ok(Expression::IsNull(is_null))
247            }
248
249            // IF(cond, true, false) -> CASE WHEN cond THEN true ELSE false END
250            Expression::IfFunc(f) => {
251                let f = *f;
252                Ok(Expression::Case(Box::new(Case {
253                    operand: None,
254                    whens: vec![(f.condition, f.true_value)],
255                    else_: f.false_value,
256                    comments: Vec::new(),
257                    inferred_type: None,
258                })))
259            }
260
261            Expression::Is(mut is_expr) => {
262                is_expr.left = wrap_predicate_left(is_expr.left);
263                Ok(Expression::Is(is_expr))
264            }
265
266            Expression::Or(op) => {
267                let BinaryOp {
268                    left,
269                    right,
270                    left_comments,
271                    operator_comments,
272                    trailing_comments,
273                    ..
274                } = *op;
275                let left = if matches!(left, Expression::And(_)) {
276                    Expression::Paren(Box::new(Paren {
277                        this: left,
278                        trailing_comments: Vec::new(),
279                    }))
280                } else {
281                    left
282                };
283                let right = if matches!(right, Expression::And(_)) {
284                    Expression::Paren(Box::new(Paren {
285                        this: right,
286                        trailing_comments: Vec::new(),
287                    }))
288                } else {
289                    right
290                };
291                Ok(Expression::Or(Box::new(BinaryOp {
292                    left,
293                    right,
294                    left_comments,
295                    operator_comments,
296                    trailing_comments,
297                    inferred_type: None,
298                })))
299            }
300
301            Expression::Not(op) => {
302                let inner = wrap_not_target(op.this);
303                Ok(Expression::Not(Box::new(UnaryOp {
304                    this: inner,
305                    inferred_type: None,
306                })))
307            }
308
309            Expression::MapFunc(map) if map.curly_brace_syntax => {
310                let MapConstructor { keys, values, .. } = *map;
311                let mut args = Vec::with_capacity(keys.len() * 2);
312                for (key, value) in keys.into_iter().zip(values.into_iter()) {
313                    args.push(key);
314                    args.push(value);
315                }
316                Ok(Expression::Function(Box::new(Function::new(
317                    "map".to_string(),
318                    args,
319                ))))
320            }
321
322            Expression::Insert(mut insert) => {
323                for row in insert.values.iter_mut() {
324                    for value in row.iter_mut() {
325                        if !matches!(value, Expression::Paren(_)) {
326                            let wrapped = Expression::Paren(Box::new(Paren {
327                                this: value.clone(),
328                                trailing_comments: Vec::new(),
329                            }));
330                            *value = wrapped;
331                        }
332                    }
333                }
334                Ok(Expression::Insert(insert))
335            }
336
337            // Generic function transformations
338            Expression::Function(f) => self.transform_function(*f),
339
340            // Generic aggregate function transformations
341            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
342
343            // Cast transformations
344            Expression::Cast(c) => self.transform_cast(*c),
345
346            // TYPEOF -> toTypeName in ClickHouse
347            Expression::Typeof(f) => Ok(Expression::Function(Box::new(Function::new(
348                "toTypeName".to_string(),
349                vec![f.this],
350            )))),
351
352            // Pass through everything else
353            _ => Ok(expr),
354        }
355    }
356}
357
358impl ClickHouseDialect {}
359
360impl ClickHouseDialect {
361    fn transform_function(&self, f: Function) -> Result<Expression> {
362        let name_upper = f.name.to_uppercase();
363        match name_upper.as_str() {
364            // UTCTimestamp() -> CURRENT_TIMESTAMP('UTC')
365            "UTCTIMESTAMP" => Ok(Expression::UtcTimestamp(Box::new(
366                crate::expressions::UtcTimestamp { this: None },
367            ))),
368
369            "CURRENTDATABASE" | "CURRENT_DATABASE" => Ok(Expression::Function(Box::new(
370                Function::new("CURRENT_DATABASE".to_string(), f.args),
371            ))),
372            "CURRENTSCHEMAS" | "CURRENT_SCHEMAS" => Ok(Expression::Function(Box::new(
373                Function::new("CURRENT_SCHEMAS".to_string(), f.args),
374            ))),
375            "LEVENSHTEIN" | "LEVENSHTEINDISTANCE" | "EDITDISTANCE" => Ok(Expression::Function(
376                Box::new(Function::new("editDistance".to_string(), f.args)),
377            )),
378            "CHAR" | "CHR" => Ok(Expression::Function(Box::new(Function::new(
379                "CHAR".to_string(),
380                f.args,
381            )))),
382            "STR_TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
383                "STR_TO_DATE".to_string(),
384                f.args,
385            )))),
386            "JSONEXTRACTSTRING" => Ok(Expression::Function(Box::new(Function::new(
387                "JSONExtractString".to_string(),
388                f.args,
389            )))),
390            "MATCH" => Ok(Expression::Function(Box::new(Function::new(
391                "match".to_string(),
392                f.args,
393            )))),
394            "LIKE" if f.args.len() == 2 => {
395                let left = f.args[0].clone();
396                let right = f.args[1].clone();
397                Ok(Expression::Like(Box::new(LikeOp::new(left, right))))
398            }
399            "NOTLIKE" if f.args.len() == 2 => {
400                let left = f.args[0].clone();
401                let right = f.args[1].clone();
402                let like = Expression::Like(Box::new(LikeOp::new(left, right)));
403                Ok(Expression::Not(Box::new(UnaryOp {
404                    this: like,
405                    inferred_type: None,
406                })))
407            }
408            "ILIKE" if f.args.len() == 2 => {
409                let left = f.args[0].clone();
410                let right = f.args[1].clone();
411                Ok(Expression::ILike(Box::new(LikeOp::new(left, right))))
412            }
413            "AND" if f.args.len() >= 2 => {
414                let mut iter = f.args.into_iter();
415                let mut expr = iter.next().unwrap();
416                for arg in iter {
417                    expr = Expression::And(Box::new(BinaryOp::new(expr, arg)));
418                }
419                Ok(expr)
420            }
421            "OR" if f.args.len() >= 2 => {
422                let mut iter = f.args.into_iter();
423                let mut expr = iter.next().unwrap();
424                for arg in iter {
425                    expr = Expression::Or(Box::new(BinaryOp::new(expr, arg)));
426                }
427                self.transform_expr(expr)
428            }
429            // TYPEOF -> toTypeName in ClickHouse
430            "TYPEOF" => Ok(Expression::Function(Box::new(Function::new(
431                "toTypeName".to_string(),
432                f.args,
433            )))),
434
435            // DATE_TRUNC: ClickHouse uses dateTrunc (camelCase)
436            "DATE_TRUNC" | "DATETRUNC" => Ok(Expression::Function(Box::new(Function::new(
437                "dateTrunc".to_string(),
438                f.args,
439            )))),
440            "TOSTARTOFDAY" if f.args.len() == 1 => {
441                Ok(Expression::Function(Box::new(Function::new(
442                    "dateTrunc".to_string(),
443                    vec![Expression::string("DAY"), f.args[0].clone()],
444                ))))
445            }
446
447            // SUBSTRING_INDEX: preserve original case (substringIndex in ClickHouse)
448            "SUBSTRING_INDEX" => Ok(Expression::Function(Box::new(Function::new(
449                f.name.clone(),
450                f.args,
451            )))),
452
453            // IS_NAN / ISNAN -> isNaN (ClickHouse camelCase)
454            "IS_NAN" | "ISNAN" => Ok(Expression::Function(Box::new(Function::new(
455                "isNaN".to_string(),
456                f.args,
457            )))),
458
459            _ => Ok(Expression::Function(Box::new(f))),
460        }
461    }
462
463    fn transform_aggregate_function(
464        &self,
465        f: Box<crate::expressions::AggregateFunction>,
466    ) -> Result<Expression> {
467        let name_upper = f.name.to_uppercase();
468        match name_upper.as_str() {
469            // COUNT_IF -> countIf
470            "COUNT_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
471                "countIf".to_string(),
472                f.args,
473            )))),
474
475            // SUM_IF -> sumIf
476            "SUM_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
477                "sumIf".to_string(),
478                f.args,
479            )))),
480
481            // AVG_IF -> avgIf
482            "AVG_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
483                "avgIf".to_string(),
484                f.args,
485            )))),
486
487            // ANY_VALUE -> any in ClickHouse
488            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
489                "any".to_string(),
490                f.args,
491            )))),
492
493            // GROUP_CONCAT -> groupArray + arrayStringConcat
494            "GROUP_CONCAT" if !f.args.is_empty() => {
495                let mut args = f.args;
496                let first = args.remove(0);
497                let separator = args.pop();
498                let group_array = Expression::Function(Box::new(Function::new(
499                    "groupArray".to_string(),
500                    vec![first],
501                )));
502                if let Some(sep) = separator {
503                    Ok(Expression::Function(Box::new(Function::new(
504                        "arrayStringConcat".to_string(),
505                        vec![group_array, sep],
506                    ))))
507                } else {
508                    Ok(Expression::Function(Box::new(Function::new(
509                        "arrayStringConcat".to_string(),
510                        vec![group_array],
511                    ))))
512                }
513            }
514
515            // STRING_AGG -> groupArray + arrayStringConcat
516            "STRING_AGG" if !f.args.is_empty() => {
517                let mut args = f.args;
518                let first = args.remove(0);
519                let separator = args.pop();
520                let group_array = Expression::Function(Box::new(Function::new(
521                    "groupArray".to_string(),
522                    vec![first],
523                )));
524                if let Some(sep) = separator {
525                    Ok(Expression::Function(Box::new(Function::new(
526                        "arrayStringConcat".to_string(),
527                        vec![group_array, sep],
528                    ))))
529                } else {
530                    Ok(Expression::Function(Box::new(Function::new(
531                        "arrayStringConcat".to_string(),
532                        vec![group_array],
533                    ))))
534                }
535            }
536
537            // LISTAGG -> groupArray + arrayStringConcat
538            "LISTAGG" if !f.args.is_empty() => {
539                let mut args = f.args;
540                let first = args.remove(0);
541                let separator = args.pop();
542                let group_array = Expression::Function(Box::new(Function::new(
543                    "groupArray".to_string(),
544                    vec![first],
545                )));
546                if let Some(sep) = separator {
547                    Ok(Expression::Function(Box::new(Function::new(
548                        "arrayStringConcat".to_string(),
549                        vec![group_array, sep],
550                    ))))
551                } else {
552                    Ok(Expression::Function(Box::new(Function::new(
553                        "arrayStringConcat".to_string(),
554                        vec![group_array],
555                    ))))
556                }
557            }
558
559            // ARRAY_AGG -> groupArray
560            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
561                "groupArray".to_string(),
562                f.args,
563            )))),
564
565            // STDDEV -> stddevSamp in ClickHouse (sample stddev)
566            "STDDEV" if !f.args.is_empty() => {
567                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
568                    name: "stddevSamp".to_string(),
569                    args: f.args,
570                    distinct: f.distinct,
571                    filter: f.filter,
572                    order_by: Vec::new(),
573                    limit: None,
574                    ignore_nulls: None,
575                    inferred_type: None,
576                })))
577            }
578
579            // STDDEV_POP -> stddevPop
580            "STDDEV_POP" if !f.args.is_empty() => {
581                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
582                    name: "stddevPop".to_string(),
583                    args: f.args,
584                    distinct: f.distinct,
585                    filter: f.filter,
586                    order_by: Vec::new(),
587                    limit: None,
588                    ignore_nulls: None,
589                    inferred_type: None,
590                })))
591            }
592
593            // VARIANCE -> varSamp in ClickHouse
594            "VARIANCE" if !f.args.is_empty() => {
595                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
596                    name: "varSamp".to_string(),
597                    args: f.args,
598                    distinct: f.distinct,
599                    filter: f.filter,
600                    order_by: Vec::new(),
601                    limit: None,
602                    ignore_nulls: None,
603                    inferred_type: None,
604                })))
605            }
606
607            // VAR_POP -> varPop
608            "VAR_POP" if !f.args.is_empty() => {
609                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
610                    name: "varPop".to_string(),
611                    args: f.args,
612                    distinct: f.distinct,
613                    filter: f.filter,
614                    order_by: Vec::new(),
615                    limit: None,
616                    ignore_nulls: None,
617                    inferred_type: None,
618                })))
619            }
620
621            // MEDIAN -> median
622            "MEDIAN" if !f.args.is_empty() => {
623                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
624                    name: "median".to_string(),
625                    args: f.args,
626                    distinct: f.distinct,
627                    filter: f.filter,
628                    order_by: Vec::new(),
629                    limit: None,
630                    ignore_nulls: None,
631                    inferred_type: None,
632                })))
633            }
634
635            // APPROX_COUNT_DISTINCT -> uniq in ClickHouse
636            "APPROX_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
637                Function::new("uniq".to_string(), f.args),
638            ))),
639
640            // APPROX_DISTINCT -> uniq
641            "APPROX_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
642                Function::new("uniq".to_string(), f.args),
643            ))),
644
645            _ => Ok(Expression::AggregateFunction(f)),
646        }
647    }
648
649    fn transform_cast(&self, c: Cast) -> Result<Expression> {
650        Ok(Expression::Cast(Box::new(c)))
651    }
652}