Skip to main content

polyglot_sql/dialects/
clickhouse.rs

1//! ClickHouse Dialect
2//!
3//! ClickHouse-specific transformations based on sqlglot patterns.
4//! ClickHouse is case-sensitive and has unique function naming conventions.
5
6use super::{DialectImpl, DialectType};
7use crate::error::Result;
8use crate::expressions::{
9    AggregateFunction, BinaryOp, Case, Cast, Expression, Function, In, IsNull, LikeOp,
10    MapConstructor, Paren, UnaryOp,
11};
12use crate::generator::GeneratorConfig;
13use crate::tokens::TokenizerConfig;
14
15/// ClickHouse dialect
16pub struct ClickHouseDialect;
17
18impl DialectImpl for ClickHouseDialect {
19    fn dialect_type(&self) -> DialectType {
20        DialectType::ClickHouse
21    }
22
23    fn tokenizer_config(&self) -> TokenizerConfig {
24        let mut config = TokenizerConfig::default();
25        // ClickHouse uses double quotes and backticks for identifiers
26        config.identifiers.insert('"', '"');
27        config.identifiers.insert('`', '`');
28        // ClickHouse supports nested comments
29        config.nested_comments = true;
30        // ClickHouse allows identifiers to start with digits
31        config.identifiers_can_start_with_digit = true;
32        // ClickHouse uses backslash escaping in strings
33        config.string_escapes.push('\\');
34        // ClickHouse supports # as single-line comment
35        config.hash_comments = true;
36        // ClickHouse allows $ in identifiers
37        config.dollar_sign_is_identifier = true;
38        // ClickHouse: INSERT ... FORMAT <name> is followed by raw data
39        config.insert_format_raw_data = true;
40        // ClickHouse supports 0xDEADBEEF hex integer literals
41        config.hex_number_strings = true;
42        config.hex_string_is_integer_type = true;
43        config
44    }
45
46    fn generator_config(&self) -> GeneratorConfig {
47        use crate::generator::{IdentifierQuoteStyle, NormalizeFunctions};
48        GeneratorConfig {
49            identifier_quote: '"',
50            identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
51            dialect: Some(DialectType::ClickHouse),
52            // ClickHouse uses uppercase keywords (matching Python SQLGlot behavior)
53            uppercase_keywords: true,
54            // ClickHouse function names are case-sensitive and typically camelCase
55            normalize_functions: NormalizeFunctions::None,
56            // ClickHouse identifiers are case-sensitive
57            case_sensitive_identifiers: true,
58            tablesample_keywords: "SAMPLE",
59            tablesample_requires_parens: false,
60            identifiers_can_start_with_digit: true,
61            // ClickHouse uses bracket-only notation for arrays: [1, 2, 3]
62            array_bracket_only: true,
63            ..Default::default()
64        }
65    }
66
67    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
68        let wrap_predicate_left = |expr: Expression| -> Expression {
69            let needs_parens = matches!(
70                expr,
71                Expression::Add(_)
72                    | Expression::Sub(_)
73                    | Expression::Mul(_)
74                    | Expression::Div(_)
75                    | Expression::Mod(_)
76                    | Expression::Concat(_)
77                    | Expression::And(_)
78                    | Expression::Or(_)
79                    | Expression::Not(_)
80                    | Expression::Case(_)
81            );
82
83            if needs_parens {
84                Expression::Paren(Box::new(Paren {
85                    this: expr,
86                    trailing_comments: Vec::new(),
87                }))
88            } else {
89                expr
90            }
91        };
92
93        let wrap_not_target = |expr: Expression| -> Expression {
94            match expr {
95                Expression::Paren(_) => expr,
96                Expression::In(_)
97                | Expression::Between(_)
98                | Expression::Is(_)
99                | Expression::IsNull(_)
100                | Expression::IsTrue(_)
101                | Expression::IsFalse(_)
102                | Expression::IsJson(_)
103                | Expression::Like(_)
104                | Expression::ILike(_)
105                | Expression::SimilarTo(_)
106                | Expression::Glob(_)
107                | Expression::RegexpLike(_)
108                | Expression::RegexpILike(_)
109                | Expression::MemberOf(_) => Expression::Paren(Box::new(Paren {
110                    this: expr,
111                    trailing_comments: Vec::new(),
112                })),
113                _ => expr,
114            }
115        };
116
117        let unwrap_in_array = |mut expressions: Vec<Expression>,
118                               query: &Option<Expression>,
119                               unnest: &Option<Box<Expression>>|
120         -> Vec<Expression> {
121            if query.is_none() && unnest.is_none() && expressions.len() == 1 {
122                if matches!(expressions[0], Expression::ArrayFunc(_)) {
123                    if let Expression::ArrayFunc(arr) = expressions.remove(0) {
124                        return arr.expressions;
125                    }
126                }
127            }
128            expressions
129        };
130
131        match expr {
132            // TryCast stays as TryCast (ClickHouse doesn't have TRY_CAST by default)
133            // But we can emulate with toXOrNull functions
134            Expression::TryCast(c) => {
135                // For simplicity, just use regular CAST
136                // ClickHouse has toXOrNull/toXOrZero functions for safe casts
137                Ok(Expression::Cast(c))
138            }
139
140            // SafeCast -> CAST in ClickHouse
141            Expression::SafeCast(c) => Ok(Expression::Cast(c)),
142
143            // CountIf is native in ClickHouse (lowercase)
144            Expression::CountIf(f) => Ok(Expression::Function(Box::new(Function::new(
145                "countIf".to_string(),
146                vec![f.this],
147            )))),
148
149            // UNNEST -> arrayJoin in ClickHouse
150            Expression::Unnest(f) => Ok(Expression::Function(Box::new(Function::new(
151                "arrayJoin".to_string(),
152                vec![f.this],
153            )))),
154
155            // EXPLODE -> arrayJoin in ClickHouse
156            Expression::Explode(f) => Ok(Expression::Function(Box::new(Function::new(
157                "arrayJoin".to_string(),
158                vec![f.this],
159            )))),
160
161            // ExplodeOuter -> arrayJoin in ClickHouse
162            Expression::ExplodeOuter(f) => Ok(Expression::Function(Box::new(Function::new(
163                "arrayJoin".to_string(),
164                vec![f.this],
165            )))),
166
167            // RAND -> randCanonical() in ClickHouse
168            Expression::Rand(_) => Ok(Expression::Function(Box::new(Function::new(
169                "randCanonical".to_string(),
170                vec![],
171            )))),
172
173            // Random -> randCanonical() in ClickHouse
174            Expression::Random(_) => Ok(Expression::Function(Box::new(Function::new(
175                "randCanonical".to_string(),
176                vec![],
177            )))),
178
179            // startsWith -> startsWith
180            Expression::StartsWith(f) => Ok(Expression::Function(Box::new(Function::new(
181                "startsWith".to_string(),
182                vec![f.this, f.expression],
183            )))),
184
185            // endsWith -> endsWith
186            Expression::EndsWith(f) => Ok(Expression::Function(Box::new(Function::new(
187                "endsWith".to_string(),
188                vec![f.this, f.expression],
189            )))),
190
191            // ClickHouse prefers NOT (x IN (...)) over x NOT IN (...)
192            Expression::In(in_expr) if in_expr.not => {
193                if in_expr.global {
194                    return Ok(Expression::In(in_expr));
195                }
196                let In {
197                    this,
198                    expressions,
199                    query,
200                    unnest,
201                    global,
202                    is_field,
203                    ..
204                } = *in_expr;
205                let expressions = unwrap_in_array(expressions, &query, &unnest);
206                let base = Expression::In(Box::new(In {
207                    this: wrap_predicate_left(this),
208                    expressions,
209                    query,
210                    not: false,
211                    global,
212                    unnest,
213                    is_field,
214                }));
215                Ok(Expression::Not(Box::new(UnaryOp {
216                    this: wrap_not_target(base),
217                    inferred_type: None,
218                })))
219            }
220
221            // ClickHouse prefers NOT (x IS NULL) over x IS NOT NULL
222            Expression::IsNull(is_null) if is_null.not => {
223                let IsNull { this, .. } = *is_null;
224                let base = Expression::IsNull(Box::new(IsNull {
225                    this: wrap_predicate_left(this),
226                    not: false,
227                    postfix_form: false,
228                }));
229                Ok(Expression::Not(Box::new(UnaryOp {
230                    this: wrap_not_target(base),
231                    inferred_type: None,
232                })))
233            }
234
235            Expression::In(mut in_expr) => {
236                in_expr.expressions =
237                    unwrap_in_array(in_expr.expressions, &in_expr.query, &in_expr.unnest);
238                in_expr.this = wrap_predicate_left(in_expr.this);
239                Ok(Expression::In(in_expr))
240            }
241
242            Expression::IsNull(mut is_null) => {
243                is_null.this = wrap_predicate_left(is_null.this);
244                Ok(Expression::IsNull(is_null))
245            }
246
247            // IF(cond, true, false) -> CASE WHEN cond THEN true ELSE false END
248            Expression::IfFunc(f) => {
249                let f = *f;
250                Ok(Expression::Case(Box::new(Case {
251                    operand: None,
252                    whens: vec![(f.condition, f.true_value)],
253                    else_: f.false_value,
254                    comments: Vec::new(),
255                    inferred_type: None,
256                })))
257            }
258
259            Expression::Is(mut is_expr) => {
260                is_expr.left = wrap_predicate_left(is_expr.left);
261                Ok(Expression::Is(is_expr))
262            }
263
264            Expression::Or(op) => {
265                let BinaryOp {
266                    left,
267                    right,
268                    left_comments,
269                    operator_comments,
270                    trailing_comments,
271                    ..
272                } = *op;
273                let left = if matches!(left, Expression::And(_)) {
274                    Expression::Paren(Box::new(Paren {
275                        this: left,
276                        trailing_comments: Vec::new(),
277                    }))
278                } else {
279                    left
280                };
281                let right = if matches!(right, Expression::And(_)) {
282                    Expression::Paren(Box::new(Paren {
283                        this: right,
284                        trailing_comments: Vec::new(),
285                    }))
286                } else {
287                    right
288                };
289                Ok(Expression::Or(Box::new(BinaryOp {
290                    left,
291                    right,
292                    left_comments,
293                    operator_comments,
294                    trailing_comments,
295                    inferred_type: None,
296                })))
297            }
298
299            Expression::Not(op) => {
300                let inner = wrap_not_target(op.this);
301                Ok(Expression::Not(Box::new(UnaryOp {
302                    this: inner,
303                    inferred_type: None,
304                })))
305            }
306
307            Expression::MapFunc(map) if map.curly_brace_syntax => {
308                let MapConstructor { keys, values, .. } = *map;
309                let mut args = Vec::with_capacity(keys.len() * 2);
310                for (key, value) in keys.into_iter().zip(values.into_iter()) {
311                    args.push(key);
312                    args.push(value);
313                }
314                Ok(Expression::Function(Box::new(Function::new(
315                    "map".to_string(),
316                    args,
317                ))))
318            }
319
320            Expression::Insert(mut insert) => {
321                for row in insert.values.iter_mut() {
322                    for value in row.iter_mut() {
323                        if !matches!(value, Expression::Paren(_)) {
324                            let wrapped = Expression::Paren(Box::new(Paren {
325                                this: value.clone(),
326                                trailing_comments: Vec::new(),
327                            }));
328                            *value = wrapped;
329                        }
330                    }
331                }
332                Ok(Expression::Insert(insert))
333            }
334
335            // Generic function transformations
336            Expression::Function(f) => self.transform_function(*f),
337
338            // Generic aggregate function transformations
339            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),
340
341            // Cast transformations
342            Expression::Cast(c) => self.transform_cast(*c),
343
344            // TYPEOF -> toTypeName in ClickHouse
345            Expression::Typeof(f) => Ok(Expression::Function(Box::new(Function::new(
346                "toTypeName".to_string(),
347                vec![f.this],
348            )))),
349
350            // Pass through everything else
351            _ => Ok(expr),
352        }
353    }
354}
355
356impl ClickHouseDialect {
357}
358
359impl ClickHouseDialect {
360    fn transform_function(&self, f: Function) -> Result<Expression> {
361        let name_upper = f.name.to_uppercase();
362        match name_upper.as_str() {
363            // UTCTimestamp() -> CURRENT_TIMESTAMP('UTC')
364            "UTCTIMESTAMP" => Ok(Expression::UtcTimestamp(Box::new(
365                crate::expressions::UtcTimestamp { this: None },
366            ))),
367
368            "CURRENTDATABASE" | "CURRENT_DATABASE" => Ok(Expression::Function(Box::new(
369                Function::new("CURRENT_DATABASE".to_string(), f.args),
370            ))),
371            "CURRENTSCHEMAS" | "CURRENT_SCHEMAS" => Ok(Expression::Function(Box::new(
372                Function::new("CURRENT_SCHEMAS".to_string(), f.args),
373            ))),
374            "LEVENSHTEIN" | "LEVENSHTEINDISTANCE" | "EDITDISTANCE" => Ok(Expression::Function(
375                Box::new(Function::new("editDistance".to_string(), f.args)),
376            )),
377            "CHAR" | "CHR" => Ok(Expression::Function(Box::new(Function::new(
378                "CHAR".to_string(),
379                f.args,
380            )))),
381            "STR_TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
382                "STR_TO_DATE".to_string(),
383                f.args,
384            )))),
385            "JSONEXTRACTSTRING" => Ok(Expression::Function(Box::new(Function::new(
386                "JSONExtractString".to_string(),
387                f.args,
388            )))),
389            "MATCH" => Ok(Expression::Function(Box::new(Function::new(
390                "match".to_string(),
391                f.args,
392            )))),
393            "LIKE" if f.args.len() == 2 => {
394                let left = f.args[0].clone();
395                let right = f.args[1].clone();
396                Ok(Expression::Like(Box::new(LikeOp::new(left, right))))
397            }
398            "NOTLIKE" if f.args.len() == 2 => {
399                let left = f.args[0].clone();
400                let right = f.args[1].clone();
401                let like = Expression::Like(Box::new(LikeOp::new(left, right)));
402                Ok(Expression::Not(Box::new(UnaryOp {
403                    this: like,
404                    inferred_type: None,
405                })))
406            }
407            "ILIKE" if f.args.len() == 2 => {
408                let left = f.args[0].clone();
409                let right = f.args[1].clone();
410                Ok(Expression::ILike(Box::new(LikeOp::new(left, right))))
411            }
412            "AND" if f.args.len() >= 2 => {
413                let mut iter = f.args.into_iter();
414                let mut expr = iter.next().unwrap();
415                for arg in iter {
416                    expr = Expression::And(Box::new(BinaryOp::new(expr, arg)));
417                }
418                Ok(expr)
419            }
420            "OR" if f.args.len() >= 2 => {
421                let mut iter = f.args.into_iter();
422                let mut expr = iter.next().unwrap();
423                for arg in iter {
424                    expr = Expression::Or(Box::new(BinaryOp::new(expr, arg)));
425                }
426                self.transform_expr(expr)
427            }
428            // TYPEOF -> toTypeName in ClickHouse
429            "TYPEOF" => Ok(Expression::Function(Box::new(Function::new(
430                "toTypeName".to_string(),
431                f.args,
432            )))),
433
434            // DATE_TRUNC: ClickHouse uses dateTrunc (camelCase)
435            "DATE_TRUNC" | "DATETRUNC" => Ok(Expression::Function(Box::new(Function::new(
436                "dateTrunc".to_string(),
437                f.args,
438            )))),
439            "TOSTARTOFDAY" if f.args.len() == 1 => {
440                Ok(Expression::Function(Box::new(Function::new(
441                    "dateTrunc".to_string(),
442                    vec![Expression::string("DAY"), f.args[0].clone()],
443                ))))
444            }
445
446            // SUBSTRING_INDEX: preserve original case (substringIndex in ClickHouse)
447            "SUBSTRING_INDEX" => Ok(Expression::Function(Box::new(Function::new(
448                f.name.clone(),
449                f.args,
450            )))),
451
452            // IS_NAN / ISNAN -> isNaN (ClickHouse camelCase)
453            "IS_NAN" | "ISNAN" => Ok(Expression::Function(Box::new(Function::new(
454                "isNaN".to_string(),
455                f.args,
456            )))),
457
458            _ => Ok(Expression::Function(Box::new(f))),
459        }
460    }
461
462    fn transform_aggregate_function(
463        &self,
464        f: Box<crate::expressions::AggregateFunction>,
465    ) -> Result<Expression> {
466        let name_upper = f.name.to_uppercase();
467        match name_upper.as_str() {
468            // COUNT_IF -> countIf
469            "COUNT_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
470                "countIf".to_string(),
471                f.args,
472            )))),
473
474            // SUM_IF -> sumIf
475            "SUM_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
476                "sumIf".to_string(),
477                f.args,
478            )))),
479
480            // AVG_IF -> avgIf
481            "AVG_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
482                "avgIf".to_string(),
483                f.args,
484            )))),
485
486            // ANY_VALUE -> any in ClickHouse
487            "ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
488                "any".to_string(),
489                f.args,
490            )))),
491
492            // GROUP_CONCAT -> groupArray + arrayStringConcat
493            "GROUP_CONCAT" if !f.args.is_empty() => {
494                let mut args = f.args;
495                let first = args.remove(0);
496                let separator = args.pop();
497                let group_array = Expression::Function(Box::new(Function::new(
498                    "groupArray".to_string(),
499                    vec![first],
500                )));
501                if let Some(sep) = separator {
502                    Ok(Expression::Function(Box::new(Function::new(
503                        "arrayStringConcat".to_string(),
504                        vec![group_array, sep],
505                    ))))
506                } else {
507                    Ok(Expression::Function(Box::new(Function::new(
508                        "arrayStringConcat".to_string(),
509                        vec![group_array],
510                    ))))
511                }
512            }
513
514            // STRING_AGG -> groupArray + arrayStringConcat
515            "STRING_AGG" if !f.args.is_empty() => {
516                let mut args = f.args;
517                let first = args.remove(0);
518                let separator = args.pop();
519                let group_array = Expression::Function(Box::new(Function::new(
520                    "groupArray".to_string(),
521                    vec![first],
522                )));
523                if let Some(sep) = separator {
524                    Ok(Expression::Function(Box::new(Function::new(
525                        "arrayStringConcat".to_string(),
526                        vec![group_array, sep],
527                    ))))
528                } else {
529                    Ok(Expression::Function(Box::new(Function::new(
530                        "arrayStringConcat".to_string(),
531                        vec![group_array],
532                    ))))
533                }
534            }
535
536            // LISTAGG -> groupArray + arrayStringConcat
537            "LISTAGG" if !f.args.is_empty() => {
538                let mut args = f.args;
539                let first = args.remove(0);
540                let separator = args.pop();
541                let group_array = Expression::Function(Box::new(Function::new(
542                    "groupArray".to_string(),
543                    vec![first],
544                )));
545                if let Some(sep) = separator {
546                    Ok(Expression::Function(Box::new(Function::new(
547                        "arrayStringConcat".to_string(),
548                        vec![group_array, sep],
549                    ))))
550                } else {
551                    Ok(Expression::Function(Box::new(Function::new(
552                        "arrayStringConcat".to_string(),
553                        vec![group_array],
554                    ))))
555                }
556            }
557
558            // ARRAY_AGG -> groupArray
559            "ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
560                "groupArray".to_string(),
561                f.args,
562            )))),
563
564            // STDDEV -> stddevSamp in ClickHouse (sample stddev)
565            "STDDEV" if !f.args.is_empty() => {
566                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
567                    name: "stddevSamp".to_string(),
568                    args: f.args,
569                    distinct: f.distinct,
570                    filter: f.filter,
571                    order_by: Vec::new(),
572                    limit: None,
573                    ignore_nulls: None,
574                    inferred_type: None,
575                })))
576            }
577
578            // STDDEV_POP -> stddevPop
579            "STDDEV_POP" if !f.args.is_empty() => {
580                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
581                    name: "stddevPop".to_string(),
582                    args: f.args,
583                    distinct: f.distinct,
584                    filter: f.filter,
585                    order_by: Vec::new(),
586                    limit: None,
587                    ignore_nulls: None,
588                    inferred_type: None,
589                })))
590            }
591
592            // VARIANCE -> varSamp in ClickHouse
593            "VARIANCE" if !f.args.is_empty() => {
594                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
595                    name: "varSamp".to_string(),
596                    args: f.args,
597                    distinct: f.distinct,
598                    filter: f.filter,
599                    order_by: Vec::new(),
600                    limit: None,
601                    ignore_nulls: None,
602                    inferred_type: None,
603                })))
604            }
605
606            // VAR_POP -> varPop
607            "VAR_POP" if !f.args.is_empty() => {
608                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
609                    name: "varPop".to_string(),
610                    args: f.args,
611                    distinct: f.distinct,
612                    filter: f.filter,
613                    order_by: Vec::new(),
614                    limit: None,
615                    ignore_nulls: None,
616                    inferred_type: None,
617                })))
618            }
619
620            // MEDIAN -> median
621            "MEDIAN" if !f.args.is_empty() => {
622                Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
623                    name: "median".to_string(),
624                    args: f.args,
625                    distinct: f.distinct,
626                    filter: f.filter,
627                    order_by: Vec::new(),
628                    limit: None,
629                    ignore_nulls: None,
630                    inferred_type: None,
631                })))
632            }
633
634            // APPROX_COUNT_DISTINCT -> uniq in ClickHouse
635            "APPROX_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
636                Function::new("uniq".to_string(), f.args),
637            ))),
638
639            // APPROX_DISTINCT -> uniq
640            "APPROX_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
641                Function::new("uniq".to_string(), f.args),
642            ))),
643
644            _ => Ok(Expression::AggregateFunction(f)),
645        }
646    }
647
648    fn transform_cast(&self, c: Cast) -> Result<Expression> {
649        Ok(Expression::Cast(Box::new(c)))
650    }
651}