use super::{DialectImpl, DialectType};
#[cfg(feature = "transpile")]
use crate::error::Result;
#[cfg(feature = "transpile")]
use crate::expressions::{
AggregateFunction, BinaryOp, Case, Cast, Expression, Function, In, IsNull, LikeOp,
MapConstructor, Paren, UnaryOp,
};
#[cfg(feature = "generate")]
use crate::generator::GeneratorConfig;
use crate::tokens::TokenizerConfig;
/// Stateless unit type standing for the ClickHouse SQL dialect.
///
/// All of its behavior is provided through the `DialectImpl` implementation
/// (and, with the `transpile` feature, through private helper methods).
pub struct ClickHouseDialect;
impl DialectImpl for ClickHouseDialect {
/// Identifies this implementation as `DialectType::ClickHouse`.
fn dialect_type(&self) -> DialectType {
DialectType::ClickHouse
}
/// Builds the tokenizer settings used for ClickHouse input.
///
/// Starts from `TokenizerConfig::default()` and then enables the options
/// listed below; every other setting keeps its default value.
fn tokenizer_config(&self) -> TokenizerConfig {
    let mut cfg = TokenizerConfig::default();

    // Quoting: register `"` and `` ` `` as identifier delimiters and add the
    // backslash to the string escape characters.
    cfg.identifiers.insert('"', '"');
    cfg.identifiers.insert('`', '`');
    cfg.string_escapes.push('\\');

    // Comment handling.
    cfg.hash_comments = true;
    cfg.nested_comments = true;

    // Identifier lexing.
    cfg.identifiers_can_start_with_digit = true;
    cfg.dollar_sign_is_identifier = true;

    // Numeric and hex literals.
    cfg.hex_number_strings = true;
    cfg.hex_string_is_integer_type = true;
    cfg.numbers_can_be_underscore_separated = true;

    // INSERT ... FORMAT payload handling.
    cfg.insert_format_raw_data = true;

    // Recovery switches for malformed string literals.
    cfg.recover_terminal_backslash_quote = true;
    cfg.recover_unterminated_string = true;

    cfg
}
/// Builds the SQL generator settings used for ClickHouse output.
///
/// Only the fields spelled out here are overridden; all remaining fields
/// take their value from `GeneratorConfig::default()`.
#[cfg(feature = "generate")]
fn generator_config(&self) -> GeneratorConfig {
    use crate::generator::IdentifierQuoteStyle;
    use crate::generator::NormalizeFunctions;

    GeneratorConfig {
        dialect: Some(DialectType::ClickHouse),

        // Identifier rendering.
        identifier_quote_style: IdentifierQuoteStyle::DOUBLE_QUOTE,
        identifier_quote: '"',
        identifiers_can_start_with_digit: true,
        case_sensitive_identifiers: true,

        // Keyword and function-name casing.
        normalize_functions: NormalizeFunctions::None,
        uppercase_keywords: true,

        // Table sampling clause.
        tablesample_requires_parens: false,
        tablesample_keywords: "SAMPLE",

        // Array literal rendering.
        array_bracket_only: true,

        ..Default::default()
    }
}
/// Rewrites one expression node into the shape used for ClickHouse output.
///
/// Nodes that match none of the cases below are returned unchanged.
/// `Function`, `AggregateFunction` and `Cast` nodes are handed to the
/// dedicated helper methods; every case handled directly here returns `Ok`.
#[cfg(feature = "transpile")]
fn transform_expr(&self, expr: Expression) -> Result<Expression> {
    /// Wraps `inner` in a `Paren` node that carries no trailing comments.
    fn parenthesize(inner: Expression) -> Expression {
        Expression::Paren(Box::new(Paren {
            this: inner,
            trailing_comments: Vec::new(),
        }))
    }

    /// Builds a plain function-call expression named `name` over `args`.
    fn call(name: &str, args: Vec<Expression>) -> Expression {
        Expression::Function(Box::new(Function::new(name.to_string(), args)))
    }

    /// Parenthesizes the left operand of a predicate (`IN`, `IS`, `IS NULL`)
    /// when it is an arithmetic, concatenation, logical or `CASE` expression.
    fn parenthesize_operand(operand: Expression) -> Expression {
        match operand {
            Expression::Add(_)
            | Expression::Sub(_)
            | Expression::Mul(_)
            | Expression::Div(_)
            | Expression::Mod(_)
            | Expression::Concat(_)
            | Expression::And(_)
            | Expression::Or(_)
            | Expression::Not(_)
            | Expression::Case(_) => parenthesize(operand),
            _ => operand,
        }
    }

    /// Parenthesizes the operand of `NOT` when it is one of the listed
    /// predicate nodes. Anything else, including an existing `Paren`, is
    /// returned untouched.
    fn parenthesize_negated(target: Expression) -> Expression {
        let is_predicate = matches!(
            target,
            Expression::In(_)
                | Expression::Between(_)
                | Expression::Is(_)
                | Expression::IsNull(_)
                | Expression::IsTrue(_)
                | Expression::IsFalse(_)
                | Expression::IsJson(_)
                | Expression::Like(_)
                | Expression::ILike(_)
                | Expression::SimilarTo(_)
                | Expression::Glob(_)
                | Expression::RegexpLike(_)
                | Expression::RegexpILike(_)
                | Expression::MemberOf(_)
        );
        if is_predicate {
            parenthesize(target)
        } else {
            target
        }
    }

    /// Builds `NOT <target>`, parenthesizing the target where required.
    fn negate(target: Expression) -> Expression {
        Expression::Not(Box::new(UnaryOp {
            this: parenthesize_negated(target),
            inferred_type: None,
        }))
    }

    /// Parenthesizes `side` when it is an `AND` expression.
    fn parenthesize_and(side: Expression) -> Expression {
        if matches!(side, Expression::And(_)) {
            parenthesize(side)
        } else {
            side
        }
    }

    /// When an `IN` list (no subquery, no unnest) consists of exactly one
    /// array constructor, returns that array's elements instead of the list.
    fn flatten_sole_array(
        mut items: Vec<Expression>,
        query: &Option<Expression>,
        unnest: &Option<Box<Expression>>,
    ) -> Vec<Expression> {
        let sole_array = query.is_none()
            && unnest.is_none()
            && items.len() == 1
            && matches!(items[0], Expression::ArrayFunc(_));
        if sole_array {
            if let Some(Expression::ArrayFunc(array)) = items.pop() {
                return array.expressions;
            }
        }
        items
    }

    match expr {
        // Both cast flavours are emitted as a plain cast.
        Expression::TryCast(cast) | Expression::SafeCast(cast) => Ok(Expression::Cast(cast)),

        // Dedicated nodes that become plain function calls.
        Expression::CountIf(node) => Ok(call("countIf", vec![node.this])),
        Expression::Unnest(node) => Ok(call("arrayJoin", vec![node.this])),
        Expression::Explode(node) => Ok(call("arrayJoin", vec![node.this])),
        Expression::ExplodeOuter(node) => Ok(call("arrayJoin", vec![node.this])),
        Expression::Rand(_) | Expression::Random(_) => Ok(call("randCanonical", vec![])),
        Expression::StartsWith(node) => Ok(call("startsWith", vec![node.this, node.expression])),
        Expression::EndsWith(node) => Ok(call("endsWith", vec![node.this, node.expression])),
        Expression::Typeof(node) => Ok(call("toTypeName", vec![node.this])),

        // `x NOT IN (...)` becomes `NOT (x IN (...))`; the GLOBAL form is left as is.
        Expression::In(negated) if negated.not => {
            if negated.global {
                return Ok(Expression::In(negated));
            }
            let In {
                this,
                expressions,
                query,
                unnest,
                global,
                is_field,
                ..
            } = *negated;
            let members = flatten_sole_array(expressions, &query, &unnest);
            let positive = Expression::In(Box::new(In {
                this: parenthesize_operand(this),
                expressions: members,
                query,
                not: false,
                global,
                unnest,
                is_field,
            }));
            Ok(negate(positive))
        }

        // A negated IS NULL becomes `NOT (x IS NULL)` in non-postfix form.
        Expression::IsNull(negated) if negated.not => {
            let IsNull { this, .. } = *negated;
            let positive = Expression::IsNull(Box::new(IsNull {
                this: parenthesize_operand(this),
                not: false,
                postfix_form: false,
            }));
            Ok(negate(positive))
        }

        // Non-negated predicates only get their left operand parenthesized.
        Expression::In(mut membership) => {
            membership.this = parenthesize_operand(membership.this);
            membership.expressions = flatten_sole_array(
                membership.expressions,
                &membership.query,
                &membership.unnest,
            );
            Ok(Expression::In(membership))
        }
        Expression::IsNull(mut check) => {
            check.this = parenthesize_operand(check.this);
            Ok(Expression::IsNull(check))
        }
        Expression::Is(mut check) => {
            check.left = parenthesize_operand(check.left);
            Ok(Expression::Is(check))
        }

        // IF(...) becomes a searched CASE unless one of its arguments is aliased.
        Expression::IfFunc(boxed) => {
            let if_func = *boxed;
            let any_aliased = matches!(if_func.condition, Expression::Alias(_))
                || matches!(if_func.true_value, Expression::Alias(_))
                || matches!(if_func.false_value.as_ref(), Some(Expression::Alias(_)));
            if any_aliased {
                Ok(Expression::IfFunc(Box::new(if_func)))
            } else {
                Ok(Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(if_func.condition, if_func.true_value)],
                    else_: if_func.false_value,
                    comments: Vec::new(),
                    inferred_type: None,
                })))
            }
        }

        // AND operands directly under OR are parenthesized; the rebuilt node
        // keeps its comments and has its inferred type reset.
        Expression::Or(boxed) => {
            let BinaryOp {
                left,
                right,
                left_comments,
                operator_comments,
                trailing_comments,
                ..
            } = *boxed;
            Ok(Expression::Or(Box::new(BinaryOp {
                left: parenthesize_and(left),
                right: parenthesize_and(right),
                left_comments,
                operator_comments,
                trailing_comments,
                inferred_type: None,
            })))
        }

        Expression::Not(boxed) => Ok(negate(boxed.this)),

        // A curly-brace map literal becomes `map(k1, v1, k2, v2, ...)`.
        Expression::MapFunc(map) if map.curly_brace_syntax => {
            let MapConstructor { keys, values, .. } = *map;
            let mut flat = Vec::with_capacity(keys.len() * 2);
            for (key, value) in keys.into_iter().zip(values) {
                flat.push(key);
                flat.push(value);
            }
            Ok(call("map", flat))
        }

        // Every VALUES item that is not already parenthesized gets wrapped.
        Expression::Insert(mut insert) => {
            for row in insert.values.iter_mut() {
                for cell in row.iter_mut() {
                    if matches!(cell, Expression::Paren(_)) {
                        continue;
                    }
                    let wrapped = parenthesize(cell.clone());
                    *cell = wrapped;
                }
            }
            Ok(Expression::Insert(insert))
        }

        Expression::Function(func) => self.transform_function(*func),
        Expression::AggregateFunction(agg) => self.transform_aggregate_function(agg),
        Expression::Cast(cast) => self.transform_cast(*cast),

        _ => Ok(expr),
    }
}
}
// This inherent impl block declares no items; the transpile helper methods
// are defined in the impl block that follows it.
#[cfg(feature = "transpile")]
impl ClickHouseDialect {}
#[cfg(feature = "transpile")]
impl ClickHouseDialect {
fn transform_function(&self, f: Function) -> Result<Expression> {
let name_upper = f.name.to_uppercase();
match name_upper.as_str() {
"UTCTIMESTAMP" => Ok(Expression::UtcTimestamp(Box::new(
crate::expressions::UtcTimestamp { this: None },
))),
"CURRENTDATABASE" | "CURRENT_DATABASE" => Ok(Expression::Function(Box::new(
Function::new("CURRENT_DATABASE".to_string(), f.args),
))),
"CURRENTSCHEMAS" | "CURRENT_SCHEMAS" => Ok(Expression::Function(Box::new(
Function::new("CURRENT_SCHEMAS".to_string(), f.args),
))),
"LEVENSHTEIN" | "LEVENSHTEINDISTANCE" | "EDITDISTANCE" => Ok(Expression::Function(
Box::new(Function::new("editDistance".to_string(), f.args)),
)),
"CHAR" | "CHR" => Ok(Expression::Function(Box::new(Function::new(
"CHAR".to_string(),
f.args,
)))),
"STR_TO_DATE" => Ok(Expression::Function(Box::new(Function::new(
"STR_TO_DATE".to_string(),
f.args,
)))),
"JSONEXTRACTSTRING" => Ok(Expression::Function(Box::new(Function::new(
"JSONExtractString".to_string(),
f.args,
)))),
"MATCH" => Ok(Expression::Function(Box::new(Function::new(
"match".to_string(),
f.args,
)))),
"LIKE" if f.args.len() == 2 => {
let left = f.args[0].clone();
let right = f.args[1].clone();
Ok(Expression::Like(Box::new(LikeOp::new(left, right))))
}
"NOTLIKE" if f.args.len() == 2 => {
let left = f.args[0].clone();
let right = f.args[1].clone();
let like = Expression::Like(Box::new(LikeOp::new(left, right)));
Ok(Expression::Not(Box::new(UnaryOp {
this: like,
inferred_type: None,
})))
}
"ILIKE" if f.args.len() == 2 => {
let left = f.args[0].clone();
let right = f.args[1].clone();
Ok(Expression::ILike(Box::new(LikeOp::new(left, right))))
}
"AND" if f.args.len() >= 2 => {
let mut iter = f.args.into_iter();
let mut expr = iter.next().unwrap();
for arg in iter {
expr = Expression::And(Box::new(BinaryOp::new(expr, arg)));
}
Ok(expr)
}
"OR" if f.args.len() >= 2 => {
let mut iter = f.args.into_iter();
let mut expr = iter.next().unwrap();
for arg in iter {
expr = Expression::Or(Box::new(BinaryOp::new(expr, arg)));
}
self.transform_expr(expr)
}
"XOR" if f.args.len() >= 2 => {
let mut iter = f.args.into_iter().map(Self::wrap_nested_xor_arg);
let mut expr = iter.next().unwrap();
for arg in iter {
expr = Expression::Function(Box::new(Function::new(
f.name.clone(),
vec![expr, arg],
)));
}
Ok(expr)
}
"TYPEOF" => Ok(Expression::Function(Box::new(Function::new(
"toTypeName".to_string(),
f.args,
)))),
"DATE_TRUNC" | "DATETRUNC" => Ok(Expression::Function(Box::new(Function::new(
"dateTrunc".to_string(),
f.args,
)))),
"TOSTARTOFDAY" if f.args.len() == 1 => {
Ok(Expression::Function(Box::new(Function::new(
"dateTrunc".to_string(),
vec![Expression::string("DAY"), f.args[0].clone()],
))))
}
"SUBSTRING_INDEX" => Ok(Expression::Function(Box::new(Function::new(
f.name.clone(),
f.args,
)))),
"IS_NAN" | "ISNAN" => Ok(Expression::Function(Box::new(Function::new(
"isNaN".to_string(),
f.args,
)))),
_ => Ok(Expression::Function(Box::new(f))),
}
}
fn transform_aggregate_function(
&self,
f: Box<crate::expressions::AggregateFunction>,
) -> Result<Expression> {
let name_upper = f.name.to_uppercase();
match name_upper.as_str() {
"COUNT_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
"countIf".to_string(),
f.args,
)))),
"SUM_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
"sumIf".to_string(),
f.args,
)))),
"AVG_IF" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
"avgIf".to_string(),
f.args,
)))),
"ANY_VALUE" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
"any".to_string(),
f.args,
)))),
"GROUP_CONCAT" if !f.args.is_empty() => {
let mut args = f.args;
let first = args.remove(0);
let separator = args.pop();
let group_array = Expression::Function(Box::new(Function::new(
"groupArray".to_string(),
vec![first],
)));
if let Some(sep) = separator {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array, sep],
))))
} else {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array],
))))
}
}
"STRING_AGG" if !f.args.is_empty() => {
let mut args = f.args;
let first = args.remove(0);
let separator = args.pop();
let group_array = Expression::Function(Box::new(Function::new(
"groupArray".to_string(),
vec![first],
)));
if let Some(sep) = separator {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array, sep],
))))
} else {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array],
))))
}
}
"LISTAGG" if !f.args.is_empty() => {
let mut args = f.args;
let first = args.remove(0);
let separator = args.pop();
let group_array = Expression::Function(Box::new(Function::new(
"groupArray".to_string(),
vec![first],
)));
if let Some(sep) = separator {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array, sep],
))))
} else {
Ok(Expression::Function(Box::new(Function::new(
"arrayStringConcat".to_string(),
vec![group_array],
))))
}
}
"ARRAY_AGG" if !f.args.is_empty() => Ok(Expression::Function(Box::new(Function::new(
"groupArray".to_string(),
f.args,
)))),
"STDDEV" if !f.args.is_empty() => {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: "stddevSamp".to_string(),
args: f.args,
distinct: f.distinct,
filter: f.filter,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})))
}
"STDDEV_POP" if !f.args.is_empty() => {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: "stddevPop".to_string(),
args: f.args,
distinct: f.distinct,
filter: f.filter,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})))
}
"VARIANCE" if !f.args.is_empty() => {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: "varSamp".to_string(),
args: f.args,
distinct: f.distinct,
filter: f.filter,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})))
}
"VAR_POP" if !f.args.is_empty() => {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: "varPop".to_string(),
args: f.args,
distinct: f.distinct,
filter: f.filter,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})))
}
"MEDIAN" if !f.args.is_empty() => {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: "median".to_string(),
args: f.args,
distinct: f.distinct,
filter: f.filter,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})))
}
"APPROX_COUNT_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
Function::new("uniq".to_string(), f.args),
))),
"APPROX_DISTINCT" if !f.args.is_empty() => Ok(Expression::Function(Box::new(
Function::new("uniq".to_string(), f.args),
))),
_ => Ok(Expression::AggregateFunction(f)),
}
}
/// Returns the cast re-boxed as `Expression::Cast` without modifying it.
fn transform_cast(&self, c: Cast) -> Result<Expression> {
Ok(Expression::Cast(Box::new(c)))
}
/// Parenthesizes `expr` when it is itself a function call whose name equals
/// `XOR` (ASCII case-insensitive); any other expression is returned as is.
fn wrap_nested_xor_arg(expr: Expression) -> Expression {
    let is_xor_call = match &expr {
        Expression::Function(func) => func.name.eq_ignore_ascii_case("XOR"),
        _ => false,
    };
    if !is_xor_call {
        return expr;
    }
    Expression::Paren(Box::new(Paren {
        this: expr,
        trailing_comments: Vec::new(),
    }))
}
}