polyglot-sql 0.3.3

SQL parsing, validating, formatting, and dialect translation library
Documentation
//! Doris Dialect
//!
//! Apache Doris-specific transformations based on sqlglot patterns.
//! Doris is MySQL-compatible with some extensions for analytics.

use super::{DialectImpl, DialectType};
use crate::error::Result;
use crate::expressions::{
    AggFunc, Case, Cast, Expression, Function, Interval, IntervalUnit, IntervalUnitSpec, VarArgFunc,
};
use crate::generator::GeneratorConfig;
use crate::tokens::TokenizerConfig;

/// Doris dialect.
///
/// Zero-sized marker type. Its [`DialectImpl`] implementation supplies the
/// tokenizer and generator configuration and the expression rewrites that
/// target Apache Doris.
pub struct DorisDialect;

impl DialectImpl for DorisDialect {
    fn dialect_type(&self) -> DialectType {
        DialectType::Doris
    }

    fn tokenizer_config(&self) -> TokenizerConfig {
        let mut config = TokenizerConfig::default();
        // Doris uses backticks for identifiers (MySQL-style)
        config.identifiers.insert('`', '`');
        config.nested_comments = false;
        config
    }

    fn generator_config(&self) -> GeneratorConfig {
        use crate::generator::IdentifierQuoteStyle;
        GeneratorConfig {
            identifier_quote: '`',
            identifier_quote_style: IdentifierQuoteStyle::BACKTICK,
            dialect: Some(DialectType::Doris),
            // Doris: COMMENT 'value' (naked property, no = sign)
            schema_comment_with_eq: false,
            // Doris: PROPERTIES ('key'='value') instead of WITH ('key'='value')
            with_properties_prefix: "PROPERTIES",
            ..Default::default()
        }
    }

    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        match expr {
            // IFNULL is native in Doris (MySQL-style)
            Expression::IfNull(f) => Ok(Expression::IfNull(f)),

            // NVL -> IFNULL in Doris
            Expression::Nvl(f) => Ok(Expression::IfNull(f)),

            // TryCast -> not directly supported, use CAST
            Expression::TryCast(c) => Ok(Expression::Cast(c)),

            // SafeCast -> CAST in Doris
            Expression::SafeCast(c) => Ok(Expression::Cast(c)),

            // CountIf -> SUM(CASE WHEN condition THEN 1 ELSE 0 END)
            Expression::CountIf(f) => {
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(f.this.clone(), Expression::number(1))],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Sum(Box::new(AggFunc {
                    ignore_nulls: None,
                    having_max: None,
                    this: case_expr,
                    distinct: f.distinct,
                    filter: f.filter,
                    order_by: Vec::new(),
                    name: None,
                    limit: None,
                    inferred_type: None,
                })))
            }

            // RAND is native in Doris
            Expression::Rand(r) => Ok(Expression::Rand(r)),

            // REGEXP_LIKE -> REGEXP in Doris
            Expression::RegexpLike(r) => {
                let mut args = vec![r.this, r.pattern];
                if let Some(flags) = r.flags {
                    args.push(flags);
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP".to_string(),
                    args,
                ))))
            }

            // Generic function transformations
            Expression::Function(f) => self.transform_function(*f),

            // Generic aggregate function transformations
            Expression::AggregateFunction(f) => self.transform_aggregate_function(f),

            // Cast transformations
            Expression::Cast(c) => self.transform_cast(*c),

            // Pass through everything else
            _ => Ok(expr),
        }
    }
}

impl DorisDialect {
    /// Builds an `Expression::Interval` over `expr` with a simple,
    /// non-plural `Day` unit.
    fn wrap_day_interval(expr: Expression) -> Expression {
        let unit = IntervalUnitSpec::Simple {
            unit: IntervalUnit::Day,
            use_plural: false,
        };
        Expression::Interval(Box::new(Interval {
            this: Some(expr),
            unit: Some(unit),
        }))
    }

    /// Builds a generic function call named `name` over `args`.
    fn rename_function(name: &str, args: Vec<Expression>) -> Expression {
        Expression::Function(Box::new(Function::new(name.to_string(), args)))
    }

    /// Builds a `CurrentTimestamp` node with no precision and `sysdate` off.
    fn current_timestamp() -> Expression {
        Expression::CurrentTimestamp(crate::expressions::CurrentTimestamp {
            precision: None,
            sysdate: false,
        })
    }

    /// True when `args` is exactly `[date, amount]` and `amount` is not
    /// already an `Expression::Interval`.
    fn has_plain_day_count(args: &[Expression]) -> bool {
        matches!(args, [_, amount] if !matches!(amount, Expression::Interval(_)))
    }

    /// Builds `name(date, <day interval>)` from `[date, amount]`, wrapping
    /// the trailing argument with [`Self::wrap_day_interval`].
    fn shift_by_days(name: &str, mut args: Vec<Expression>) -> Expression {
        if let Some(amount) = args.pop() {
            args.push(Self::wrap_day_interval(amount));
        }
        Self::rename_function(name, args)
    }

    /// Rewrites a generic (name + argument list) function call for Doris.
    ///
    /// The name is matched case-insensitively. Rewrites applied:
    ///
    /// * `NVL` / `ISNULL` (2 args) -> `IFNULL`
    /// * `COALESCE` -> typed `Expression::Coalesce`
    /// * `NOW` / `GETDATE` / `CURRENT_TIMESTAMP` -> typed `CurrentTimestamp`
    /// * `STRING_AGG` / `LISTAGG` -> `GROUP_CONCAT`
    /// * `LEN` (1 arg) -> `LENGTH`
    /// * `CHARINDEX` -> `INSTR` (2 args, swapped) or `LOCATE` (3 args)
    /// * `STRPOS` -> `INSTR`
    /// * `DATE_ADD` / `ADDDATE` / `DATE_SUB` / `SUBDATE` with a non-interval
    ///   second argument -> `DATE_ADD` / `DATE_SUB` with a day interval
    /// * `ARRAY_AGG` -> `COLLECT_LIST`
    /// * `STRFTIME` -> `DATE_FORMAT` (first two arguments swapped)
    /// * `TO_CHAR` -> `DATE_FORMAT`
    /// * `GET_JSON_OBJECT` (2 args) -> `JSON_EXTRACT`
    /// * `REGEXP_LIKE` -> `REGEXP`
    /// * `ADD_MONTHS` (2 args) -> `MONTHS_ADD`
    ///
    /// A call that matches no rule, or whose argument count fails a rule's
    /// guard, is returned unchanged.
    ///
    /// # Errors
    ///
    /// Never fails at present; the `Result` matches the other transform hooks.
    fn transform_function(&self, f: Function) -> Result<Expression> {
        let name_upper = f.name.to_uppercase();
        let rewritten = match name_upper.as_str() {
            // NVL / ISNULL -> IFNULL
            "NVL" | "ISNULL" if f.args.len() == 2 => Self::rename_function("IFNULL", f.args),

            // COALESCE is native in Doris
            "COALESCE" => Expression::Coalesce(Box::new(VarArgFunc {
                original_name: None,
                expressions: f.args,
                inferred_type: None,
            })),

            // NOW and CURRENT_TIMESTAMP are native; GETDATE -> NOW.
            // NOTE(review): any arguments on the call (e.g. a precision) are
            // discarded here — confirm that is intended.
            "NOW" | "GETDATE" | "CURRENT_TIMESTAMP" => Self::current_timestamp(),

            // STRING_AGG / LISTAGG -> GROUP_CONCAT
            "STRING_AGG" | "LISTAGG" if !f.args.is_empty() => {
                Self::rename_function("GROUP_CONCAT", f.args)
            }

            // LEN -> LENGTH
            "LEN" if f.args.len() == 1 => Self::rename_function("LENGTH", f.args),

            // CHARINDEX(substr, str, start) -> LOCATE(substr, str, start).
            // LOCATE takes its arguments in the same order, so the start
            // position is preserved instead of being dropped.
            "CHARINDEX" if f.args.len() == 3 => Self::rename_function("LOCATE", f.args),

            // CHARINDEX(substr, str) -> INSTR(str, substr) (swapped args)
            "CHARINDEX" if f.args.len() >= 2 => {
                let mut args = f.args;
                args.truncate(2);
                args.swap(0, 1);
                Self::rename_function("INSTR", args)
            }

            // STRPOS -> INSTR
            "STRPOS" if f.args.len() >= 2 => Self::rename_function("INSTR", f.args),

            // Doris normalizes MySQL-style day shorthand to INTERVAL syntax.
            "DATE_ADD" | "ADDDATE" if Self::has_plain_day_count(&f.args) => {
                Self::shift_by_days("DATE_ADD", f.args)
            }
            "DATE_SUB" | "SUBDATE" if Self::has_plain_day_count(&f.args) => {
                Self::shift_by_days("DATE_SUB", f.args)
            }

            // ARRAY_AGG -> COLLECT_LIST
            "ARRAY_AGG" if !f.args.is_empty() => Self::rename_function("COLLECT_LIST", f.args),

            // strftime(format, date, ..) -> DATE_FORMAT(date, format);
            // only the first two arguments are kept.
            "STRFTIME" if f.args.len() >= 2 => {
                let mut args = f.args;
                args.truncate(2);
                args.swap(0, 1);
                Self::rename_function("DATE_FORMAT", args)
            }

            // TO_CHAR -> DATE_FORMAT
            "TO_CHAR" if f.args.len() >= 2 => Self::rename_function("DATE_FORMAT", f.args),

            // GET_JSON_OBJECT -> JSON_EXTRACT
            "GET_JSON_OBJECT" if f.args.len() == 2 => {
                Self::rename_function("JSON_EXTRACT", f.args)
            }

            // REGEXP_LIKE -> REGEXP
            "REGEXP_LIKE" if f.args.len() >= 2 => Self::rename_function("REGEXP", f.args),

            // ADD_MONTHS -> MONTHS_ADD
            "ADD_MONTHS" if f.args.len() == 2 => Self::rename_function("MONTHS_ADD", f.args),

            // Everything else passes through untouched. That includes the
            // functions Doris supports natively: GROUP_CONCAT, SUBSTR,
            // SUBSTRING, LENGTH, LOCATE, INSTR, DATE_TRUNC, COLLECT_LIST,
            // COLLECT_SET, TO_DATE, TO_TIMESTAMP, DATE_FORMAT, JSON_EXTRACT,
            // REGEXP, RLIKE and MONTHS_ADD.
            _ => Expression::Function(Box::new(f)),
        };
        Ok(rewritten)
    }

    /// Rewrites a generic aggregate function call for Doris.
    ///
    /// * `COUNT_IF(cond, ..)` becomes `SUM(CASE WHEN cond THEN 1 ELSE 0 END)`,
    ///   carrying over the `distinct` and `filter` fields; only the first
    ///   argument is used.
    /// * `HLL_COUNT_DISTINCT` is renamed to `APPROX_COUNT_DISTINCT`.
    /// * Every other aggregate (including the native `APPROX_COUNT_DISTINCT`,
    ///   `MAX_BY` and `MIN_BY`) is returned unchanged.
    ///
    /// # Errors
    ///
    /// Never fails at present; the `Result` matches the other transform hooks.
    fn transform_aggregate_function(
        &self,
        f: Box<crate::expressions::AggregateFunction>,
    ) -> Result<Expression> {
        let name_upper = f.name.to_uppercase();
        match name_upper.as_str() {
            // COUNT_IF -> SUM(CASE WHEN...)
            "COUNT_IF" if !f.args.is_empty() => {
                let condition = f
                    .args
                    .into_iter()
                    .next()
                    .expect("guard guarantees at least one argument");
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(condition, Expression::number(1))],
                    else_: Some(Expression::number(0)),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                Ok(Expression::Sum(Box::new(AggFunc {
                    ignore_nulls: None,
                    having_max: None,
                    this: case_expr,
                    distinct: f.distinct,
                    filter: f.filter,
                    order_by: Vec::new(),
                    name: None,
                    limit: None,
                    inferred_type: None,
                })))
            }

            // HLL_COUNT_DISTINCT -> APPROX_COUNT_DISTINCT
            // Renamed in place so the aggregate keeps its other fields
            // (distinct, filter, ...) instead of being rebuilt as a plain
            // function that would drop them.
            "HLL_COUNT_DISTINCT" if !f.args.is_empty() => {
                let mut renamed = f;
                renamed.name = "APPROX_COUNT_DISTINCT".to_string();
                Ok(Expression::AggregateFunction(renamed))
            }

            // Pass through everything else, including the native
            // APPROX_COUNT_DISTINCT, MAX_BY and MIN_BY.
            _ => Ok(Expression::AggregateFunction(f)),
        }
    }

    /// Returns the cast unchanged, re-wrapped as `Expression::Cast`.
    ///
    /// Doris type mappings are handled in the generator.
    fn transform_cast(&self, c: Cast) -> Result<Expression> {
        let cast = Box::new(c);
        Ok(Expression::Cast(cast))
    }
}