Skip to main content

sqlglot_rust/dialects/
mod.rs

1use serde::{Deserialize, Serialize};
2
3use crate::ast::*;
4
5pub mod plugin;
6pub mod time;
7
/// Supported SQL dialects.
///
/// Mirrors the full set of dialects supported by Python's sqlglot library.
/// Dialects are grouped into **Official** (core, higher-priority maintenance)
/// and **Community** (contributed, fully functional) tiers; the tier of a
/// given variant is reported by `Dialect::support_level`.
///
/// The enum is `Copy`, so the helpers in this module take it by value. It also
/// derives `Serialize` / `Deserialize`, so variant names are part of the
/// serialized representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Dialect {
    // ── Core / base ──────────────────────────────────────────────────────
    /// ANSI SQL standard (default / base dialect).
    ///
    /// `Dialect::from_str` also maps the empty string to this variant.
    Ansi,

    // ── Official dialects ────────────────────────────────────────────────
    /// AWS Athena (Presto-based)
    Athena,
    /// Google BigQuery
    BigQuery,
    /// ClickHouse
    ClickHouse,
    /// Databricks (Spark-based)
    Databricks,
    /// DuckDB
    DuckDb,
    /// Apache Hive
    Hive,
    /// MySQL
    Mysql,
    /// Oracle Database
    Oracle,
    /// PostgreSQL
    Postgres,
    /// Presto
    Presto,
    /// Amazon Redshift (Postgres-based)
    Redshift,
    /// Snowflake
    Snowflake,
    /// Apache Spark SQL
    Spark,
    /// SQLite
    Sqlite,
    /// StarRocks (MySQL-compatible)
    StarRocks,
    /// Trino (Presto successor)
    Trino,
    /// Microsoft SQL Server (T-SQL)
    Tsql,

    // ── Community dialects ───────────────────────────────────────────────
    /// Apache Doris (MySQL-compatible)
    Doris,
    /// Dremio
    Dremio,
    /// Apache Drill
    Drill,
    /// Apache Druid
    Druid,
    /// Exasol
    Exasol,
    /// Microsoft Fabric (T-SQL variant)
    Fabric,
    /// Materialize (Postgres-compatible)
    Materialize,
    /// PRQL (Pipelined Relational Query Language)
    Prql,
    /// RisingWave (Postgres-compatible)
    RisingWave,
    /// SingleStore (MySQL-compatible)
    SingleStore,
    /// Tableau
    Tableau,
    /// Teradata
    Teradata,
}
81
82impl std::fmt::Display for Dialect {
83    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
84        match self {
85            Dialect::Ansi => write!(f, "ANSI SQL"),
86            Dialect::Athena => write!(f, "Athena"),
87            Dialect::BigQuery => write!(f, "BigQuery"),
88            Dialect::ClickHouse => write!(f, "ClickHouse"),
89            Dialect::Databricks => write!(f, "Databricks"),
90            Dialect::DuckDb => write!(f, "DuckDB"),
91            Dialect::Hive => write!(f, "Hive"),
92            Dialect::Mysql => write!(f, "MySQL"),
93            Dialect::Oracle => write!(f, "Oracle"),
94            Dialect::Postgres => write!(f, "PostgreSQL"),
95            Dialect::Presto => write!(f, "Presto"),
96            Dialect::Redshift => write!(f, "Redshift"),
97            Dialect::Snowflake => write!(f, "Snowflake"),
98            Dialect::Spark => write!(f, "Spark"),
99            Dialect::Sqlite => write!(f, "SQLite"),
100            Dialect::StarRocks => write!(f, "StarRocks"),
101            Dialect::Trino => write!(f, "Trino"),
102            Dialect::Tsql => write!(f, "T-SQL"),
103            Dialect::Doris => write!(f, "Doris"),
104            Dialect::Dremio => write!(f, "Dremio"),
105            Dialect::Drill => write!(f, "Drill"),
106            Dialect::Druid => write!(f, "Druid"),
107            Dialect::Exasol => write!(f, "Exasol"),
108            Dialect::Fabric => write!(f, "Fabric"),
109            Dialect::Materialize => write!(f, "Materialize"),
110            Dialect::Prql => write!(f, "PRQL"),
111            Dialect::RisingWave => write!(f, "RisingWave"),
112            Dialect::SingleStore => write!(f, "SingleStore"),
113            Dialect::Tableau => write!(f, "Tableau"),
114            Dialect::Teradata => write!(f, "Teradata"),
115        }
116    }
117}
118
119impl Dialect {
120    /// Returns the support tier for this dialect.
121    #[must_use]
122    pub fn support_level(&self) -> &'static str {
123        match self {
124            Dialect::Ansi
125            | Dialect::Athena
126            | Dialect::BigQuery
127            | Dialect::ClickHouse
128            | Dialect::Databricks
129            | Dialect::DuckDb
130            | Dialect::Hive
131            | Dialect::Mysql
132            | Dialect::Oracle
133            | Dialect::Postgres
134            | Dialect::Presto
135            | Dialect::Redshift
136            | Dialect::Snowflake
137            | Dialect::Spark
138            | Dialect::Sqlite
139            | Dialect::StarRocks
140            | Dialect::Trino
141            | Dialect::Tsql => "Official",
142
143            Dialect::Doris
144            | Dialect::Dremio
145            | Dialect::Drill
146            | Dialect::Druid
147            | Dialect::Exasol
148            | Dialect::Fabric
149            | Dialect::Materialize
150            | Dialect::Prql
151            | Dialect::RisingWave
152            | Dialect::SingleStore
153            | Dialect::Tableau
154            | Dialect::Teradata => "Community",
155        }
156    }
157
158    /// Returns all dialect variants.
159    #[must_use]
160    pub fn all() -> &'static [Dialect] {
161        &[
162            Dialect::Ansi,
163            Dialect::Athena,
164            Dialect::BigQuery,
165            Dialect::ClickHouse,
166            Dialect::Databricks,
167            Dialect::Doris,
168            Dialect::Dremio,
169            Dialect::Drill,
170            Dialect::Druid,
171            Dialect::DuckDb,
172            Dialect::Exasol,
173            Dialect::Fabric,
174            Dialect::Hive,
175            Dialect::Materialize,
176            Dialect::Mysql,
177            Dialect::Oracle,
178            Dialect::Postgres,
179            Dialect::Presto,
180            Dialect::Prql,
181            Dialect::Redshift,
182            Dialect::RisingWave,
183            Dialect::SingleStore,
184            Dialect::Snowflake,
185            Dialect::Spark,
186            Dialect::Sqlite,
187            Dialect::StarRocks,
188            Dialect::Tableau,
189            Dialect::Teradata,
190            Dialect::Trino,
191            Dialect::Tsql,
192        ]
193    }
194
195    /// Parse a dialect name (case-insensitive) into a `Dialect`.
196    pub fn from_str(s: &str) -> Option<Dialect> {
197        match s.to_lowercase().as_str() {
198            "" | "ansi" => Some(Dialect::Ansi),
199            "athena" => Some(Dialect::Athena),
200            "bigquery" => Some(Dialect::BigQuery),
201            "clickhouse" => Some(Dialect::ClickHouse),
202            "databricks" => Some(Dialect::Databricks),
203            "doris" => Some(Dialect::Doris),
204            "dremio" => Some(Dialect::Dremio),
205            "drill" => Some(Dialect::Drill),
206            "druid" => Some(Dialect::Druid),
207            "duckdb" => Some(Dialect::DuckDb),
208            "exasol" => Some(Dialect::Exasol),
209            "fabric" => Some(Dialect::Fabric),
210            "hive" => Some(Dialect::Hive),
211            "materialize" => Some(Dialect::Materialize),
212            "mysql" => Some(Dialect::Mysql),
213            "oracle" => Some(Dialect::Oracle),
214            "postgres" | "postgresql" => Some(Dialect::Postgres),
215            "presto" => Some(Dialect::Presto),
216            "prql" => Some(Dialect::Prql),
217            "redshift" => Some(Dialect::Redshift),
218            "risingwave" => Some(Dialect::RisingWave),
219            "singlestore" => Some(Dialect::SingleStore),
220            "snowflake" => Some(Dialect::Snowflake),
221            "spark" => Some(Dialect::Spark),
222            "sqlite" => Some(Dialect::Sqlite),
223            "starrocks" => Some(Dialect::StarRocks),
224            "tableau" => Some(Dialect::Tableau),
225            "teradata" => Some(Dialect::Teradata),
226            "trino" => Some(Dialect::Trino),
227            "tsql" | "mssql" | "sqlserver" => Some(Dialect::Tsql),
228            _ => None,
229        }
230    }
231}
232
233// ═══════════════════════════════════════════════════════════════════════════
234// Dialect families — helpers for grouping similar dialects
235// ═══════════════════════════════════════════════════════════════════════════
236
237/// Dialects in the MySQL family (use SUBSTR, IFNULL, similar type system).
238fn is_mysql_family(d: Dialect) -> bool {
239    matches!(
240        d,
241        Dialect::Mysql | Dialect::Doris | Dialect::SingleStore | Dialect::StarRocks
242    )
243}
244
245/// Dialects in the Postgres family (support ILIKE, BYTEA, SUBSTRING).
246fn is_postgres_family(d: Dialect) -> bool {
247    matches!(
248        d,
249        Dialect::Postgres | Dialect::Redshift | Dialect::Materialize | Dialect::RisingWave
250    )
251}
252
253/// Dialects in the Presto family (ANSI-like, VARCHAR oriented).
254fn is_presto_family(d: Dialect) -> bool {
255    matches!(d, Dialect::Presto | Dialect::Trino | Dialect::Athena)
256}
257
258/// Dialects in the Hive/Spark family (use STRING type, SUBSTR).
259fn is_hive_family(d: Dialect) -> bool {
260    matches!(d, Dialect::Hive | Dialect::Spark | Dialect::Databricks)
261}
262
263/// Dialects in the T-SQL family.
264fn is_tsql_family(d: Dialect) -> bool {
265    matches!(d, Dialect::Tsql | Dialect::Fabric)
266}
267
268/// Dialects that natively support ILIKE.
269pub(crate) fn supports_ilike_builtin(d: Dialect) -> bool {
270    matches!(
271        d,
272        Dialect::Postgres
273            | Dialect::Redshift
274            | Dialect::Materialize
275            | Dialect::RisingWave
276            | Dialect::DuckDb
277            | Dialect::Snowflake
278            | Dialect::ClickHouse
279            | Dialect::Trino
280            | Dialect::Presto
281            | Dialect::Athena
282            | Dialect::Databricks
283            | Dialect::Spark
284            | Dialect::Hive
285            | Dialect::StarRocks
286            | Dialect::Exasol
287            | Dialect::Druid
288            | Dialect::Dremio
289    )
290}
291
292// ═══════════════════════════════════════════════════════════════════════════
293// Statement / expression transforms
294// ═══════════════════════════════════════════════════════════════════════════
295
296/// Transform a statement from one dialect to another.
297///
298/// This applies dialect-specific rewrite rules such as:
299/// - Type mapping (e.g., `TEXT` → `STRING` for BigQuery)
300/// - Function name mapping (e.g., `NOW()` → `CURRENT_TIMESTAMP()`)
301/// - ILIKE → LIKE with LOWER() wrapping for dialects that don't support ILIKE
302#[must_use]
303pub fn transform(statement: &Statement, from: Dialect, to: Dialect) -> Statement {
304    if from == to {
305        return statement.clone();
306    }
307    let mut stmt = statement.clone();
308    transform_statement(&mut stmt, to);
309    stmt
310}
311
312fn transform_statement(statement: &mut Statement, target: Dialect) {
313    match statement {
314        Statement::Select(sel) => {
315            // Transform LIMIT / TOP / FETCH FIRST for the target dialect
316            transform_limit(sel, target);
317            // Transform identifier quoting for the target dialect
318            transform_quotes_in_select(sel, target);
319
320            for item in &mut sel.columns {
321                if let SelectItem::Expr { expr, .. } = item {
322                    *expr = transform_expr(expr.clone(), target);
323                }
324            }
325            if let Some(wh) = &mut sel.where_clause {
326                *wh = transform_expr(wh.clone(), target);
327            }
328            for gb in &mut sel.group_by {
329                *gb = transform_expr(gb.clone(), target);
330            }
331            if let Some(having) = &mut sel.having {
332                *having = transform_expr(having.clone(), target);
333            }
334        }
335        Statement::Insert(ins) => {
336            if let InsertSource::Values(rows) = &mut ins.source {
337                for row in rows {
338                    for val in row {
339                        *val = transform_expr(val.clone(), target);
340                    }
341                }
342            }
343            // Transform RETURNING expressions
344            for item in &mut ins.returning {
345                if let SelectItem::Expr { expr, .. } = item {
346                    *expr = transform_expr(expr.clone(), target);
347                }
348            }
349        }
350        Statement::Update(upd) => {
351            for (_, val) in &mut upd.assignments {
352                *val = transform_expr(val.clone(), target);
353            }
354            if let Some(wh) = &mut upd.where_clause {
355                *wh = transform_expr(wh.clone(), target);
356            }
357            // Transform RETURNING expressions
358            for item in &mut upd.returning {
359                if let SelectItem::Expr { expr, .. } = item {
360                    *expr = transform_expr(expr.clone(), target);
361                }
362            }
363        }
364        // DDL: map data types in CREATE TABLE column definitions
365        Statement::CreateTable(ct) => {
366            for col in &mut ct.columns {
367                col.data_type = map_data_type(col.data_type.clone(), target);
368                if let Some(default) = &mut col.default {
369                    *default = transform_expr(default.clone(), target);
370                }
371            }
372            // Transform constraints (CHECK expressions)
373            for constraint in &mut ct.constraints {
374                if let TableConstraint::Check { expr, .. } = constraint {
375                    *expr = transform_expr(expr.clone(), target);
376                }
377            }
378            // Transform AS SELECT subquery
379            if let Some(as_select) = &mut ct.as_select {
380                transform_statement(as_select, target);
381            }
382        }
383        // DDL: map data types in ALTER TABLE ADD COLUMN
384        Statement::AlterTable(alt) => {
385            for action in &mut alt.actions {
386                match action {
387                    AlterTableAction::AddColumn(col) => {
388                        col.data_type = map_data_type(col.data_type.clone(), target);
389                        if let Some(default) = &mut col.default {
390                            *default = transform_expr(default.clone(), target);
391                        }
392                    }
393                    AlterTableAction::AlterColumnType { data_type, .. } => {
394                        *data_type = map_data_type(data_type.clone(), target);
395                    }
396                    _ => {}
397                }
398            }
399        }
400        _ => {}
401    }
402}
403
404/// Transform an expression for the target dialect.
405fn transform_expr(expr: Expr, target: Dialect) -> Expr {
406    match expr {
407        // Map function names across dialects
408        Expr::Function {
409            name,
410            args,
411            distinct,
412            filter,
413            over,
414        } => {
415            let new_name = map_function_name(&name, target);
416            let new_args: Vec<Expr> = args
417                .into_iter()
418                .map(|a| transform_expr(a, target))
419                .collect();
420            Expr::Function {
421                name: new_name,
422                args: new_args,
423                distinct,
424                filter: filter.map(|f| Box::new(transform_expr(*f, target))),
425                over,
426            }
427        }
428        // Recurse into typed function child expressions, with special handling
429        // for date/time formatting functions that need format string conversion
430        Expr::TypedFunction { func, filter, over } => {
431            let transformed_func = transform_typed_function(func, target);
432            Expr::TypedFunction {
433                func: transformed_func,
434                filter: filter.map(|f| Box::new(transform_expr(*f, target))),
435                over,
436            }
437        }
438        // ILIKE → LOWER(expr) LIKE LOWER(pattern) for non-supporting dialects
439        Expr::ILike {
440            expr,
441            pattern,
442            negated,
443            escape,
444        } if !supports_ilike_builtin(target) => Expr::Like {
445            expr: Box::new(Expr::TypedFunction {
446                func: TypedFunction::Lower {
447                    expr: Box::new(transform_expr(*expr, target)),
448                },
449                filter: None,
450                over: None,
451            }),
452            pattern: Box::new(Expr::TypedFunction {
453                func: TypedFunction::Lower {
454                    expr: Box::new(transform_expr(*pattern, target)),
455                },
456                filter: None,
457                over: None,
458            }),
459            negated,
460            escape,
461        },
462        // SIMILAR TO → LIKE for T-SQL (lossy: regex features dropped)
463        Expr::SimilarTo {
464            expr,
465            pattern,
466            negated,
467            escape,
468        } if is_tsql_family(target) => {
469            let transformed_pattern = transform_expr(*pattern, target);
470            let simplified = simplify_similar_to_pattern(&transformed_pattern);
471            Expr::Like {
472                expr: Box::new(transform_expr(*expr, target)),
473                pattern: Box::new(simplified),
474                negated,
475                escape,
476            }
477        }
478        // Map data types in CAST
479        Expr::Cast { expr, data_type } => Expr::Cast {
480            expr: Box::new(transform_expr(*expr, target)),
481            data_type: map_data_type(data_type, target),
482        },
483        // Recurse into binary ops, with T-SQL specific transforms
484        Expr::BinaryOp { left, op, right } => {
485            // Change 3: || → CONCAT() for T-SQL
486            // Collect args BEFORE recursive transform to flatten the full chain
487            if op == BinaryOperator::Concat && is_tsql_family(target) {
488                let mut args = Vec::new();
489                collect_concat_args(
490                    &Expr::BinaryOp {
491                        left,
492                        op: BinaryOperator::Concat,
493                        right,
494                    },
495                    &mut args,
496                );
497                // Now transform each collected arg
498                let args = args
499                    .into_iter()
500                    .map(|a| transform_expr(a, target))
501                    .collect();
502                return Expr::Function {
503                    name: "CONCAT".to_string(),
504                    args,
505                    distinct: false,
506                    filter: None,
507                    over: None,
508                };
509            }
510
511            let left_transformed = transform_expr(*left, target);
512            let right_transformed = transform_expr(*right, target);
513
514            // Change 6: expr ± INTERVAL → DATEADD() for T-SQL
515            if is_tsql_family(target) && matches!(op, BinaryOperator::Plus | BinaryOperator::Minus)
516            {
517                if let Some(dateadd) =
518                    try_transform_interval_arithmetic(&left_transformed, &op, &right_transformed)
519                {
520                    return dateadd;
521                }
522            }
523
524            Expr::BinaryOp {
525                left: Box::new(left_transformed),
526                op,
527                right: Box::new(right_transformed),
528            }
529        }
530        Expr::UnaryOp { op, expr } => Expr::UnaryOp {
531            op,
532            expr: Box::new(transform_expr(*expr, target)),
533        },
534        Expr::Nested(inner) => Expr::Nested(Box::new(transform_expr(*inner, target))),
535        // Transform quoting on column references
536        Expr::Column {
537            table,
538            name,
539            quote_style,
540            table_quote_style,
541        } => {
542            let new_qs = if quote_style.is_quoted() {
543                QuoteStyle::for_dialect(target)
544            } else {
545                QuoteStyle::None
546            };
547            let new_tqs = if table_quote_style.is_quoted() {
548                QuoteStyle::for_dialect(target)
549            } else {
550                QuoteStyle::None
551            };
552            Expr::Column {
553                table,
554                name,
555                quote_style: new_qs,
556                table_quote_style: new_tqs,
557            }
558        }
559        // Everything else stays the same
560        other => other,
561    }
562}
563
564// ═══════════════════════════════════════════════════════════════════════════
565// Typed function transformation with format string conversion
566// ═══════════════════════════════════════════════════════════════════════════
567
568/// Transform a TypedFunction, including date/time format string conversion.
569///
570/// For TimeToStr and StrToTime functions, this converts the format string
571/// from the source dialect's convention to the target dialect's convention.
572fn transform_typed_function(func: TypedFunction, target: Dialect) -> TypedFunction {
573    match func {
574        TypedFunction::TimeToStr { expr, format } => {
575            let transformed_expr = Box::new(transform_expr(*expr, target));
576            let transformed_format = transform_format_expr(*format, target);
577            TypedFunction::TimeToStr {
578                expr: transformed_expr,
579                format: Box::new(transformed_format),
580            }
581        }
582        TypedFunction::StrToTime { expr, format } => {
583            let transformed_expr = Box::new(transform_expr(*expr, target));
584            let transformed_format = transform_format_expr(*format, target);
585            TypedFunction::StrToTime {
586                expr: transformed_expr,
587                format: Box::new(transformed_format),
588            }
589        }
590        // For all other typed functions, just transform child expressions
591        other => other.transform_children(&|e| transform_expr(e, target)),
592    }
593}
594
595/// Transform a format string expression for the target dialect.
596///
597/// If the expression is a string literal, convert the format specifiers.
598/// Otherwise, just recursively transform child expressions.
599fn transform_format_expr(expr: Expr, target: Dialect) -> Expr {
600    // We need to know the source dialect to convert properly.
601    // Since we don't have access to the source dialect here, we use heuristics
602    // to detect the format style based on the format string content.
603    match &expr {
604        Expr::StringLiteral(s) | Expr::NationalStringLiteral(s) => {
605            let detected_source = detect_format_style(s);
606            let target_style = time::TimeFormatStyle::for_dialect(target);
607
608            // Only convert if styles differ
609            if detected_source != target_style {
610                let converted = time::format_time(s, detected_source, target_style);
611                match expr {
612                    Expr::NationalStringLiteral(_) => Expr::NationalStringLiteral(converted),
613                    _ => Expr::StringLiteral(converted),
614                }
615            } else {
616                expr
617            }
618        }
619        _ => transform_expr(expr, target),
620    }
621}
622
623/// Detect the format style from a format string based on its content.
624fn detect_format_style(format_str: &str) -> time::TimeFormatStyle {
625    // Check for style-specific patterns
626    if format_str.contains('%') {
627        // strftime-style format
628        if format_str.contains("%i") {
629            // MySQL uses %i for minutes
630            time::TimeFormatStyle::Mysql
631        } else {
632            // Generic strftime (SQLite, BigQuery, etc.)
633            time::TimeFormatStyle::Strftime
634        }
635    } else if format_str.contains("YYYY") || format_str.contains("yyyy") {
636        // Check for Java vs Postgres/Snowflake
637        if format_str.contains("HH24") || format_str.contains("MI") || format_str.contains("SS") {
638            // Postgres/Oracle style
639            time::TimeFormatStyle::Postgres
640        } else if format_str.contains("mm") && format_str.contains("ss") {
641            // Java style (lowercase seconds and minutes)
642            time::TimeFormatStyle::Java
643        } else if format_str.contains("FF") {
644            // Snowflake fractional seconds
645            time::TimeFormatStyle::Snowflake
646        } else if format_str.contains("MM") && format_str.contains("DD") {
647            // Could be Postgres or Snowflake - default to Postgres
648            time::TimeFormatStyle::Postgres
649        } else {
650            // Default to Java for ambiguous cases with lowercase patterns
651            time::TimeFormatStyle::Java
652        }
653    } else {
654        // Unknown format - default to strftime
655        time::TimeFormatStyle::Strftime
656    }
657}
658
659// ═══════════════════════════════════════════════════════════════════════════
660// Function name mapping
661// ═══════════════════════════════════════════════════════════════════════════
662
663/// Map function names between dialects.
664pub(crate) fn map_function_name(name: &str, target: Dialect) -> String {
665    let upper = name.to_uppercase();
666    match upper.as_str() {
667        // ── NOW / CURRENT_TIMESTAMP / GETDATE ────────────────────────────
668        "NOW" => {
669            if is_tsql_family(target) {
670                "GETDATE".to_string()
671            } else if matches!(
672                target,
673                Dialect::Ansi
674                    | Dialect::BigQuery
675                    | Dialect::Snowflake
676                    | Dialect::Oracle
677                    | Dialect::ClickHouse
678                    | Dialect::Exasol
679                    | Dialect::Teradata
680                    | Dialect::Druid
681                    | Dialect::Dremio
682                    | Dialect::Tableau
683            ) || is_presto_family(target)
684                || is_hive_family(target)
685            {
686                "CURRENT_TIMESTAMP".to_string()
687            } else {
688                // Postgres, MySQL, SQLite, DuckDB, Redshift, etc. – keep NOW
689                name.to_string()
690            }
691        }
692        "GETDATE" => {
693            if is_tsql_family(target) {
694                name.to_string()
695            } else if is_postgres_family(target)
696                || matches!(target, Dialect::Mysql | Dialect::DuckDb | Dialect::Sqlite)
697            {
698                "NOW".to_string()
699            } else {
700                "CURRENT_TIMESTAMP".to_string()
701            }
702        }
703
704        // ── LEN / LENGTH ─────────────────────────────────────────────────
705        "LEN" => {
706            if is_tsql_family(target) || matches!(target, Dialect::BigQuery | Dialect::Snowflake) {
707                name.to_string()
708            } else {
709                "LENGTH".to_string()
710            }
711        }
712        "LENGTH" if is_tsql_family(target) => "LEN".to_string(),
713
714        // ── SUBSTR / SUBSTRING ───────────────────────────────────────────
715        "SUBSTR" => {
716            if is_mysql_family(target)
717                || matches!(target, Dialect::Sqlite | Dialect::Oracle)
718                || is_hive_family(target)
719            {
720                "SUBSTR".to_string()
721            } else {
722                "SUBSTRING".to_string()
723            }
724        }
725        "SUBSTRING" => {
726            if is_mysql_family(target)
727                || matches!(target, Dialect::Sqlite | Dialect::Oracle)
728                || is_hive_family(target)
729            {
730                "SUBSTR".to_string()
731            } else {
732                name.to_string()
733            }
734        }
735
736        // ── IFNULL / COALESCE / ISNULL ───────────────────────────────────
737        "IFNULL" => {
738            if is_tsql_family(target) {
739                "ISNULL".to_string()
740            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
741                // MySQL family + SQLite natively support IFNULL
742                name.to_string()
743            } else {
744                "COALESCE".to_string()
745            }
746        }
747        "ISNULL" => {
748            if is_tsql_family(target) {
749                name.to_string()
750            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
751                "IFNULL".to_string()
752            } else {
753                "COALESCE".to_string()
754            }
755        }
756
757        // ── NVL → COALESCE (Oracle to others) ───────────────────────────
758        "NVL" => {
759            if matches!(target, Dialect::Oracle | Dialect::Snowflake) {
760                name.to_string()
761            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
762                "IFNULL".to_string()
763            } else if is_tsql_family(target) {
764                "ISNULL".to_string()
765            } else {
766                "COALESCE".to_string()
767            }
768        }
769
770        // ── RANDOM / RAND ────────────────────────────────────────────────
771        "RANDOM" => {
772            if matches!(
773                target,
774                Dialect::Postgres | Dialect::Sqlite | Dialect::DuckDb
775            ) {
776                name.to_string()
777            } else {
778                "RAND".to_string()
779            }
780        }
781        "RAND" => {
782            if matches!(
783                target,
784                Dialect::Postgres | Dialect::Sqlite | Dialect::DuckDb
785            ) {
786                "RANDOM".to_string()
787            } else {
788                name.to_string()
789            }
790        }
791
792        // ── POSITION / CHARINDEX ─────────────────────────────────────────
793        "POSITION" if is_tsql_family(target) => "CHARINDEX".to_string(),
794        "CHARINDEX" if is_postgres_family(target) => "POSITION".to_string(),
795
796        // Everything else – preserve original name
797        _ => name.to_string(),
798    }
799}
800
801// ═══════════════════════════════════════════════════════════════════════════
802// Data-type mapping
803// ═══════════════════════════════════════════════════════════════════════════
804
805/// Map data types between dialects.
806pub(crate) fn map_data_type(dt: DataType, target: Dialect) -> DataType {
807    match (dt, target) {
808        // ── T-SQL type mappings ─────────────────────────────────────────
809        (DataType::Text, t) if is_tsql_family(t) => {
810            DataType::Varchar(None) // NVARCHAR(MAX) emitted by generator via Unknown
811        }
812        (DataType::Boolean, t) if is_tsql_family(t) => DataType::Bit(None),
813        (DataType::Bytea, t) if is_tsql_family(t) => DataType::Varbinary(None),
814        (DataType::Json, t) if is_tsql_family(t) => DataType::Varchar(None),
815        (DataType::Jsonb, t) if is_tsql_family(t) => DataType::Varchar(None),
816        (DataType::Uuid, t) if is_tsql_family(t) => {
817            DataType::Unknown("UNIQUEIDENTIFIER".to_string())
818        }
819        (DataType::Serial, t) if is_tsql_family(t) => DataType::Int,
820        (DataType::BigSerial, t) if is_tsql_family(t) => DataType::BigInt,
821        (DataType::SmallSerial, t) if is_tsql_family(t) => DataType::SmallInt,
822        (DataType::Timestamp { .. }, t) if is_tsql_family(t) => {
823            DataType::Unknown("DATETIME2".to_string())
824        }
825        (DataType::Real, t) if is_tsql_family(t) => DataType::Real,
826
827        // ── TEXT / STRING ────────────────────────────────────────────────
828        // TEXT → STRING for BigQuery, Hive, Spark, Databricks
829        (DataType::Text, t) if matches!(t, Dialect::BigQuery) || is_hive_family(t) => {
830            DataType::String
831        }
832        // STRING → TEXT for Postgres family, MySQL family, SQLite
833        (DataType::String, t)
834            if is_postgres_family(t) || is_mysql_family(t) || matches!(t, Dialect::Sqlite) =>
835        {
836            DataType::Text
837        }
838
839        // ── INT → BIGINT (BigQuery) ─────────────────────────────────────
840        (DataType::Int, Dialect::BigQuery) => DataType::BigInt,
841
842        // ── FLOAT → DOUBLE (BigQuery) ───────────────────────────────────
843        (DataType::Float, Dialect::BigQuery) => DataType::Double,
844
845        // ── BYTEA ↔ BLOB ────────────────────────────────────────────────
846        (DataType::Bytea, t)
847            if is_mysql_family(t)
848                || matches!(t, Dialect::Sqlite | Dialect::Oracle)
849                || is_hive_family(t) =>
850        {
851            DataType::Blob
852        }
853        (DataType::Blob, t) if is_postgres_family(t) => DataType::Bytea,
854
855        // ── BOOLEAN → BOOL ──────────────────────────────────────────────
856        (DataType::Boolean, Dialect::Mysql) => DataType::Boolean,
857
858        // Everything else is unchanged
859        (dt, _) => dt,
860    }
861}
862
863// ═══════════════════════════════════════════════════════════════════════════
864// LIMIT / TOP / FETCH FIRST transform
865// ═══════════════════════════════════════════════════════════════════════════
866
867/// Transform LIMIT / TOP / FETCH FIRST between dialects.
868///
869/// - T-SQL family:  `LIMIT n` → `TOP n` (OFFSET + FETCH handled separately)
870/// - Oracle:        `LIMIT n` → `FETCH FIRST n ROWS ONLY`
871/// - All others:    `TOP n` / `FETCH FIRST n` → `LIMIT n`
872fn transform_limit(sel: &mut SelectStatement, target: Dialect) {
873    if is_tsql_family(target) {
874        // Move LIMIT → TOP for T-SQL (only when there's no OFFSET)
875        if let Some(limit) = sel.limit.take() {
876            if sel.offset.is_none() {
877                sel.top = Some(Box::new(limit));
878            } else {
879                // T-SQL with OFFSET uses OFFSET n ROWS FETCH NEXT m ROWS ONLY
880                sel.fetch_first = Some(limit);
881                // T-SQL OFFSET/FETCH requires ORDER BY. Add ORDER BY (SELECT NULL) if absent.
882                if sel.order_by.is_empty() {
883                    sel.order_by = vec![OrderByItem {
884                        expr: Expr::Subquery(Box::new(Statement::Select(SelectStatement {
885                            comments: Vec::new(),
886                            ctes: Vec::new(),
887                            distinct: false,
888                            top: None,
889                            columns: vec![SelectItem::Expr {
890                                expr: Expr::Null,
891                                alias: None,
892                                alias_quote_style: QuoteStyle::None,
893                            }],
894                            from: None,
895                            joins: Vec::new(),
896                            where_clause: None,
897                            group_by: Vec::new(),
898                            having: None,
899                            order_by: Vec::new(),
900                            limit: None,
901                            offset: None,
902                            fetch_first: None,
903                            qualify: None,
904                            window_definitions: Vec::new(),
905                        }))),
906                        ascending: true,
907                        nulls_first: None,
908                    }];
909                }
910            }
911        }
912        // Also move fetch_first → top when no offset
913        if sel.offset.is_none() {
914            if let Some(fetch) = sel.fetch_first.take() {
915                sel.top = Some(Box::new(fetch));
916            }
917        }
918    } else if matches!(target, Dialect::Oracle) {
919        // Oracle prefers FETCH FIRST n ROWS ONLY (SQL:2008 syntax)
920        if let Some(limit) = sel.limit.take() {
921            sel.fetch_first = Some(limit);
922        }
923        if let Some(top) = sel.top.take() {
924            sel.fetch_first = Some(*top);
925        }
926    } else {
927        // All other dialects: normalize to LIMIT
928        if let Some(top) = sel.top.take() {
929            if sel.limit.is_none() {
930                sel.limit = Some(*top);
931            }
932        }
933        if let Some(fetch) = sel.fetch_first.take() {
934            if sel.limit.is_none() {
935                sel.limit = Some(fetch);
936            }
937        }
938    }
939}
940
941// ═══════════════════════════════════════════════════════════════════════════
942// Quoted-identifier transform
943// ═══════════════════════════════════════════════════════════════════════════
944
945/// Convert any quoted identifiers in expressions to the target dialect's
946/// quoting convention.
947fn transform_quotes(expr: Expr, target: Dialect) -> Expr {
948    match expr {
949        Expr::Column {
950            table,
951            name,
952            quote_style,
953            table_quote_style,
954        } => {
955            let new_qs = if quote_style.is_quoted() {
956                QuoteStyle::for_dialect(target)
957            } else {
958                QuoteStyle::None
959            };
960            let new_tqs = if table_quote_style.is_quoted() {
961                QuoteStyle::for_dialect(target)
962            } else {
963                QuoteStyle::None
964            };
965            Expr::Column {
966                table,
967                name,
968                quote_style: new_qs,
969                table_quote_style: new_tqs,
970            }
971        }
972        // Recurse into sub-expressions
973        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
974            left: Box::new(transform_quotes(*left, target)),
975            op,
976            right: Box::new(transform_quotes(*right, target)),
977        },
978        Expr::UnaryOp { op, expr } => Expr::UnaryOp {
979            op,
980            expr: Box::new(transform_quotes(*expr, target)),
981        },
982        Expr::Function {
983            name,
984            args,
985            distinct,
986            filter,
987            over,
988        } => Expr::Function {
989            name,
990            args: args
991                .into_iter()
992                .map(|a| transform_quotes(a, target))
993                .collect(),
994            distinct,
995            filter: filter.map(|f| Box::new(transform_quotes(*f, target))),
996            over,
997        },
998        Expr::TypedFunction { func, filter, over } => Expr::TypedFunction {
999            func: func.transform_children(&|e| transform_quotes(e, target)),
1000            filter: filter.map(|f| Box::new(transform_quotes(*f, target))),
1001            over,
1002        },
1003        Expr::Nested(inner) => Expr::Nested(Box::new(transform_quotes(*inner, target))),
1004        Expr::Alias { expr, name } => Expr::Alias {
1005            expr: Box::new(transform_quotes(*expr, target)),
1006            name,
1007        },
1008        other => other,
1009    }
1010}
1011
1012/// Transform quoting for all identifier-bearing nodes inside a SELECT.
1013fn transform_quotes_in_select(sel: &mut SelectStatement, target: Dialect) {
1014    // Columns in the select list
1015    for item in &mut sel.columns {
1016        if let SelectItem::Expr { expr, .. } = item {
1017            *expr = transform_quotes(expr.clone(), target);
1018        }
1019    }
1020    // WHERE
1021    if let Some(wh) = &mut sel.where_clause {
1022        *wh = transform_quotes(wh.clone(), target);
1023    }
1024    // GROUP BY
1025    for gb in &mut sel.group_by {
1026        *gb = transform_quotes(gb.clone(), target);
1027    }
1028    // HAVING
1029    if let Some(having) = &mut sel.having {
1030        *having = transform_quotes(having.clone(), target);
1031    }
1032    // ORDER BY
1033    for ob in &mut sel.order_by {
1034        ob.expr = transform_quotes(ob.expr.clone(), target);
1035    }
1036    // Table refs (FROM, JOINs)
1037    if let Some(from) = &mut sel.from {
1038        transform_quotes_in_table_source(&mut from.source, target);
1039    }
1040    for join in &mut sel.joins {
1041        transform_quotes_in_table_source(&mut join.table, target);
1042        if let Some(on) = &mut join.on {
1043            *on = transform_quotes(on.clone(), target);
1044        }
1045    }
1046}
1047
1048fn transform_quotes_in_table_source(source: &mut TableSource, target: Dialect) {
1049    match source {
1050        TableSource::Table(tref) => {
1051            if tref.name_quote_style.is_quoted() {
1052                tref.name_quote_style = QuoteStyle::for_dialect(target);
1053            }
1054        }
1055        TableSource::Subquery { .. } => {}
1056        TableSource::TableFunction { .. } => {}
1057        TableSource::Lateral { source } => transform_quotes_in_table_source(source, target),
1058        TableSource::Pivot { source, .. } | TableSource::Unpivot { source, .. } => {
1059            transform_quotes_in_table_source(source, target);
1060        }
1061        TableSource::Unnest { .. } => {}
1062    }
1063}
1064
1065// ═══════════════════════════════════════════════════════════════════════════
1066// Concat operator transform (Change 3: || → CONCAT() for T-SQL)
1067// ═══════════════════════════════════════════════════════════════════════════
1068
1069/// Collect all operands from a chain of `||` (Concat) operations into a flat list.
1070fn collect_concat_args(expr: &Expr, args: &mut Vec<Expr>) {
1071    match expr {
1072        Expr::BinaryOp {
1073            left,
1074            op: BinaryOperator::Concat,
1075            right,
1076        } => {
1077            collect_concat_args(left, args);
1078            collect_concat_args(right, args);
1079        }
1080        other => args.push(other.clone()),
1081    }
1082}
1083
1084// ═══════════════════════════════════════════════════════════════════════════
1085// Interval arithmetic transform (Change 6: expr ± INTERVAL → DATEADD())
1086// ═══════════════════════════════════════════════════════════════════════════
1087
1088/// Try to transform `expr ± INTERVAL 'n unit'` into `DATEADD(unit, ±n, expr)` for T-SQL.
1089/// Returns `Some(transformed_expr)` if the right side is an interval, `None` otherwise.
1090fn try_transform_interval_arithmetic(
1091    left: &Expr,
1092    op: &BinaryOperator,
1093    right: &Expr,
1094) -> Option<Expr> {
1095    // Check right side is an interval
1096    if let Expr::Interval { value, unit } = right {
1097        if let Some((count, unit_name)) = parse_interval_value(value, unit) {
1098            let final_count = if matches!(op, BinaryOperator::Minus) {
1099                -count
1100            } else {
1101                count
1102            };
1103            return Some(Expr::Function {
1104                name: "DATEADD".to_string(),
1105                args: vec![
1106                    // Use a Column expr for the datepart keyword (unquoted identifier)
1107                    Expr::Column {
1108                        table: None,
1109                        name: unit_name,
1110                        quote_style: QuoteStyle::None,
1111                        table_quote_style: QuoteStyle::None,
1112                    },
1113                    Expr::Number(final_count.to_string()),
1114                    left.clone(),
1115                ],
1116                distinct: false,
1117                filter: None,
1118                over: None,
1119            });
1120        }
1121    }
1122
1123    // Check left side is an interval (less common: INTERVAL '7 days' + col)
1124    if let Expr::Interval { value, unit } = left {
1125        if matches!(op, BinaryOperator::Plus) {
1126            if let Some((count, unit_name)) = parse_interval_value(value, unit) {
1127                return Some(Expr::Function {
1128                    name: "DATEADD".to_string(),
1129                    args: vec![
1130                        Expr::Column {
1131                            table: None,
1132                            name: unit_name,
1133                            quote_style: QuoteStyle::None,
1134                            table_quote_style: QuoteStyle::None,
1135                        },
1136                        Expr::Number(count.to_string()),
1137                        right.clone(),
1138                    ],
1139                    distinct: false,
1140                    filter: None,
1141                    over: None,
1142                });
1143            }
1144        }
1145    }
1146
1147    None
1148}
1149
1150/// Parse an interval value expression and optional unit into (count, T-SQL datepart name).
1151fn parse_interval_value(value: &Expr, unit: &Option<DateTimeField>) -> Option<(i64, String)> {
1152    // Case 1: INTERVAL '7 days' (value is a string literal containing "7 days")
1153    if let Expr::StringLiteral(s) = value {
1154        let parts: Vec<&str> = s.trim().split_whitespace().collect();
1155        if parts.len() == 2 {
1156            let count: i64 = parts[0].parse().ok()?;
1157            let unit_name = normalize_interval_unit(parts[1])?;
1158            return Some((count, unit_name));
1159        }
1160        if parts.len() == 1 {
1161            // Just a number in the string, unit must come from the `unit` field
1162            let count: i64 = parts[0].parse().ok()?;
1163            if let Some(u) = unit {
1164                let unit_name = datetime_field_to_tsql(u)?;
1165                return Some((count, unit_name));
1166            }
1167        }
1168    }
1169
1170    // Case 2: INTERVAL 7 DAY (value is a number, unit is DateTimeField)
1171    if let Expr::Number(n) = value {
1172        let count: i64 = n.parse().ok()?;
1173        if let Some(u) = unit {
1174            let unit_name = datetime_field_to_tsql(u)?;
1175            return Some((count, unit_name));
1176        }
1177    }
1178
1179    None
1180}
1181
/// Normalize an interval unit string to a T-SQL DATEADD part name.
///
/// Matching is case-insensitive and ignores a trailing plural `s`
/// (`"Days"` → `"DAY"`). The recognised units are year, quarter, month, week,
/// day, hour, minute, second, millisecond and microsecond — the same set that
/// `datetime_field_to_tsql` maps. Returns `None` for anything else.
fn normalize_interval_unit(unit: &str) -> Option<String> {
    let lower = unit.to_lowercase();
    // Drop the plural suffix so "day" and "days" resolve identically.
    let singular = lower.trim_end_matches('s');
    let part = match singular {
        "year" => "YEAR",
        "quarter" => "QUARTER",
        "month" => "MONTH",
        "week" => "WEEK",
        "day" => "DAY",
        "hour" => "HOUR",
        "minute" => "MINUTE",
        "second" => "SECOND",
        "millisecond" => "MILLISECOND",
        "microsecond" => "MICROSECOND",
        _ => return None,
    };
    Some(part.to_string())
}
1199
1200/// Convert a DateTimeField to T-SQL DATEADD unit name.
1201fn datetime_field_to_tsql(field: &DateTimeField) -> Option<String> {
1202    match field {
1203        DateTimeField::Year => Some("YEAR".to_string()),
1204        DateTimeField::Quarter => Some("QUARTER".to_string()),
1205        DateTimeField::Month => Some("MONTH".to_string()),
1206        DateTimeField::Week => Some("WEEK".to_string()),
1207        DateTimeField::Day => Some("DAY".to_string()),
1208        DateTimeField::Hour => Some("HOUR".to_string()),
1209        DateTimeField::Minute => Some("MINUTE".to_string()),
1210        DateTimeField::Second => Some("SECOND".to_string()),
1211        DateTimeField::Millisecond => Some("MILLISECOND".to_string()),
1212        DateTimeField::Microsecond => Some("MICROSECOND".to_string()),
1213        _ => None,
1214    }
1215}
1216
1217// ═══════════════════════════════════════════════════════════════════════════
1218// SIMILAR TO → LIKE pattern simplification (Change 9)
1219// ═══════════════════════════════════════════════════════════════════════════
1220
1221/// Simplify a SIMILAR TO pattern for use with LIKE.
1222/// Strips regex features (|, (), +, *) that T-SQL LIKE doesn't support.
1223fn simplify_similar_to_pattern(pattern: &Expr) -> Expr {
1224    if let Expr::StringLiteral(s) = pattern {
1225        let simplified = s.replace('|', "%").replace('(', "").replace(')', "");
1226        Expr::StringLiteral(simplified)
1227    } else {
1228        pattern.clone()
1229    }
1230}