Skip to main content

sqlglot_rust/dialects/
mod.rs

1use serde::{Deserialize, Serialize};
2
3use crate::ast::*;
4
5pub mod plugin;
6pub mod time;
7
8/// Supported SQL dialects.
9///
10/// Mirrors the full set of dialects supported by Python's sqlglot library.
11/// Dialects are grouped into **Official** (core, higher-priority maintenance)
12/// and **Community** (contributed, fully functional) tiers.
13#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
14pub enum Dialect {
15    // ── Core / base ──────────────────────────────────────────────────────
16    /// ANSI SQL standard (default / base dialect)
17    Ansi,
18
19    // ── Official dialects ────────────────────────────────────────────────
20    /// AWS Athena (Presto-based)
21    Athena,
22    /// Google BigQuery
23    BigQuery,
24    /// ClickHouse
25    ClickHouse,
26    /// Databricks (Spark-based)
27    Databricks,
28    /// DuckDB
29    DuckDb,
30    /// Apache Hive
31    Hive,
32    /// MySQL
33    Mysql,
34    /// Oracle Database
35    Oracle,
36    /// PostgreSQL
37    Postgres,
38    /// Presto
39    Presto,
40    /// Amazon Redshift (Postgres-based)
41    Redshift,
42    /// Snowflake
43    Snowflake,
44    /// Apache Spark SQL
45    Spark,
46    /// SQLite
47    Sqlite,
48    /// StarRocks (MySQL-compatible)
49    StarRocks,
50    /// Trino (Presto successor)
51    Trino,
52    /// Microsoft SQL Server (T-SQL)
53    Tsql,
54
55    // ── Community dialects ───────────────────────────────────────────────
56    /// Apache Doris (MySQL-compatible)
57    Doris,
58    /// Dremio
59    Dremio,
60    /// Apache Drill
61    Drill,
62    /// Apache Druid
63    Druid,
64    /// Exasol
65    Exasol,
66    /// Microsoft Fabric (T-SQL variant)
67    Fabric,
68    /// Materialize (Postgres-compatible)
69    Materialize,
70    /// PRQL (Pipelined Relational Query Language)
71    Prql,
72    /// RisingWave (Postgres-compatible)
73    RisingWave,
74    /// SingleStore (MySQL-compatible)
75    SingleStore,
76    /// Tableau
77    Tableau,
78    /// Teradata
79    Teradata,
80}
81
82impl std::fmt::Display for Dialect {
83    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
84        match self {
85            Dialect::Ansi => write!(f, "ANSI SQL"),
86            Dialect::Athena => write!(f, "Athena"),
87            Dialect::BigQuery => write!(f, "BigQuery"),
88            Dialect::ClickHouse => write!(f, "ClickHouse"),
89            Dialect::Databricks => write!(f, "Databricks"),
90            Dialect::DuckDb => write!(f, "DuckDB"),
91            Dialect::Hive => write!(f, "Hive"),
92            Dialect::Mysql => write!(f, "MySQL"),
93            Dialect::Oracle => write!(f, "Oracle"),
94            Dialect::Postgres => write!(f, "PostgreSQL"),
95            Dialect::Presto => write!(f, "Presto"),
96            Dialect::Redshift => write!(f, "Redshift"),
97            Dialect::Snowflake => write!(f, "Snowflake"),
98            Dialect::Spark => write!(f, "Spark"),
99            Dialect::Sqlite => write!(f, "SQLite"),
100            Dialect::StarRocks => write!(f, "StarRocks"),
101            Dialect::Trino => write!(f, "Trino"),
102            Dialect::Tsql => write!(f, "T-SQL"),
103            Dialect::Doris => write!(f, "Doris"),
104            Dialect::Dremio => write!(f, "Dremio"),
105            Dialect::Drill => write!(f, "Drill"),
106            Dialect::Druid => write!(f, "Druid"),
107            Dialect::Exasol => write!(f, "Exasol"),
108            Dialect::Fabric => write!(f, "Fabric"),
109            Dialect::Materialize => write!(f, "Materialize"),
110            Dialect::Prql => write!(f, "PRQL"),
111            Dialect::RisingWave => write!(f, "RisingWave"),
112            Dialect::SingleStore => write!(f, "SingleStore"),
113            Dialect::Tableau => write!(f, "Tableau"),
114            Dialect::Teradata => write!(f, "Teradata"),
115        }
116    }
117}
118
119impl Dialect {
120    /// Returns the support tier for this dialect.
121    #[must_use]
122    pub fn support_level(&self) -> &'static str {
123        match self {
124            Dialect::Ansi
125            | Dialect::Athena
126            | Dialect::BigQuery
127            | Dialect::ClickHouse
128            | Dialect::Databricks
129            | Dialect::DuckDb
130            | Dialect::Hive
131            | Dialect::Mysql
132            | Dialect::Oracle
133            | Dialect::Postgres
134            | Dialect::Presto
135            | Dialect::Redshift
136            | Dialect::Snowflake
137            | Dialect::Spark
138            | Dialect::Sqlite
139            | Dialect::StarRocks
140            | Dialect::Trino
141            | Dialect::Tsql => "Official",
142
143            Dialect::Doris
144            | Dialect::Dremio
145            | Dialect::Drill
146            | Dialect::Druid
147            | Dialect::Exasol
148            | Dialect::Fabric
149            | Dialect::Materialize
150            | Dialect::Prql
151            | Dialect::RisingWave
152            | Dialect::SingleStore
153            | Dialect::Tableau
154            | Dialect::Teradata => "Community",
155        }
156    }
157
158    /// Returns all dialect variants.
159    #[must_use]
160    pub fn all() -> &'static [Dialect] {
161        &[
162            Dialect::Ansi,
163            Dialect::Athena,
164            Dialect::BigQuery,
165            Dialect::ClickHouse,
166            Dialect::Databricks,
167            Dialect::Doris,
168            Dialect::Dremio,
169            Dialect::Drill,
170            Dialect::Druid,
171            Dialect::DuckDb,
172            Dialect::Exasol,
173            Dialect::Fabric,
174            Dialect::Hive,
175            Dialect::Materialize,
176            Dialect::Mysql,
177            Dialect::Oracle,
178            Dialect::Postgres,
179            Dialect::Presto,
180            Dialect::Prql,
181            Dialect::Redshift,
182            Dialect::RisingWave,
183            Dialect::SingleStore,
184            Dialect::Snowflake,
185            Dialect::Spark,
186            Dialect::Sqlite,
187            Dialect::StarRocks,
188            Dialect::Tableau,
189            Dialect::Teradata,
190            Dialect::Trino,
191            Dialect::Tsql,
192        ]
193    }
194
195    /// Parse a dialect name (case-insensitive) into a `Dialect`.
196    pub fn from_str(s: &str) -> Option<Dialect> {
197        match s.to_lowercase().as_str() {
198            "" | "ansi" => Some(Dialect::Ansi),
199            "athena" => Some(Dialect::Athena),
200            "bigquery" => Some(Dialect::BigQuery),
201            "clickhouse" => Some(Dialect::ClickHouse),
202            "databricks" => Some(Dialect::Databricks),
203            "doris" => Some(Dialect::Doris),
204            "dremio" => Some(Dialect::Dremio),
205            "drill" => Some(Dialect::Drill),
206            "druid" => Some(Dialect::Druid),
207            "duckdb" => Some(Dialect::DuckDb),
208            "exasol" => Some(Dialect::Exasol),
209            "fabric" => Some(Dialect::Fabric),
210            "hive" => Some(Dialect::Hive),
211            "materialize" => Some(Dialect::Materialize),
212            "mysql" => Some(Dialect::Mysql),
213            "oracle" => Some(Dialect::Oracle),
214            "postgres" | "postgresql" => Some(Dialect::Postgres),
215            "presto" => Some(Dialect::Presto),
216            "prql" => Some(Dialect::Prql),
217            "redshift" => Some(Dialect::Redshift),
218            "risingwave" => Some(Dialect::RisingWave),
219            "singlestore" => Some(Dialect::SingleStore),
220            "snowflake" => Some(Dialect::Snowflake),
221            "spark" => Some(Dialect::Spark),
222            "sqlite" => Some(Dialect::Sqlite),
223            "starrocks" => Some(Dialect::StarRocks),
224            "tableau" => Some(Dialect::Tableau),
225            "teradata" => Some(Dialect::Teradata),
226            "trino" => Some(Dialect::Trino),
227            "tsql" | "mssql" | "sqlserver" => Some(Dialect::Tsql),
228            _ => None,
229        }
230    }
231}
232
233// ═══════════════════════════════════════════════════════════════════════════
234// Dialect families — helpers for grouping similar dialects
235// ═══════════════════════════════════════════════════════════════════════════
236
237/// Dialects in the MySQL family (use SUBSTR, IFNULL, similar type system).
238fn is_mysql_family(d: Dialect) -> bool {
239    matches!(
240        d,
241        Dialect::Mysql | Dialect::Doris | Dialect::SingleStore | Dialect::StarRocks
242    )
243}
244
245/// Dialects in the Postgres family (support ILIKE, BYTEA, SUBSTRING).
246fn is_postgres_family(d: Dialect) -> bool {
247    matches!(
248        d,
249        Dialect::Postgres | Dialect::Redshift | Dialect::Materialize | Dialect::RisingWave
250    )
251}
252
253/// Dialects in the Presto family (ANSI-like, VARCHAR oriented).
254fn is_presto_family(d: Dialect) -> bool {
255    matches!(d, Dialect::Presto | Dialect::Trino | Dialect::Athena)
256}
257
258/// Dialects in the Hive/Spark family (use STRING type, SUBSTR).
259fn is_hive_family(d: Dialect) -> bool {
260    matches!(d, Dialect::Hive | Dialect::Spark | Dialect::Databricks)
261}
262
263/// Dialects in the T-SQL family.
264fn is_tsql_family(d: Dialect) -> bool {
265    matches!(d, Dialect::Tsql | Dialect::Fabric)
266}
267
268/// Dialects that natively support ILIKE.
269pub(crate) fn supports_ilike_builtin(d: Dialect) -> bool {
270    matches!(
271        d,
272        Dialect::Postgres
273            | Dialect::Redshift
274            | Dialect::Materialize
275            | Dialect::RisingWave
276            | Dialect::DuckDb
277            | Dialect::Snowflake
278            | Dialect::ClickHouse
279            | Dialect::Trino
280            | Dialect::Presto
281            | Dialect::Athena
282            | Dialect::Databricks
283            | Dialect::Spark
284            | Dialect::Hive
285            | Dialect::StarRocks
286            | Dialect::Exasol
287            | Dialect::Druid
288            | Dialect::Dremio
289    )
290}
291
292// ═══════════════════════════════════════════════════════════════════════════
293// Statement / expression transforms
294// ═══════════════════════════════════════════════════════════════════════════
295
296/// Transform a statement from one dialect to another.
297///
298/// This applies dialect-specific rewrite rules such as:
299/// - Type mapping (e.g., `TEXT` → `STRING` for BigQuery)
300/// - Function name mapping (e.g., `NOW()` → `CURRENT_TIMESTAMP()`)
301/// - ILIKE → LIKE with LOWER() wrapping for dialects that don't support ILIKE
302#[must_use]
303pub fn transform(statement: &Statement, from: Dialect, to: Dialect) -> Statement {
304    if from == to {
305        return statement.clone();
306    }
307    let mut stmt = statement.clone();
308    transform_statement(&mut stmt, to);
309    stmt
310}
311
312fn transform_statement(statement: &mut Statement, target: Dialect) {
313    match statement {
314        Statement::Select(sel) => {
315            // Transform LIMIT / TOP / FETCH FIRST for the target dialect
316            transform_limit(sel, target);
317            // Transform identifier quoting for the target dialect
318            transform_quotes_in_select(sel, target);
319
320            for item in &mut sel.columns {
321                if let SelectItem::Expr { expr, .. } = item {
322                    *expr = transform_expr(expr.clone(), target);
323                }
324            }
325            if let Some(wh) = &mut sel.where_clause {
326                *wh = transform_expr(wh.clone(), target);
327            }
328            for gb in &mut sel.group_by {
329                *gb = transform_expr(gb.clone(), target);
330            }
331            if let Some(having) = &mut sel.having {
332                *having = transform_expr(having.clone(), target);
333            }
334        }
335        Statement::Insert(ins) => {
336            if let InsertSource::Values(rows) = &mut ins.source {
337                for row in rows {
338                    for val in row {
339                        *val = transform_expr(val.clone(), target);
340                    }
341                }
342            }
343            // Transform RETURNING expressions
344            for item in &mut ins.returning {
345                if let SelectItem::Expr { expr, .. } = item {
346                    *expr = transform_expr(expr.clone(), target);
347                }
348            }
349        }
350        Statement::Update(upd) => {
351            for (_, val) in &mut upd.assignments {
352                *val = transform_expr(val.clone(), target);
353            }
354            if let Some(wh) = &mut upd.where_clause {
355                *wh = transform_expr(wh.clone(), target);
356            }
357            // Transform RETURNING expressions
358            for item in &mut upd.returning {
359                if let SelectItem::Expr { expr, .. } = item {
360                    *expr = transform_expr(expr.clone(), target);
361                }
362            }
363        }
364        Statement::Delete(del) => {
365            if let Some(wh) = &mut del.where_clause {
366                *wh = transform_expr(wh.clone(), target);
367            }
368            // Transform RETURNING expressions
369            for item in &mut del.returning {
370                if let SelectItem::Expr { expr, .. } = item {
371                    *expr = transform_expr(expr.clone(), target);
372                }
373            }
374        }
375        // DDL: map data types in CREATE TABLE column definitions
376        Statement::CreateTable(ct) => {
377            for col in &mut ct.columns {
378                col.data_type = map_data_type(col.data_type.clone(), target);
379                if let Some(default) = &mut col.default {
380                    *default = transform_expr(default.clone(), target);
381                }
382            }
383            // Transform constraints (CHECK expressions)
384            for constraint in &mut ct.constraints {
385                if let TableConstraint::Check { expr, .. } = constraint {
386                    *expr = transform_expr(expr.clone(), target);
387                }
388            }
389            // Transform AS SELECT subquery
390            if let Some(as_select) = &mut ct.as_select {
391                transform_statement(as_select, target);
392            }
393        }
394        // DDL: map data types in ALTER TABLE ADD COLUMN
395        Statement::AlterTable(alt) => {
396            for action in &mut alt.actions {
397                match action {
398                    AlterTableAction::AddColumn(col) => {
399                        col.data_type = map_data_type(col.data_type.clone(), target);
400                        if let Some(default) = &mut col.default {
401                            *default = transform_expr(default.clone(), target);
402                        }
403                    }
404                    AlterTableAction::AlterColumnType { data_type, .. } => {
405                        *data_type = map_data_type(data_type.clone(), target);
406                    }
407                    _ => {}
408                }
409            }
410        }
411        _ => {}
412    }
413}
414
415/// Transform an expression for the target dialect.
416fn transform_expr(expr: Expr, target: Dialect) -> Expr {
417    match expr {
418        // Map function names across dialects
419        Expr::Function {
420            name,
421            args,
422            distinct,
423            filter,
424            over,
425        } => {
426            let new_name = map_function_name(&name, target);
427            let new_args: Vec<Expr> = args
428                .into_iter()
429                .map(|a| transform_expr(a, target))
430                .collect();
431            Expr::Function {
432                name: new_name,
433                args: new_args,
434                distinct,
435                filter: filter.map(|f| Box::new(transform_expr(*f, target))),
436                over,
437            }
438        }
439        // Recurse into typed function child expressions, with special handling
440        // for date/time formatting functions that need format string conversion
441        Expr::TypedFunction { func, filter, over } => {
442            let transformed_func = transform_typed_function(func, target);
443            Expr::TypedFunction {
444                func: transformed_func,
445                filter: filter.map(|f| Box::new(transform_expr(*f, target))),
446                over,
447            }
448        }
449        // ILIKE → LOWER(expr) LIKE LOWER(pattern) for non-supporting dialects
450        Expr::ILike {
451            expr,
452            pattern,
453            negated,
454            escape,
455        } if !supports_ilike_builtin(target) => Expr::Like {
456            expr: Box::new(Expr::TypedFunction {
457                func: TypedFunction::Lower {
458                    expr: Box::new(transform_expr(*expr, target)),
459                },
460                filter: None,
461                over: None,
462            }),
463            pattern: Box::new(Expr::TypedFunction {
464                func: TypedFunction::Lower {
465                    expr: Box::new(transform_expr(*pattern, target)),
466                },
467                filter: None,
468                over: None,
469            }),
470            negated,
471            escape,
472        },
473        // SIMILAR TO → LIKE for T-SQL (lossy: regex features dropped)
474        Expr::SimilarTo {
475            expr,
476            pattern,
477            negated,
478            escape,
479        } if is_tsql_family(target) => {
480            let transformed_pattern = transform_expr(*pattern, target);
481            let simplified = simplify_similar_to_pattern(&transformed_pattern);
482            Expr::Like {
483                expr: Box::new(transform_expr(*expr, target)),
484                pattern: Box::new(simplified),
485                negated,
486                escape,
487            }
488        }
489        // Map data types in CAST
490        Expr::Cast { expr, data_type } => Expr::Cast {
491            expr: Box::new(transform_expr(*expr, target)),
492            data_type: map_data_type(data_type, target),
493        },
494        // Recurse into binary ops, with T-SQL specific transforms
495        Expr::BinaryOp { left, op, right } => {
496            // Change 3: || → CONCAT() for T-SQL
497            // Collect args BEFORE recursive transform to flatten the full chain
498            if op == BinaryOperator::Concat && is_tsql_family(target) {
499                let mut args = Vec::new();
500                collect_concat_args(
501                    &Expr::BinaryOp {
502                        left,
503                        op: BinaryOperator::Concat,
504                        right,
505                    },
506                    &mut args,
507                );
508                // Now transform each collected arg
509                let args = args
510                    .into_iter()
511                    .map(|a| transform_expr(a, target))
512                    .collect();
513                return Expr::Function {
514                    name: "CONCAT".to_string(),
515                    args,
516                    distinct: false,
517                    filter: None,
518                    over: None,
519                };
520            }
521
522            let left_transformed = transform_expr(*left, target);
523            let right_transformed = transform_expr(*right, target);
524
525            // Change 6: expr ± INTERVAL → DATEADD() for T-SQL
526            if is_tsql_family(target) && matches!(op, BinaryOperator::Plus | BinaryOperator::Minus)
527            {
528                if let Some(dateadd) =
529                    try_transform_interval_arithmetic(&left_transformed, &op, &right_transformed)
530                {
531                    return dateadd;
532                }
533            }
534
535            Expr::BinaryOp {
536                left: Box::new(left_transformed),
537                op,
538                right: Box::new(right_transformed),
539            }
540        }
541        Expr::UnaryOp { op, expr } => Expr::UnaryOp {
542            op,
543            expr: Box::new(transform_expr(*expr, target)),
544        },
545        Expr::Nested(inner) => Expr::Nested(Box::new(transform_expr(*inner, target))),
546        // Transform quoting on column references
547        Expr::Column {
548            table,
549            name,
550            quote_style,
551            table_quote_style,
552        } => {
553            let new_qs = if quote_style.is_quoted() {
554                QuoteStyle::for_dialect(target)
555            } else {
556                QuoteStyle::None
557            };
558            let new_tqs = if table_quote_style.is_quoted() {
559                QuoteStyle::for_dialect(target)
560            } else {
561                QuoteStyle::None
562            };
563            Expr::Column {
564                table,
565                name,
566                quote_style: new_qs,
567                table_quote_style: new_tqs,
568            }
569        }
570        // Everything else stays the same
571        other => other,
572    }
573}
574
575// ═══════════════════════════════════════════════════════════════════════════
576// Typed function transformation with format string conversion
577// ═══════════════════════════════════════════════════════════════════════════
578
579/// Transform a TypedFunction, including date/time format string conversion.
580///
581/// For TimeToStr and StrToTime functions, this converts the format string
582/// from the source dialect's convention to the target dialect's convention.
583fn transform_typed_function(func: TypedFunction, target: Dialect) -> TypedFunction {
584    match func {
585        TypedFunction::TimeToStr { expr, format } => {
586            let transformed_expr = Box::new(transform_expr(*expr, target));
587            let transformed_format = transform_format_expr(*format, target);
588            TypedFunction::TimeToStr {
589                expr: transformed_expr,
590                format: Box::new(transformed_format),
591            }
592        }
593        TypedFunction::StrToTime { expr, format } => {
594            let transformed_expr = Box::new(transform_expr(*expr, target));
595            let transformed_format = transform_format_expr(*format, target);
596            TypedFunction::StrToTime {
597                expr: transformed_expr,
598                format: Box::new(transformed_format),
599            }
600        }
601        // For all other typed functions, just transform child expressions
602        other => other.transform_children(&|e| transform_expr(e, target)),
603    }
604}
605
606/// Transform a format string expression for the target dialect.
607///
608/// If the expression is a string literal, convert the format specifiers.
609/// Otherwise, just recursively transform child expressions.
610fn transform_format_expr(expr: Expr, target: Dialect) -> Expr {
611    // We need to know the source dialect to convert properly.
612    // Since we don't have access to the source dialect here, we use heuristics
613    // to detect the format style based on the format string content.
614    match &expr {
615        Expr::StringLiteral(s) | Expr::NationalStringLiteral(s) => {
616            let detected_source = detect_format_style(s);
617            let target_style = time::TimeFormatStyle::for_dialect(target);
618
619            // Only convert if styles differ
620            if detected_source != target_style {
621                let converted = time::format_time(s, detected_source, target_style);
622                match expr {
623                    Expr::NationalStringLiteral(_) => Expr::NationalStringLiteral(converted),
624                    _ => Expr::StringLiteral(converted),
625                }
626            } else {
627                expr
628            }
629        }
630        _ => transform_expr(expr, target),
631    }
632}
633
634/// Detect the format style from a format string based on its content.
635fn detect_format_style(format_str: &str) -> time::TimeFormatStyle {
636    // Check for style-specific patterns
637    if format_str.contains('%') {
638        // strftime-style format
639        if format_str.contains("%i") {
640            // MySQL uses %i for minutes
641            time::TimeFormatStyle::Mysql
642        } else {
643            // Generic strftime (SQLite, BigQuery, etc.)
644            time::TimeFormatStyle::Strftime
645        }
646    } else if format_str.contains("YYYY") || format_str.contains("yyyy") {
647        // Check for Java vs Postgres/Snowflake
648        if format_str.contains("HH24") || format_str.contains("MI") || format_str.contains("SS") {
649            // Postgres/Oracle style
650            time::TimeFormatStyle::Postgres
651        } else if format_str.contains("mm") && format_str.contains("ss") {
652            // Java style (lowercase seconds and minutes)
653            time::TimeFormatStyle::Java
654        } else if format_str.contains("FF") {
655            // Snowflake fractional seconds
656            time::TimeFormatStyle::Snowflake
657        } else if format_str.contains("MM") && format_str.contains("DD") {
658            // Could be Postgres or Snowflake - default to Postgres
659            time::TimeFormatStyle::Postgres
660        } else {
661            // Default to Java for ambiguous cases with lowercase patterns
662            time::TimeFormatStyle::Java
663        }
664    } else {
665        // Unknown format - default to strftime
666        time::TimeFormatStyle::Strftime
667    }
668}
669
670// ═══════════════════════════════════════════════════════════════════════════
671// Function name mapping
672// ═══════════════════════════════════════════════════════════════════════════
673
674/// Map function names between dialects.
675pub(crate) fn map_function_name(name: &str, target: Dialect) -> String {
676    let upper = name.to_uppercase();
677    match upper.as_str() {
678        // ── NOW / CURRENT_TIMESTAMP / GETDATE ────────────────────────────
679        "NOW" => {
680            if is_tsql_family(target) {
681                "GETDATE".to_string()
682            } else if matches!(
683                target,
684                Dialect::Ansi
685                    | Dialect::BigQuery
686                    | Dialect::Snowflake
687                    | Dialect::Oracle
688                    | Dialect::ClickHouse
689                    | Dialect::Exasol
690                    | Dialect::Teradata
691                    | Dialect::Druid
692                    | Dialect::Dremio
693                    | Dialect::Tableau
694            ) || is_presto_family(target)
695                || is_hive_family(target)
696            {
697                "CURRENT_TIMESTAMP".to_string()
698            } else {
699                // Postgres, MySQL, SQLite, DuckDB, Redshift, etc. – keep NOW
700                name.to_string()
701            }
702        }
703        "GETDATE" => {
704            if is_tsql_family(target) {
705                name.to_string()
706            } else if is_postgres_family(target)
707                || matches!(target, Dialect::Mysql | Dialect::DuckDb | Dialect::Sqlite)
708            {
709                "NOW".to_string()
710            } else {
711                "CURRENT_TIMESTAMP".to_string()
712            }
713        }
714
715        // ── LEN / LENGTH ─────────────────────────────────────────────────
716        "LEN" => {
717            if is_tsql_family(target) || matches!(target, Dialect::BigQuery | Dialect::Snowflake) {
718                name.to_string()
719            } else {
720                "LENGTH".to_string()
721            }
722        }
723        "LENGTH" if is_tsql_family(target) => "LEN".to_string(),
724
725        // ── SUBSTR / SUBSTRING ───────────────────────────────────────────
726        "SUBSTR" => {
727            if is_mysql_family(target)
728                || matches!(target, Dialect::Sqlite | Dialect::Oracle)
729                || is_hive_family(target)
730            {
731                "SUBSTR".to_string()
732            } else {
733                "SUBSTRING".to_string()
734            }
735        }
736        "SUBSTRING" => {
737            if is_mysql_family(target)
738                || matches!(target, Dialect::Sqlite | Dialect::Oracle)
739                || is_hive_family(target)
740            {
741                "SUBSTR".to_string()
742            } else {
743                name.to_string()
744            }
745        }
746
747        // ── IFNULL / COALESCE / ISNULL ───────────────────────────────────
748        "IFNULL" => {
749            if is_tsql_family(target) {
750                "ISNULL".to_string()
751            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
752                // MySQL family + SQLite natively support IFNULL
753                name.to_string()
754            } else {
755                "COALESCE".to_string()
756            }
757        }
758        "ISNULL" => {
759            if is_tsql_family(target) {
760                name.to_string()
761            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
762                "IFNULL".to_string()
763            } else {
764                "COALESCE".to_string()
765            }
766        }
767
768        // ── NVL → COALESCE (Oracle to others) ───────────────────────────
769        "NVL" => {
770            if matches!(target, Dialect::Oracle | Dialect::Snowflake) {
771                name.to_string()
772            } else if is_mysql_family(target) || matches!(target, Dialect::Sqlite) {
773                "IFNULL".to_string()
774            } else if is_tsql_family(target) {
775                "ISNULL".to_string()
776            } else {
777                "COALESCE".to_string()
778            }
779        }
780
781        // ── RANDOM / RAND ────────────────────────────────────────────────
782        "RANDOM" => {
783            if matches!(
784                target,
785                Dialect::Postgres | Dialect::Sqlite | Dialect::DuckDb
786            ) {
787                name.to_string()
788            } else {
789                "RAND".to_string()
790            }
791        }
792        "RAND" => {
793            if matches!(
794                target,
795                Dialect::Postgres | Dialect::Sqlite | Dialect::DuckDb
796            ) {
797                "RANDOM".to_string()
798            } else {
799                name.to_string()
800            }
801        }
802
803        // ── POSITION / CHARINDEX ─────────────────────────────────────────
804        "POSITION" if is_tsql_family(target) => "CHARINDEX".to_string(),
805        "CHARINDEX" if is_postgres_family(target) => "POSITION".to_string(),
806
807        // Everything else – preserve original name
808        _ => name.to_string(),
809    }
810}
811
812// ═══════════════════════════════════════════════════════════════════════════
813// Data-type mapping
814// ═══════════════════════════════════════════════════════════════════════════
815
816/// Map data types between dialects.
817pub(crate) fn map_data_type(dt: DataType, target: Dialect) -> DataType {
818    match (dt, target) {
819        // ── T-SQL type mappings ─────────────────────────────────────────
820        (DataType::Text, t) if is_tsql_family(t) => {
821            DataType::Varchar(None) // NVARCHAR(MAX) emitted by generator via Unknown
822        }
823        (DataType::Boolean, t) if is_tsql_family(t) => DataType::Bit(None),
824        (DataType::Bytea, t) if is_tsql_family(t) => DataType::Varbinary(None),
825        (DataType::Json, t) if is_tsql_family(t) => DataType::Varchar(None),
826        (DataType::Jsonb, t) if is_tsql_family(t) => DataType::Varchar(None),
827        (DataType::Uuid, t) if is_tsql_family(t) => {
828            DataType::Unknown("UNIQUEIDENTIFIER".to_string())
829        }
830        (DataType::Serial, t) if is_tsql_family(t) => DataType::Int,
831        (DataType::BigSerial, t) if is_tsql_family(t) => DataType::BigInt,
832        (DataType::SmallSerial, t) if is_tsql_family(t) => DataType::SmallInt,
833        (DataType::Timestamp { .. }, t) if is_tsql_family(t) => {
834            DataType::Unknown("DATETIME2".to_string())
835        }
836        (DataType::Real, t) if is_tsql_family(t) => DataType::Real,
837
838        // ── TEXT / STRING ────────────────────────────────────────────────
839        // TEXT → STRING for BigQuery, Hive, Spark, Databricks
840        (DataType::Text, t) if matches!(t, Dialect::BigQuery) || is_hive_family(t) => {
841            DataType::String
842        }
843        // STRING → TEXT for Postgres family, MySQL family, SQLite
844        (DataType::String, t)
845            if is_postgres_family(t) || is_mysql_family(t) || matches!(t, Dialect::Sqlite) =>
846        {
847            DataType::Text
848        }
849
850        // ── INT → BIGINT (BigQuery) ─────────────────────────────────────
851        (DataType::Int, Dialect::BigQuery) => DataType::BigInt,
852
853        // ── FLOAT → DOUBLE (BigQuery) ───────────────────────────────────
854        (DataType::Float, Dialect::BigQuery) => DataType::Double,
855
856        // ── BYTEA ↔ BLOB ────────────────────────────────────────────────
857        (DataType::Bytea, t)
858            if is_mysql_family(t)
859                || matches!(t, Dialect::Sqlite | Dialect::Oracle)
860                || is_hive_family(t) =>
861        {
862            DataType::Blob
863        }
864        (DataType::Blob, t) if is_postgres_family(t) => DataType::Bytea,
865        (DataType::Varbinary(_), t) if is_postgres_family(t) => DataType::Bytea,
866
867        // ── BOOLEAN → BOOL ──────────────────────────────────────────────
868        (DataType::Boolean, Dialect::Mysql) => DataType::Boolean,
869
870        // Everything else is unchanged
871        (dt, _) => dt,
872    }
873}
874
875// ═══════════════════════════════════════════════════════════════════════════
876// LIMIT / TOP / FETCH FIRST transform
877// ═══════════════════════════════════════════════════════════════════════════
878
879/// Transform LIMIT / TOP / FETCH FIRST between dialects.
880///
881/// - T-SQL family:  `LIMIT n` → `TOP n` (OFFSET + FETCH handled separately)
882/// - Oracle:        `LIMIT n` → `FETCH FIRST n ROWS ONLY`
883/// - All others:    `TOP n` / `FETCH FIRST n` → `LIMIT n`
884fn transform_limit(sel: &mut SelectStatement, target: Dialect) {
885    if is_tsql_family(target) {
886        // Move LIMIT → TOP for T-SQL (only when there's no OFFSET)
887        if let Some(limit) = sel.limit.take() {
888            if sel.offset.is_none() {
889                sel.top = Some(Box::new(limit));
890            } else {
891                // T-SQL with OFFSET uses OFFSET n ROWS FETCH NEXT m ROWS ONLY
892                sel.fetch_first = Some(limit);
893                // T-SQL OFFSET/FETCH requires ORDER BY. Add ORDER BY (SELECT NULL) if absent.
894                if sel.order_by.is_empty() {
895                    sel.order_by = vec![OrderByItem {
896                        expr: Expr::Subquery(Box::new(Statement::Select(SelectStatement {
897                            comments: Vec::new(),
898                            ctes: Vec::new(),
899                            distinct: false,
900                            top: None,
901                            columns: vec![SelectItem::Expr {
902                                expr: Expr::Null,
903                                alias: None,
904                                alias_quote_style: QuoteStyle::None,
905                            }],
906                            from: None,
907                            joins: Vec::new(),
908                            where_clause: None,
909                            group_by: Vec::new(),
910                            having: None,
911                            order_by: Vec::new(),
912                            limit: None,
913                            offset: None,
914                            fetch_first: None,
915                            qualify: None,
916                            window_definitions: Vec::new(),
917                        }))),
918                        ascending: true,
919                        nulls_first: None,
920                    }];
921                }
922            }
923        }
924        // Also move fetch_first → top when no offset
925        if sel.offset.is_none() {
926            if let Some(fetch) = sel.fetch_first.take() {
927                sel.top = Some(Box::new(fetch));
928            }
929        }
930    } else if matches!(target, Dialect::Oracle) {
931        // Oracle prefers FETCH FIRST n ROWS ONLY (SQL:2008 syntax)
932        if let Some(limit) = sel.limit.take() {
933            sel.fetch_first = Some(limit);
934        }
935        if let Some(top) = sel.top.take() {
936            sel.fetch_first = Some(*top);
937        }
938    } else {
939        // All other dialects: normalize to LIMIT
940        if let Some(top) = sel.top.take() {
941            if sel.limit.is_none() {
942                sel.limit = Some(*top);
943            }
944        }
945        if let Some(fetch) = sel.fetch_first.take() {
946            if sel.limit.is_none() {
947                sel.limit = Some(fetch);
948            }
949        }
950    }
951}
952
953// ═══════════════════════════════════════════════════════════════════════════
954// Quoted-identifier transform
955// ═══════════════════════════════════════════════════════════════════════════
956
957/// Convert any quoted identifiers in expressions to the target dialect's
958/// quoting convention.
959fn transform_quotes(expr: Expr, target: Dialect) -> Expr {
960    match expr {
961        Expr::Column {
962            table,
963            name,
964            quote_style,
965            table_quote_style,
966        } => {
967            let new_qs = if quote_style.is_quoted() {
968                QuoteStyle::for_dialect(target)
969            } else {
970                QuoteStyle::None
971            };
972            let new_tqs = if table_quote_style.is_quoted() {
973                QuoteStyle::for_dialect(target)
974            } else {
975                QuoteStyle::None
976            };
977            Expr::Column {
978                table,
979                name,
980                quote_style: new_qs,
981                table_quote_style: new_tqs,
982            }
983        }
984        // Recurse into sub-expressions
985        Expr::BinaryOp { left, op, right } => Expr::BinaryOp {
986            left: Box::new(transform_quotes(*left, target)),
987            op,
988            right: Box::new(transform_quotes(*right, target)),
989        },
990        Expr::UnaryOp { op, expr } => Expr::UnaryOp {
991            op,
992            expr: Box::new(transform_quotes(*expr, target)),
993        },
994        Expr::Function {
995            name,
996            args,
997            distinct,
998            filter,
999            over,
1000        } => Expr::Function {
1001            name,
1002            args: args
1003                .into_iter()
1004                .map(|a| transform_quotes(a, target))
1005                .collect(),
1006            distinct,
1007            filter: filter.map(|f| Box::new(transform_quotes(*f, target))),
1008            over,
1009        },
1010        Expr::TypedFunction { func, filter, over } => Expr::TypedFunction {
1011            func: func.transform_children(&|e| transform_quotes(e, target)),
1012            filter: filter.map(|f| Box::new(transform_quotes(*f, target))),
1013            over,
1014        },
1015        Expr::Nested(inner) => Expr::Nested(Box::new(transform_quotes(*inner, target))),
1016        Expr::Alias { expr, name } => Expr::Alias {
1017            expr: Box::new(transform_quotes(*expr, target)),
1018            name,
1019        },
1020        other => other,
1021    }
1022}
1023
1024/// Transform quoting for all identifier-bearing nodes inside a SELECT.
1025fn transform_quotes_in_select(sel: &mut SelectStatement, target: Dialect) {
1026    // Columns in the select list
1027    for item in &mut sel.columns {
1028        if let SelectItem::Expr { expr, .. } = item {
1029            *expr = transform_quotes(expr.clone(), target);
1030        }
1031    }
1032    // WHERE
1033    if let Some(wh) = &mut sel.where_clause {
1034        *wh = transform_quotes(wh.clone(), target);
1035    }
1036    // GROUP BY
1037    for gb in &mut sel.group_by {
1038        *gb = transform_quotes(gb.clone(), target);
1039    }
1040    // HAVING
1041    if let Some(having) = &mut sel.having {
1042        *having = transform_quotes(having.clone(), target);
1043    }
1044    // ORDER BY
1045    for ob in &mut sel.order_by {
1046        ob.expr = transform_quotes(ob.expr.clone(), target);
1047    }
1048    // Table refs (FROM, JOINs)
1049    if let Some(from) = &mut sel.from {
1050        transform_quotes_in_table_source(&mut from.source, target);
1051    }
1052    for join in &mut sel.joins {
1053        transform_quotes_in_table_source(&mut join.table, target);
1054        if let Some(on) = &mut join.on {
1055            *on = transform_quotes(on.clone(), target);
1056        }
1057    }
1058}
1059
1060fn transform_quotes_in_table_source(source: &mut TableSource, target: Dialect) {
1061    match source {
1062        TableSource::Table(tref) => {
1063            if tref.name_quote_style.is_quoted() {
1064                tref.name_quote_style = QuoteStyle::for_dialect(target);
1065            }
1066        }
1067        TableSource::Subquery { .. } => {}
1068        TableSource::TableFunction { .. } => {}
1069        TableSource::Lateral { source } => transform_quotes_in_table_source(source, target),
1070        TableSource::Pivot { source, .. } | TableSource::Unpivot { source, .. } => {
1071            transform_quotes_in_table_source(source, target);
1072        }
1073        TableSource::Unnest { .. } => {}
1074    }
1075}
1076
1077// ═══════════════════════════════════════════════════════════════════════════
1078// Concat operator transform (Change 3: || → CONCAT() for T-SQL)
1079// ═══════════════════════════════════════════════════════════════════════════
1080
1081/// Collect all operands from a chain of `||` (Concat) operations into a flat list.
1082fn collect_concat_args(expr: &Expr, args: &mut Vec<Expr>) {
1083    match expr {
1084        Expr::BinaryOp {
1085            left,
1086            op: BinaryOperator::Concat,
1087            right,
1088        } => {
1089            collect_concat_args(left, args);
1090            collect_concat_args(right, args);
1091        }
1092        other => args.push(other.clone()),
1093    }
1094}
1095
1096// ═══════════════════════════════════════════════════════════════════════════
1097// Interval arithmetic transform (Change 6: expr ± INTERVAL → DATEADD())
1098// ═══════════════════════════════════════════════════════════════════════════
1099
1100/// Try to transform `expr ± INTERVAL 'n unit'` into `DATEADD(unit, ±n, expr)` for T-SQL.
1101/// Returns `Some(transformed_expr)` if the right side is an interval, `None` otherwise.
1102fn try_transform_interval_arithmetic(
1103    left: &Expr,
1104    op: &BinaryOperator,
1105    right: &Expr,
1106) -> Option<Expr> {
1107    // Check right side is an interval
1108    if let Expr::Interval { value, unit } = right {
1109        if let Some((count, unit_name)) = parse_interval_value(value, unit) {
1110            let final_count = if matches!(op, BinaryOperator::Minus) {
1111                -count
1112            } else {
1113                count
1114            };
1115            return Some(Expr::Function {
1116                name: "DATEADD".to_string(),
1117                args: vec![
1118                    // Use a Column expr for the datepart keyword (unquoted identifier)
1119                    Expr::Column {
1120                        table: None,
1121                        name: unit_name,
1122                        quote_style: QuoteStyle::None,
1123                        table_quote_style: QuoteStyle::None,
1124                    },
1125                    Expr::Number(final_count.to_string()),
1126                    left.clone(),
1127                ],
1128                distinct: false,
1129                filter: None,
1130                over: None,
1131            });
1132        }
1133    }
1134
1135    // Check left side is an interval (less common: INTERVAL '7 days' + col)
1136    if let Expr::Interval { value, unit } = left {
1137        if matches!(op, BinaryOperator::Plus) {
1138            if let Some((count, unit_name)) = parse_interval_value(value, unit) {
1139                return Some(Expr::Function {
1140                    name: "DATEADD".to_string(),
1141                    args: vec![
1142                        Expr::Column {
1143                            table: None,
1144                            name: unit_name,
1145                            quote_style: QuoteStyle::None,
1146                            table_quote_style: QuoteStyle::None,
1147                        },
1148                        Expr::Number(count.to_string()),
1149                        right.clone(),
1150                    ],
1151                    distinct: false,
1152                    filter: None,
1153                    over: None,
1154                });
1155            }
1156        }
1157    }
1158
1159    None
1160}
1161
1162/// Parse an interval value expression and optional unit into (count, T-SQL datepart name).
1163fn parse_interval_value(value: &Expr, unit: &Option<DateTimeField>) -> Option<(i64, String)> {
1164    // Case 1: INTERVAL '7 days' (value is a string literal containing "7 days")
1165    if let Expr::StringLiteral(s) = value {
1166        let parts: Vec<&str> = s.trim().split_whitespace().collect();
1167        if parts.len() == 2 {
1168            let count: i64 = parts[0].parse().ok()?;
1169            let unit_name = normalize_interval_unit(parts[1])?;
1170            return Some((count, unit_name));
1171        }
1172        if parts.len() == 1 {
1173            // Just a number in the string, unit must come from the `unit` field
1174            let count: i64 = parts[0].parse().ok()?;
1175            if let Some(u) = unit {
1176                let unit_name = datetime_field_to_tsql(u)?;
1177                return Some((count, unit_name));
1178            }
1179        }
1180    }
1181
1182    // Case 2: INTERVAL 7 DAY (value is a number, unit is DateTimeField)
1183    if let Expr::Number(n) = value {
1184        let count: i64 = n.parse().ok()?;
1185        if let Some(u) = unit {
1186            let unit_name = datetime_field_to_tsql(u)?;
1187            return Some((count, unit_name));
1188        }
1189    }
1190
1191    None
1192}
1193
1194/// Normalize an interval unit string to a T-SQL DATEADD part name.
1195fn normalize_interval_unit(unit: &str) -> Option<String> {
1196    let lower = unit.to_lowercase();
1197    let normalized = lower.trim_end_matches('s');
1198    match normalized {
1199        "year" => Some("YEAR".to_string()),
1200        "month" => Some("MONTH".to_string()),
1201        "week" => Some("WEEK".to_string()),
1202        "day" => Some("DAY".to_string()),
1203        "hour" => Some("HOUR".to_string()),
1204        "minute" => Some("MINUTE".to_string()),
1205        "second" => Some("SECOND".to_string()),
1206        "millisecond" => Some("MILLISECOND".to_string()),
1207        "microsecond" => Some("MICROSECOND".to_string()),
1208        _ => None,
1209    }
1210}
1211
1212/// Convert a DateTimeField to T-SQL DATEADD unit name.
1213fn datetime_field_to_tsql(field: &DateTimeField) -> Option<String> {
1214    match field {
1215        DateTimeField::Year => Some("YEAR".to_string()),
1216        DateTimeField::Quarter => Some("QUARTER".to_string()),
1217        DateTimeField::Month => Some("MONTH".to_string()),
1218        DateTimeField::Week => Some("WEEK".to_string()),
1219        DateTimeField::Day => Some("DAY".to_string()),
1220        DateTimeField::Hour => Some("HOUR".to_string()),
1221        DateTimeField::Minute => Some("MINUTE".to_string()),
1222        DateTimeField::Second => Some("SECOND".to_string()),
1223        DateTimeField::Millisecond => Some("MILLISECOND".to_string()),
1224        DateTimeField::Microsecond => Some("MICROSECOND".to_string()),
1225        _ => None,
1226    }
1227}
1228
1229// ═══════════════════════════════════════════════════════════════════════════
1230// SIMILAR TO → LIKE pattern simplification (Change 9)
1231// ═══════════════════════════════════════════════════════════════════════════
1232
1233/// Simplify a SIMILAR TO pattern for use with LIKE.
1234/// Strips regex features (|, (), +, *) that T-SQL LIKE doesn't support.
1235fn simplify_similar_to_pattern(pattern: &Expr) -> Expr {
1236    if let Expr::StringLiteral(s) = pattern {
1237        let simplified = s.replace('|', "%").replace('(', "").replace(')', "");
1238        Expr::StringLiteral(simplified)
1239    } else {
1240        pattern.clone()
1241    }
1242}