polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161#[cfg(feature = "transpile")]
162use crate::expressions::{ColumnConstraint, Function, Identifier, Literal};
163use crate::expressions::{DataType, Expression};
164#[cfg(any(
165 feature = "transpile",
166 feature = "ast-tools",
167 feature = "generate",
168 feature = "semantic"
169))]
170use crate::expressions::{From, FunctionBody, Join, Null, OrderBy, OutputClause, TableRef, With};
171#[cfg(feature = "transpile")]
172use crate::generator::UnsupportedLevel;
173#[cfg(feature = "generate")]
174use crate::generator::{Generator, GeneratorConfig};
175use crate::parser::Parser;
176#[cfg(feature = "transpile")]
177use crate::tokens::TokenType;
178use crate::tokens::{Token, Tokenizer, TokenizerConfig};
179#[cfg(feature = "transpile")]
180use crate::traversal::ExpressionWalk;
181use serde::{Deserialize, Serialize};
182use std::collections::HashMap;
183use std::sync::{Arc, LazyLock, RwLock};
184
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive (ASCII) when parsed from strings via
/// [`FromStr`](std::str::FromStr). Some dialects accept aliases (e.g., "mssql" and
/// "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)); the aliases are listed on
/// the individual variants. [`Display`](std::fmt::Display) always writes the canonical
/// lowercase name, and serde uses the lowercased variant name (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default; also accepts the empty string).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
265
266impl Default for DialectType {
267 fn default() -> Self {
268 DialectType::Generic
269 }
270}
271
272impl std::fmt::Display for DialectType {
273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274 match self {
275 DialectType::Generic => write!(f, "generic"),
276 DialectType::PostgreSQL => write!(f, "postgresql"),
277 DialectType::MySQL => write!(f, "mysql"),
278 DialectType::BigQuery => write!(f, "bigquery"),
279 DialectType::Snowflake => write!(f, "snowflake"),
280 DialectType::DuckDB => write!(f, "duckdb"),
281 DialectType::SQLite => write!(f, "sqlite"),
282 DialectType::Hive => write!(f, "hive"),
283 DialectType::Spark => write!(f, "spark"),
284 DialectType::Trino => write!(f, "trino"),
285 DialectType::Presto => write!(f, "presto"),
286 DialectType::Redshift => write!(f, "redshift"),
287 DialectType::TSQL => write!(f, "tsql"),
288 DialectType::Oracle => write!(f, "oracle"),
289 DialectType::ClickHouse => write!(f, "clickhouse"),
290 DialectType::Databricks => write!(f, "databricks"),
291 DialectType::Athena => write!(f, "athena"),
292 DialectType::Teradata => write!(f, "teradata"),
293 DialectType::Doris => write!(f, "doris"),
294 DialectType::StarRocks => write!(f, "starrocks"),
295 DialectType::Materialize => write!(f, "materialize"),
296 DialectType::RisingWave => write!(f, "risingwave"),
297 DialectType::SingleStore => write!(f, "singlestore"),
298 DialectType::CockroachDB => write!(f, "cockroachdb"),
299 DialectType::TiDB => write!(f, "tidb"),
300 DialectType::Druid => write!(f, "druid"),
301 DialectType::Solr => write!(f, "solr"),
302 DialectType::Tableau => write!(f, "tableau"),
303 DialectType::Dune => write!(f, "dune"),
304 DialectType::Fabric => write!(f, "fabric"),
305 DialectType::Drill => write!(f, "drill"),
306 DialectType::Dremio => write!(f, "dremio"),
307 DialectType::Exasol => write!(f, "exasol"),
308 DialectType::DataFusion => write!(f, "datafusion"),
309 }
310 }
311}
312
313impl std::str::FromStr for DialectType {
314 type Err = crate::error::Error;
315
316 fn from_str(s: &str) -> Result<Self> {
317 match s.to_ascii_lowercase().as_str() {
318 "generic" | "" => Ok(DialectType::Generic),
319 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
320 "mysql" => Ok(DialectType::MySQL),
321 "bigquery" => Ok(DialectType::BigQuery),
322 "snowflake" => Ok(DialectType::Snowflake),
323 "duckdb" => Ok(DialectType::DuckDB),
324 "sqlite" => Ok(DialectType::SQLite),
325 "hive" => Ok(DialectType::Hive),
326 "spark" | "spark2" => Ok(DialectType::Spark),
327 "trino" => Ok(DialectType::Trino),
328 "presto" => Ok(DialectType::Presto),
329 "redshift" => Ok(DialectType::Redshift),
330 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
331 "oracle" => Ok(DialectType::Oracle),
332 "clickhouse" => Ok(DialectType::ClickHouse),
333 "databricks" => Ok(DialectType::Databricks),
334 "athena" => Ok(DialectType::Athena),
335 "teradata" => Ok(DialectType::Teradata),
336 "doris" => Ok(DialectType::Doris),
337 "starrocks" => Ok(DialectType::StarRocks),
338 "materialize" => Ok(DialectType::Materialize),
339 "risingwave" => Ok(DialectType::RisingWave),
340 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
341 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
342 "tidb" => Ok(DialectType::TiDB),
343 "druid" => Ok(DialectType::Druid),
344 "solr" => Ok(DialectType::Solr),
345 "tableau" => Ok(DialectType::Tableau),
346 "dune" => Ok(DialectType::Dune),
347 "fabric" => Ok(DialectType::Fabric),
348 "drill" => Ok(DialectType::Drill),
349 "dremio" => Ok(DialectType::Dremio),
350 "exasol" => Ok(DialectType::Exasol),
351 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
352 _ => Err(crate::error::Error::parse(
353 format!("Unknown dialect: {}", s),
354 0,
355 0,
356 0,
357 0,
358 )),
359 }
360 }
361}
362
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
///
/// Several hooks are feature-gated: the generator hooks exist only with the `generate`
/// feature, and the transform hooks only with the `transpile` feature.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is
    /// `TokenizerConfig::default()`.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is
    /// `GeneratorConfig::default()`.
    ///
    /// Only available with the `generate` feature.
    #[cfg(feature = "generate")]
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    ///
    /// Only available with the `generate` feature.
    #[cfg(feature = "generate")]
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the expression untouched.
    ///
    /// Only available with the `transpile` feature.
    #[cfg(feature = "transpile")]
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    ///
    /// Only available with the `transpile` feature.
    #[cfg(feature = "transpile")]
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
429
/// Applies `transform_fn` to a [`DataType`](crate::expressions::DataType) and to every
/// type nested inside it (`ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, `MAP<STRING, INT>`, ...).
///
/// The outermost type goes through `transform_fn` first, wrapped as an
/// `Expression::DataType`; the element/field/key/value types of the *transformed* result
/// are then processed the same way. This lets dialect-level type mappings (e.g., `INT`
/// to `INTEGER`) reach into complex nested types.
///
/// If `transform_fn` hands back anything other than an `Expression::DataType`, the result
/// is a `DataType::Custom` named `"UNKNOWN"`. Errors from `transform_fn` are propagated.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_data_type_recursive<F>(
    dt: crate::expressions::DataType,
    transform_fn: &F,
) -> Result<crate::expressions::DataType>
where
    F: Fn(Expression) -> Result<Expression>,
{
    use crate::expressions::DataType;

    // Step 1: run the outermost type through the expression-level transform.
    let Expression::DataType(outer) = transform_fn(Expression::DataType(dt))? else {
        // The transform replaced the type with a non-type expression.
        return Ok(DataType::Custom {
            name: "UNKNOWN".to_string(),
        });
    };

    // Step 2: descend into whatever the transformed type contains.
    let rebuilt = match outer {
        DataType::Array {
            element_type,
            dimension,
        } => DataType::Array {
            element_type: Box::new(transform_data_type_recursive(
                *element_type,
                transform_fn,
            )?),
            dimension,
        },
        DataType::List { element_type } => DataType::List {
            element_type: Box::new(transform_data_type_recursive(
                *element_type,
                transform_fn,
            )?),
        },
        DataType::Struct { fields, nested } => {
            let fields = fields
                .into_iter()
                .map(|mut field| -> Result<_> {
                    field.data_type =
                        transform_data_type_recursive(field.data_type, transform_fn)?;
                    Ok(field)
                })
                .collect::<Result<Vec<_>>>()?;
            DataType::Struct { fields, nested }
        }
        DataType::Map {
            key_type,
            value_type,
        } => {
            // Key before value, so `transform_fn` sees them in declaration order.
            let key = transform_data_type_recursive(*key_type, transform_fn)?;
            let value = transform_data_type_recursive(*value_type, transform_fn)?;
            DataType::Map {
                key_type: Box::new(key),
                value_type: Box::new(value),
            }
        }
        // Everything else is returned as-is after the outer transform.
        leaf => leaf,
    };
    Ok(rebuilt)
}
505
506/// Convert DuckDB C-style format strings to Presto C-style format strings.
507/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
508#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Ordered rewrite table; entries are applied top to bottom and the order matters.
    const REWRITES: &[(&str, &str)] = &[
        // Phase 1: park multi-character patterns behind \x01-delimited placeholders
        // so the single-specifier rewrites in phase 2 cannot touch them.
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
        // Phase 2: individual specifiers that differ between the two dialects.
        ("%M", "%i"),
        ("%S", "%s"),
        // Phase 3: swap each placeholder for its Presto spelling.
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];

    REWRITES
        .iter()
        .fold(fmt.to_string(), |text, &(from, to)| text.replace(from, to))
}
531
532/// Convert DuckDB C-style format strings to BigQuery format strings.
533/// BigQuery uses a mix of strftime-like directives.
534#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // The combined date+time pattern must be rewritten before its date-only and
    // time-only parts, otherwise the shorter patterns would consume it piecemeal.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
544
545#[cfg(feature = "transpile")]
/// Rewrites Presto `%`-style date/time specifiers as Java-style pattern letters
/// (e.g. `%Y-%m-%d` becomes `yyyy-MM-dd`).
///
/// Specifiers not listed in the table are left untouched.
fn presto_to_java_format(fmt: &str) -> String {
    // Applied in order. No replacement text contains `%`, so an earlier rewrite can
    // never create a match for a later one.
    const SPECIFIERS: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%i", "mm"),
        ("%S", "ss"),
        ("%s", "ss"),
        ("%y", "yy"),
        ("%T", "HH:mm:ss"),
        ("%F", "yyyy-MM-dd"),
        ("%M", "MMMM"),
    ];

    let mut converted = fmt.to_string();
    for &(presto, java) in SPECIFIERS {
        converted = converted.replace(presto, java);
    }
    converted
}
559
560#[cfg(feature = "transpile")]
/// Collapses the spelled-out Presto time patterns `%H:%i:%S` and `%H:%i:%s`
/// into the shorthand `%T`.
fn normalize_presto_format(fmt: &str) -> String {
    ["%H:%i:%S", "%H:%i:%s"]
        .iter()
        .fold(fmt.to_string(), |text, &long_form| {
            text.replace(long_form, "%T")
        })
}
564
565#[cfg(feature = "transpile")]
/// Rewrites the Presto specifiers that DuckDB spells differently:
/// `%i` becomes `%M`, `%s` becomes `%S`, and `%T` is expanded to `%H:%M:%S`.
fn presto_to_duckdb_format(fmt: &str) -> String {
    let minutes_fixed = fmt.replace("%i", "%M");
    let seconds_fixed = minutes_fixed.replace("%s", "%S");
    // Expand `%T` last so its expansion is not re-scanned by the rewrites above.
    seconds_fixed.replace("%T", "%H:%M:%S")
}
571
572#[cfg(feature = "transpile")]
/// Rewrites a Presto format string for BigQuery: `%Y-%m-%d` becomes `%F`, the
/// spelled-out time patterns become `%T`, and any remaining `%i` / `%s` become
/// `%M` / `%S`.
fn presto_to_bigquery_format(fmt: &str) -> String {
    // Whole-pattern rewrites come first; the single-specifier rewrites then only see
    // what the longer patterns left behind.
    const REWRITES: &[(&str, &str)] = &[
        ("%Y-%m-%d", "%F"),
        ("%H:%i:%S", "%T"),
        ("%H:%i:%s", "%T"),
        ("%i", "%M"),
        ("%s", "%S"),
    ];

    REWRITES
        .iter()
        .fold(fmt.to_string(), |text, &(from, to)| text.replace(from, to))
}
580
581#[cfg(feature = "transpile")]
582fn is_default_presto_timestamp_format(fmt: &str) -> bool {
583 let normalized = normalize_presto_format(fmt);
584 normalized == "%Y-%m-%d %T"
585 || normalized == "%Y-%m-%d %H:%i:%S"
586 || fmt == "%Y-%m-%d %H:%i:%S"
587 || fmt == "%Y-%m-%d %T"
588}
589
590#[cfg(feature = "transpile")]
/// Returns `true` when `fmt` is exactly `%Y-%m-%d` or its shorthand `%F`.
fn is_default_presto_date_format(fmt: &str) -> bool {
    matches!(fmt, "%Y-%m-%d" | "%F")
}
594
/// A unit of work on the explicit task stack used by `transform_recursive_inner`.
///
/// Tasks are popped from the end of a `Vec` (LIFO). For a node with children the
/// traversal pushes the node's `Finish` task first and the children's `Visit` tasks
/// afterwards, so the children are processed before their parent is completed.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
enum TransformTask {
    /// An expression that still has to be processed.
    Visit(Expression),
    /// A deferred completion step for a node whose children were scheduled earlier.
    Finish(FinishTask),
}
606
/// The completion step attached to a [`TransformTask::Finish`], one variant per node shape.
///
/// For the unary and binary shapes the stored parent expression has had its child
/// slot(s) swapped out for an `Expression::Null` placeholder while the children are
/// being processed separately.
///
/// NOTE(review): the code that consumes these tasks is outside the portion of the file
/// reviewed here; the per-variant notes below that go beyond the unary/binary producers
/// are inferred from names and should be confirmed against that code.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
enum FinishTask {
    /// Parent with a single child slot (`this`), e.g. `Alias`, `Paren`, `Not`, `Subquery`.
    Unary(Expression),
    /// Parent with `left` and `right` child slots, e.g. `And`, `Add`, `Eq`.
    Binary(Expression),
    /// Presumably a cast-style parent -- TODO confirm against the consumer.
    CastLike(Expression),
    /// Presumably a parent plus the number of child results to collect -- TODO confirm.
    List(Expression, usize),
    /// Presumably a `FROM` clause plus the number of child results to collect -- TODO confirm.
    From(crate::expressions::From, usize),
    /// A `SELECT` under reconstruction; see [`SelectFrame`].
    Select(SelectFrame),
    /// Presumably a set-operation parent (UNION-like) -- TODO confirm.
    SetOp(Expression),
}
623
/// Bookkeeping carried by [`FinishTask::Select`] while a `SELECT` is being transformed.
///
/// NOTE(review): the code that fills and consumes this frame is outside the portion of
/// the file reviewed here. The field notes below are inferred from the field names and
/// types -- presumably they record which clauses were scheduled so the matching number
/// of results can be collected afterwards. Confirm against the consumer.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
struct SelectFrame {
    /// The `SELECT` node being rebuilt (boxed).
    select: Box<crate::expressions::Select>,
    /// Presumably the number of projection expressions scheduled -- TODO confirm.
    expr_count: usize,
    /// Presumably whether a `FROM` clause was scheduled -- TODO confirm.
    from_present: bool,
    /// Presumably whether a `WHERE` clause was scheduled -- TODO confirm.
    where_present: bool,
    /// Presumably the number of `GROUP BY` expressions scheduled -- TODO confirm.
    group_by_count: usize,
    /// Presumably whether a `HAVING` clause was scheduled -- TODO confirm.
    having_present: bool,
    /// Presumably whether a `QUALIFY` clause was scheduled -- TODO confirm.
    qualify_present: bool,
}
640
/// Pops the most recently produced expression off the result stack.
///
/// # Errors
///
/// Returns an internal error (`transform stack underflow`) when the stack is empty.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
    match results.pop() {
        Some(expr) => Ok(expr),
        None => Err(crate::error::Error::Internal(
            "transform stack underflow".to_string(),
        )),
    }
}
652
/// Removes the last `count` expressions from the result stack and returns them in the
/// order in which they were pushed.
///
/// # Errors
///
/// Returns an internal error (`transform result stack underflow`) when fewer than
/// `count` results are available; the stack is left untouched in that case.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
    // `checked_sub` yields `None` exactly when the stack holds fewer than `count` items.
    let split_at = results.len().checked_sub(count).ok_or_else(|| {
        crate::error::Error::Internal("transform result stack underflow".to_string())
    })?;
    Ok(results.split_off(split_at))
}
667
/// Applies a transform function bottom-up through an entire expression tree.
///
/// This public entry point delegates to `transform_recursive_inner`, which drives an
/// explicit task stack for the recursion-heavy shapes that dominate deeply nested SQL
/// (nested SELECT/FROM/SUBQUERY chains, set-operation trees, and common binary/unary
/// expression chains). Less common shapes currently reuse the reference recursive
/// implementation so semantics stay identical while the hot path avoids stack growth.
///
/// With the `stacker` feature enabled, the call is wrapped in `stacker::maybe_grow`
/// using a red zone of 4 MiB in debug builds (1 MiB otherwise) and a stack size
/// argument of 8 MiB; without the feature the inner function is called directly.
///
/// # Errors
///
/// Propagates any error returned by the inner traversal (including errors from
/// `transform_fn`).
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
where
    F: Fn(Expression) -> Result<Expression>,
{
    #[cfg(feature = "stacker")]
    {
        const MIB: usize = 1024 * 1024;
        const NEW_STACK_BYTES: usize = 8 * MIB;
        // Debug builds get a larger red zone than optimized builds.
        let red_zone = if cfg!(debug_assertions) { 4 * MIB } else { MIB };
        stacker::maybe_grow(red_zone, NEW_STACK_BYTES, move || {
            transform_recursive_inner(expr, transform_fn)
        })
    }
    #[cfg(not(feature = "stacker"))]
    {
        transform_recursive_inner(expr, transform_fn)
    }
}
701
702#[cfg(any(
703 feature = "transpile",
704 feature = "ast-tools",
705 feature = "generate",
706 feature = "semantic"
707))]
708fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
709where
710 F: Fn(Expression) -> Result<Expression>,
711{
712 let mut tasks = vec![TransformTask::Visit(expr)];
713 let mut results = Vec::new();
714
715 while let Some(task) = tasks.pop() {
716 match task {
717 TransformTask::Visit(expr) => {
718 if matches!(
719 &expr,
720 Expression::Literal(_)
721 | Expression::Boolean(_)
722 | Expression::Null(_)
723 | Expression::Identifier(_)
724 | Expression::Star(_)
725 | Expression::Parameter(_)
726 | Expression::Placeholder(_)
727 | Expression::SessionParameter(_)
728 ) {
729 results.push(transform_fn(expr)?);
730 continue;
731 }
732
733 match expr {
734 Expression::Alias(mut alias) => {
735 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
736 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
737 alias,
738 ))));
739 tasks.push(TransformTask::Visit(child));
740 }
741 Expression::Paren(mut paren) => {
742 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
743 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
744 paren,
745 ))));
746 tasks.push(TransformTask::Visit(child));
747 }
748 Expression::Not(mut not) => {
749 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
750 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
751 not,
752 ))));
753 tasks.push(TransformTask::Visit(child));
754 }
755 Expression::Neg(mut neg) => {
756 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
757 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
758 neg,
759 ))));
760 tasks.push(TransformTask::Visit(child));
761 }
762 Expression::IsNull(mut expr) => {
763 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
764 tasks.push(TransformTask::Finish(FinishTask::Unary(
765 Expression::IsNull(expr),
766 )));
767 tasks.push(TransformTask::Visit(child));
768 }
769 Expression::IsTrue(mut expr) => {
770 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
771 tasks.push(TransformTask::Finish(FinishTask::Unary(
772 Expression::IsTrue(expr),
773 )));
774 tasks.push(TransformTask::Visit(child));
775 }
776 Expression::IsFalse(mut expr) => {
777 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
778 tasks.push(TransformTask::Finish(FinishTask::Unary(
779 Expression::IsFalse(expr),
780 )));
781 tasks.push(TransformTask::Visit(child));
782 }
783 Expression::Subquery(mut subquery) => {
784 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
785 tasks.push(TransformTask::Finish(FinishTask::Unary(
786 Expression::Subquery(subquery),
787 )));
788 tasks.push(TransformTask::Visit(child));
789 }
790 Expression::Exists(mut exists) => {
791 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
792 tasks.push(TransformTask::Finish(FinishTask::Unary(
793 Expression::Exists(exists),
794 )));
795 tasks.push(TransformTask::Visit(child));
796 }
797 Expression::TableArgument(mut arg) => {
798 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
799 tasks.push(TransformTask::Finish(FinishTask::Unary(
800 Expression::TableArgument(arg),
801 )));
802 tasks.push(TransformTask::Visit(child));
803 }
804 Expression::And(mut op) => {
805 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
806 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
807 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
808 op,
809 ))));
810 tasks.push(TransformTask::Visit(right));
811 tasks.push(TransformTask::Visit(left));
812 }
813 Expression::Or(mut op) => {
814 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
815 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
816 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
817 op,
818 ))));
819 tasks.push(TransformTask::Visit(right));
820 tasks.push(TransformTask::Visit(left));
821 }
822 Expression::Add(mut op) => {
823 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
824 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
825 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
826 op,
827 ))));
828 tasks.push(TransformTask::Visit(right));
829 tasks.push(TransformTask::Visit(left));
830 }
831 Expression::Sub(mut op) => {
832 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
833 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
834 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
835 op,
836 ))));
837 tasks.push(TransformTask::Visit(right));
838 tasks.push(TransformTask::Visit(left));
839 }
840 Expression::Mul(mut op) => {
841 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
842 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
843 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
844 op,
845 ))));
846 tasks.push(TransformTask::Visit(right));
847 tasks.push(TransformTask::Visit(left));
848 }
849 Expression::Div(mut op) => {
850 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
851 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
852 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
853 op,
854 ))));
855 tasks.push(TransformTask::Visit(right));
856 tasks.push(TransformTask::Visit(left));
857 }
858 Expression::Eq(mut op) => {
859 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
860 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
861 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
862 op,
863 ))));
864 tasks.push(TransformTask::Visit(right));
865 tasks.push(TransformTask::Visit(left));
866 }
867 Expression::Lt(mut op) => {
868 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
869 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
870 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
871 op,
872 ))));
873 tasks.push(TransformTask::Visit(right));
874 tasks.push(TransformTask::Visit(left));
875 }
876 Expression::Gt(mut op) => {
877 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
878 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
879 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
880 op,
881 ))));
882 tasks.push(TransformTask::Visit(right));
883 tasks.push(TransformTask::Visit(left));
884 }
885 Expression::Neq(mut op) => {
886 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
887 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
888 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
889 op,
890 ))));
891 tasks.push(TransformTask::Visit(right));
892 tasks.push(TransformTask::Visit(left));
893 }
894 Expression::Lte(mut op) => {
895 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
896 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
897 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
898 op,
899 ))));
900 tasks.push(TransformTask::Visit(right));
901 tasks.push(TransformTask::Visit(left));
902 }
903 Expression::Gte(mut op) => {
904 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
905 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
906 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
907 op,
908 ))));
909 tasks.push(TransformTask::Visit(right));
910 tasks.push(TransformTask::Visit(left));
911 }
912 Expression::Mod(mut op) => {
913 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
914 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
915 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
916 op,
917 ))));
918 tasks.push(TransformTask::Visit(right));
919 tasks.push(TransformTask::Visit(left));
920 }
921 Expression::Concat(mut op) => {
922 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
923 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
924 tasks.push(TransformTask::Finish(FinishTask::Binary(
925 Expression::Concat(op),
926 )));
927 tasks.push(TransformTask::Visit(right));
928 tasks.push(TransformTask::Visit(left));
929 }
930 Expression::BitwiseAnd(mut op) => {
931 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
932 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
933 tasks.push(TransformTask::Finish(FinishTask::Binary(
934 Expression::BitwiseAnd(op),
935 )));
936 tasks.push(TransformTask::Visit(right));
937 tasks.push(TransformTask::Visit(left));
938 }
939 Expression::BitwiseOr(mut op) => {
940 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
941 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
942 tasks.push(TransformTask::Finish(FinishTask::Binary(
943 Expression::BitwiseOr(op),
944 )));
945 tasks.push(TransformTask::Visit(right));
946 tasks.push(TransformTask::Visit(left));
947 }
948 Expression::BitwiseXor(mut op) => {
949 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
950 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
951 tasks.push(TransformTask::Finish(FinishTask::Binary(
952 Expression::BitwiseXor(op),
953 )));
954 tasks.push(TransformTask::Visit(right));
955 tasks.push(TransformTask::Visit(left));
956 }
957 Expression::Is(mut op) => {
958 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
959 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
960 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
961 op,
962 ))));
963 tasks.push(TransformTask::Visit(right));
964 tasks.push(TransformTask::Visit(left));
965 }
966 Expression::MemberOf(mut op) => {
967 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
968 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
969 tasks.push(TransformTask::Finish(FinishTask::Binary(
970 Expression::MemberOf(op),
971 )));
972 tasks.push(TransformTask::Visit(right));
973 tasks.push(TransformTask::Visit(left));
974 }
975 Expression::ArrayContainsAll(mut op) => {
976 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
977 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
978 tasks.push(TransformTask::Finish(FinishTask::Binary(
979 Expression::ArrayContainsAll(op),
980 )));
981 tasks.push(TransformTask::Visit(right));
982 tasks.push(TransformTask::Visit(left));
983 }
984 Expression::ArrayContainedBy(mut op) => {
985 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
986 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
987 tasks.push(TransformTask::Finish(FinishTask::Binary(
988 Expression::ArrayContainedBy(op),
989 )));
990 tasks.push(TransformTask::Visit(right));
991 tasks.push(TransformTask::Visit(left));
992 }
993 Expression::ArrayOverlaps(mut op) => {
994 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
995 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
996 tasks.push(TransformTask::Finish(FinishTask::Binary(
997 Expression::ArrayOverlaps(op),
998 )));
999 tasks.push(TransformTask::Visit(right));
1000 tasks.push(TransformTask::Visit(left));
1001 }
1002 Expression::TsMatch(mut op) => {
1003 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
1004 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
1005 tasks.push(TransformTask::Finish(FinishTask::Binary(
1006 Expression::TsMatch(op),
1007 )));
1008 tasks.push(TransformTask::Visit(right));
1009 tasks.push(TransformTask::Visit(left));
1010 }
1011 Expression::Adjacent(mut op) => {
1012 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
1013 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
1014 tasks.push(TransformTask::Finish(FinishTask::Binary(
1015 Expression::Adjacent(op),
1016 )));
1017 tasks.push(TransformTask::Visit(right));
1018 tasks.push(TransformTask::Visit(left));
1019 }
1020 Expression::Like(mut like) => {
1021 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
1022 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
1023 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
1024 like,
1025 ))));
1026 tasks.push(TransformTask::Visit(right));
1027 tasks.push(TransformTask::Visit(left));
1028 }
1029 Expression::ILike(mut like) => {
1030 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
1031 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
1032 tasks.push(TransformTask::Finish(FinishTask::Binary(
1033 Expression::ILike(like),
1034 )));
1035 tasks.push(TransformTask::Visit(right));
1036 tasks.push(TransformTask::Visit(left));
1037 }
1038 Expression::Cast(mut cast) => {
1039 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1040 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1041 Expression::Cast(cast),
1042 )));
1043 tasks.push(TransformTask::Visit(child));
1044 }
1045 Expression::TryCast(mut cast) => {
1046 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1047 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1048 Expression::TryCast(cast),
1049 )));
1050 tasks.push(TransformTask::Visit(child));
1051 }
1052 Expression::SafeCast(mut cast) => {
1053 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1054 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1055 Expression::SafeCast(cast),
1056 )));
1057 tasks.push(TransformTask::Visit(child));
1058 }
1059 Expression::Function(mut function) => {
1060 let args = std::mem::take(&mut function.args);
1061 let count = args.len();
1062 tasks.push(TransformTask::Finish(FinishTask::List(
1063 Expression::Function(function),
1064 count,
1065 )));
1066 for child in args.into_iter().rev() {
1067 tasks.push(TransformTask::Visit(child));
1068 }
1069 }
1070 Expression::Array(mut array) => {
1071 let expressions = std::mem::take(&mut array.expressions);
1072 let count = expressions.len();
1073 tasks.push(TransformTask::Finish(FinishTask::List(
1074 Expression::Array(array),
1075 count,
1076 )));
1077 for child in expressions.into_iter().rev() {
1078 tasks.push(TransformTask::Visit(child));
1079 }
1080 }
1081 Expression::Tuple(mut tuple) => {
1082 let expressions = std::mem::take(&mut tuple.expressions);
1083 let count = expressions.len();
1084 tasks.push(TransformTask::Finish(FinishTask::List(
1085 Expression::Tuple(tuple),
1086 count,
1087 )));
1088 for child in expressions.into_iter().rev() {
1089 tasks.push(TransformTask::Visit(child));
1090 }
1091 }
1092 Expression::ArrayFunc(mut array) => {
1093 let expressions = std::mem::take(&mut array.expressions);
1094 let count = expressions.len();
1095 tasks.push(TransformTask::Finish(FinishTask::List(
1096 Expression::ArrayFunc(array),
1097 count,
1098 )));
1099 for child in expressions.into_iter().rev() {
1100 tasks.push(TransformTask::Visit(child));
1101 }
1102 }
1103 Expression::Coalesce(mut func) => {
1104 let expressions = std::mem::take(&mut func.expressions);
1105 let count = expressions.len();
1106 tasks.push(TransformTask::Finish(FinishTask::List(
1107 Expression::Coalesce(func),
1108 count,
1109 )));
1110 for child in expressions.into_iter().rev() {
1111 tasks.push(TransformTask::Visit(child));
1112 }
1113 }
1114 Expression::Greatest(mut func) => {
1115 let expressions = std::mem::take(&mut func.expressions);
1116 let count = expressions.len();
1117 tasks.push(TransformTask::Finish(FinishTask::List(
1118 Expression::Greatest(func),
1119 count,
1120 )));
1121 for child in expressions.into_iter().rev() {
1122 tasks.push(TransformTask::Visit(child));
1123 }
1124 }
1125 Expression::Least(mut func) => {
1126 let expressions = std::mem::take(&mut func.expressions);
1127 let count = expressions.len();
1128 tasks.push(TransformTask::Finish(FinishTask::List(
1129 Expression::Least(func),
1130 count,
1131 )));
1132 for child in expressions.into_iter().rev() {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 }
1136 Expression::ArrayConcat(mut func) => {
1137 let expressions = std::mem::take(&mut func.expressions);
1138 let count = expressions.len();
1139 tasks.push(TransformTask::Finish(FinishTask::List(
1140 Expression::ArrayConcat(func),
1141 count,
1142 )));
1143 for child in expressions.into_iter().rev() {
1144 tasks.push(TransformTask::Visit(child));
1145 }
1146 }
1147 Expression::ArrayIntersect(mut func) => {
1148 let expressions = std::mem::take(&mut func.expressions);
1149 let count = expressions.len();
1150 tasks.push(TransformTask::Finish(FinishTask::List(
1151 Expression::ArrayIntersect(func),
1152 count,
1153 )));
1154 for child in expressions.into_iter().rev() {
1155 tasks.push(TransformTask::Visit(child));
1156 }
1157 }
1158 Expression::ArrayZip(mut func) => {
1159 let expressions = std::mem::take(&mut func.expressions);
1160 let count = expressions.len();
1161 tasks.push(TransformTask::Finish(FinishTask::List(
1162 Expression::ArrayZip(func),
1163 count,
1164 )));
1165 for child in expressions.into_iter().rev() {
1166 tasks.push(TransformTask::Visit(child));
1167 }
1168 }
1169 Expression::MapConcat(mut func) => {
1170 let expressions = std::mem::take(&mut func.expressions);
1171 let count = expressions.len();
1172 tasks.push(TransformTask::Finish(FinishTask::List(
1173 Expression::MapConcat(func),
1174 count,
1175 )));
1176 for child in expressions.into_iter().rev() {
1177 tasks.push(TransformTask::Visit(child));
1178 }
1179 }
1180 Expression::JsonArray(mut func) => {
1181 let expressions = std::mem::take(&mut func.expressions);
1182 let count = expressions.len();
1183 tasks.push(TransformTask::Finish(FinishTask::List(
1184 Expression::JsonArray(func),
1185 count,
1186 )));
1187 for child in expressions.into_iter().rev() {
1188 tasks.push(TransformTask::Visit(child));
1189 }
1190 }
1191 Expression::From(mut from) => {
1192 let expressions = std::mem::take(&mut from.expressions);
1193 let count = expressions.len();
1194 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1195 for child in expressions.into_iter().rev() {
1196 tasks.push(TransformTask::Visit(child));
1197 }
1198 }
1199 Expression::Select(mut select) => {
1200 let expressions = std::mem::take(&mut select.expressions);
1201 let expr_count = expressions.len();
1202
1203 let from_info = select.from.take().map(|mut from| {
1204 let children = std::mem::take(&mut from.expressions);
1205 (from, children)
1206 });
1207 let from_present = from_info.is_some();
1208
1209 let where_child = select.where_clause.as_mut().map(|where_clause| {
1210 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1211 });
1212 let where_present = where_child.is_some();
1213
1214 let group_expressions = select
1215 .group_by
1216 .as_mut()
1217 .map(|group_by| std::mem::take(&mut group_by.expressions))
1218 .unwrap_or_default();
1219 let group_by_count = group_expressions.len();
1220
1221 let having_child = select.having.as_mut().map(|having| {
1222 std::mem::replace(&mut having.this, Expression::Null(Null))
1223 });
1224 let having_present = having_child.is_some();
1225
1226 let qualify_child = select.qualify.as_mut().map(|qualify| {
1227 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1228 });
1229 let qualify_present = qualify_child.is_some();
1230
1231 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1232 select,
1233 expr_count,
1234 from_present,
1235 where_present,
1236 group_by_count,
1237 having_present,
1238 qualify_present,
1239 })));
1240
1241 if let Some(child) = qualify_child {
1242 tasks.push(TransformTask::Visit(child));
1243 }
1244 if let Some(child) = having_child {
1245 tasks.push(TransformTask::Visit(child));
1246 }
1247 for child in group_expressions.into_iter().rev() {
1248 tasks.push(TransformTask::Visit(child));
1249 }
1250 if let Some(child) = where_child {
1251 tasks.push(TransformTask::Visit(child));
1252 }
1253 if let Some((from, children)) = from_info {
1254 tasks.push(TransformTask::Finish(FinishTask::From(
1255 from,
1256 children.len(),
1257 )));
1258 for child in children.into_iter().rev() {
1259 tasks.push(TransformTask::Visit(child));
1260 }
1261 }
1262 for child in expressions.into_iter().rev() {
1263 tasks.push(TransformTask::Visit(child));
1264 }
1265 }
1266 Expression::Union(mut union) => {
1267 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1268 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1269 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1270 union,
1271 ))));
1272 tasks.push(TransformTask::Visit(right));
1273 tasks.push(TransformTask::Visit(left));
1274 }
1275 Expression::Intersect(mut intersect) => {
1276 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1277 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1278 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1279 Expression::Intersect(intersect),
1280 )));
1281 tasks.push(TransformTask::Visit(right));
1282 tasks.push(TransformTask::Visit(left));
1283 }
1284 Expression::Except(mut except) => {
1285 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1286 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1287 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1288 Expression::Except(except),
1289 )));
1290 tasks.push(TransformTask::Visit(right));
1291 tasks.push(TransformTask::Visit(left));
1292 }
1293 other => {
1294 results.push(transform_recursive_reference(other, transform_fn)?);
1295 }
1296 }
1297 }
1298 TransformTask::Finish(finish) => match finish {
1299 FinishTask::Unary(expr) => {
1300 let child = transform_pop_result(&mut results)?;
1301 let rebuilt = match expr {
1302 Expression::Alias(mut alias) => {
1303 alias.this = child;
1304 Expression::Alias(alias)
1305 }
1306 Expression::Paren(mut paren) => {
1307 paren.this = child;
1308 Expression::Paren(paren)
1309 }
1310 Expression::Not(mut not) => {
1311 not.this = child;
1312 Expression::Not(not)
1313 }
1314 Expression::Neg(mut neg) => {
1315 neg.this = child;
1316 Expression::Neg(neg)
1317 }
1318 Expression::IsNull(mut expr) => {
1319 expr.this = child;
1320 Expression::IsNull(expr)
1321 }
1322 Expression::IsTrue(mut expr) => {
1323 expr.this = child;
1324 Expression::IsTrue(expr)
1325 }
1326 Expression::IsFalse(mut expr) => {
1327 expr.this = child;
1328 Expression::IsFalse(expr)
1329 }
1330 Expression::Subquery(mut subquery) => {
1331 subquery.this = child;
1332 Expression::Subquery(subquery)
1333 }
1334 Expression::Exists(mut exists) => {
1335 exists.this = child;
1336 Expression::Exists(exists)
1337 }
1338 Expression::TableArgument(mut arg) => {
1339 arg.this = child;
1340 Expression::TableArgument(arg)
1341 }
1342 _ => {
1343 return Err(crate::error::Error::Internal(
1344 "unexpected unary transform task".to_string(),
1345 ));
1346 }
1347 };
1348 results.push(transform_fn(rebuilt)?);
1349 }
1350 FinishTask::Binary(expr) => {
1351 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1352 let left = children.next().expect("left child");
1353 let right = children.next().expect("right child");
1354 let rebuilt = match expr {
1355 Expression::And(mut op) => {
1356 op.left = left;
1357 op.right = right;
1358 Expression::And(op)
1359 }
1360 Expression::Or(mut op) => {
1361 op.left = left;
1362 op.right = right;
1363 Expression::Or(op)
1364 }
1365 Expression::Add(mut op) => {
1366 op.left = left;
1367 op.right = right;
1368 Expression::Add(op)
1369 }
1370 Expression::Sub(mut op) => {
1371 op.left = left;
1372 op.right = right;
1373 Expression::Sub(op)
1374 }
1375 Expression::Mul(mut op) => {
1376 op.left = left;
1377 op.right = right;
1378 Expression::Mul(op)
1379 }
1380 Expression::Div(mut op) => {
1381 op.left = left;
1382 op.right = right;
1383 Expression::Div(op)
1384 }
1385 Expression::Eq(mut op) => {
1386 op.left = left;
1387 op.right = right;
1388 Expression::Eq(op)
1389 }
1390 Expression::Lt(mut op) => {
1391 op.left = left;
1392 op.right = right;
1393 Expression::Lt(op)
1394 }
1395 Expression::Gt(mut op) => {
1396 op.left = left;
1397 op.right = right;
1398 Expression::Gt(op)
1399 }
1400 Expression::Neq(mut op) => {
1401 op.left = left;
1402 op.right = right;
1403 Expression::Neq(op)
1404 }
1405 Expression::Lte(mut op) => {
1406 op.left = left;
1407 op.right = right;
1408 Expression::Lte(op)
1409 }
1410 Expression::Gte(mut op) => {
1411 op.left = left;
1412 op.right = right;
1413 Expression::Gte(op)
1414 }
1415 Expression::Mod(mut op) => {
1416 op.left = left;
1417 op.right = right;
1418 Expression::Mod(op)
1419 }
1420 Expression::Concat(mut op) => {
1421 op.left = left;
1422 op.right = right;
1423 Expression::Concat(op)
1424 }
1425 Expression::BitwiseAnd(mut op) => {
1426 op.left = left;
1427 op.right = right;
1428 Expression::BitwiseAnd(op)
1429 }
1430 Expression::BitwiseOr(mut op) => {
1431 op.left = left;
1432 op.right = right;
1433 Expression::BitwiseOr(op)
1434 }
1435 Expression::BitwiseXor(mut op) => {
1436 op.left = left;
1437 op.right = right;
1438 Expression::BitwiseXor(op)
1439 }
1440 Expression::Is(mut op) => {
1441 op.left = left;
1442 op.right = right;
1443 Expression::Is(op)
1444 }
1445 Expression::MemberOf(mut op) => {
1446 op.left = left;
1447 op.right = right;
1448 Expression::MemberOf(op)
1449 }
1450 Expression::ArrayContainsAll(mut op) => {
1451 op.left = left;
1452 op.right = right;
1453 Expression::ArrayContainsAll(op)
1454 }
1455 Expression::ArrayContainedBy(mut op) => {
1456 op.left = left;
1457 op.right = right;
1458 Expression::ArrayContainedBy(op)
1459 }
1460 Expression::ArrayOverlaps(mut op) => {
1461 op.left = left;
1462 op.right = right;
1463 Expression::ArrayOverlaps(op)
1464 }
1465 Expression::TsMatch(mut op) => {
1466 op.left = left;
1467 op.right = right;
1468 Expression::TsMatch(op)
1469 }
1470 Expression::Adjacent(mut op) => {
1471 op.left = left;
1472 op.right = right;
1473 Expression::Adjacent(op)
1474 }
1475 Expression::Like(mut like) => {
1476 like.left = left;
1477 like.right = right;
1478 Expression::Like(like)
1479 }
1480 Expression::ILike(mut like) => {
1481 like.left = left;
1482 like.right = right;
1483 Expression::ILike(like)
1484 }
1485 _ => {
1486 return Err(crate::error::Error::Internal(
1487 "unexpected binary transform task".to_string(),
1488 ));
1489 }
1490 };
1491 results.push(transform_fn(rebuilt)?);
1492 }
1493 FinishTask::CastLike(expr) => {
1494 let child = transform_pop_result(&mut results)?;
1495 let rebuilt = match expr {
1496 Expression::Cast(mut cast) => {
1497 cast.this = child;
1498 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1499 Expression::Cast(cast)
1500 }
1501 Expression::TryCast(mut cast) => {
1502 cast.this = child;
1503 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1504 Expression::TryCast(cast)
1505 }
1506 Expression::SafeCast(mut cast) => {
1507 cast.this = child;
1508 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1509 Expression::SafeCast(cast)
1510 }
1511 _ => {
1512 return Err(crate::error::Error::Internal(
1513 "unexpected cast transform task".to_string(),
1514 ));
1515 }
1516 };
1517 results.push(transform_fn(rebuilt)?);
1518 }
1519 FinishTask::List(expr, count) => {
1520 let children = transform_pop_results(&mut results, count)?;
1521 let rebuilt = match expr {
1522 Expression::Function(mut function) => {
1523 function.args = children;
1524 Expression::Function(function)
1525 }
1526 Expression::Array(mut array) => {
1527 array.expressions = children;
1528 Expression::Array(array)
1529 }
1530 Expression::Tuple(mut tuple) => {
1531 tuple.expressions = children;
1532 Expression::Tuple(tuple)
1533 }
1534 Expression::ArrayFunc(mut array) => {
1535 array.expressions = children;
1536 Expression::ArrayFunc(array)
1537 }
1538 Expression::Coalesce(mut func) => {
1539 func.expressions = children;
1540 Expression::Coalesce(func)
1541 }
1542 Expression::Greatest(mut func) => {
1543 func.expressions = children;
1544 Expression::Greatest(func)
1545 }
1546 Expression::Least(mut func) => {
1547 func.expressions = children;
1548 Expression::Least(func)
1549 }
1550 Expression::ArrayConcat(mut func) => {
1551 func.expressions = children;
1552 Expression::ArrayConcat(func)
1553 }
1554 Expression::ArrayIntersect(mut func) => {
1555 func.expressions = children;
1556 Expression::ArrayIntersect(func)
1557 }
1558 Expression::ArrayZip(mut func) => {
1559 func.expressions = children;
1560 Expression::ArrayZip(func)
1561 }
1562 Expression::MapConcat(mut func) => {
1563 func.expressions = children;
1564 Expression::MapConcat(func)
1565 }
1566 Expression::JsonArray(mut func) => {
1567 func.expressions = children;
1568 Expression::JsonArray(func)
1569 }
1570 _ => {
1571 return Err(crate::error::Error::Internal(
1572 "unexpected list transform task".to_string(),
1573 ));
1574 }
1575 };
1576 results.push(transform_fn(rebuilt)?);
1577 }
1578 FinishTask::From(mut from, count) => {
1579 from.expressions = transform_pop_results(&mut results, count)?;
1580 results.push(transform_fn(Expression::From(Box::new(from)))?);
1581 }
1582 FinishTask::Select(frame) => {
1583 let mut select = *frame.select;
1584
1585 if frame.qualify_present {
1586 if let Some(ref mut qualify) = select.qualify {
1587 qualify.this = transform_pop_result(&mut results)?;
1588 }
1589 }
1590 if frame.having_present {
1591 if let Some(ref mut having) = select.having {
1592 having.this = transform_pop_result(&mut results)?;
1593 }
1594 }
1595 if frame.group_by_count > 0 {
1596 if let Some(ref mut group_by) = select.group_by {
1597 group_by.expressions =
1598 transform_pop_results(&mut results, frame.group_by_count)?;
1599 }
1600 }
1601 if frame.where_present {
1602 if let Some(ref mut where_clause) = select.where_clause {
1603 where_clause.this = transform_pop_result(&mut results)?;
1604 }
1605 }
1606 if frame.from_present {
1607 match transform_pop_result(&mut results)? {
1608 Expression::From(from) => {
1609 select.from = Some(*from);
1610 }
1611 _ => {
1612 return Err(crate::error::Error::Internal(
1613 "expected FROM expression result".to_string(),
1614 ));
1615 }
1616 }
1617 }
1618 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1619
1620 select.joins = select
1621 .joins
1622 .into_iter()
1623 .map(|mut join| {
1624 join.this = transform_recursive(join.this, transform_fn)?;
1625 if let Some(on) = join.on.take() {
1626 join.on = Some(transform_recursive(on, transform_fn)?);
1627 }
1628 match transform_fn(Expression::Join(Box::new(join)))? {
1629 Expression::Join(j) => Ok(*j),
1630 _ => Err(crate::error::Error::parse(
1631 "Join transformation returned non-join expression",
1632 0,
1633 0,
1634 0,
1635 0,
1636 )),
1637 }
1638 })
1639 .collect::<Result<Vec<_>>>()?;
1640
1641 select.lateral_views = select
1642 .lateral_views
1643 .into_iter()
1644 .map(|mut lv| {
1645 lv.this = transform_recursive(lv.this, transform_fn)?;
1646 Ok(lv)
1647 })
1648 .collect::<Result<Vec<_>>>()?;
1649
1650 if let Some(mut with) = select.with.take() {
1651 with.ctes = with
1652 .ctes
1653 .into_iter()
1654 .map(|mut cte| {
1655 let original = cte.this.clone();
1656 cte.this =
1657 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1658 cte
1659 })
1660 .collect();
1661 select.with = Some(with);
1662 }
1663
1664 if let Some(mut order) = select.order_by.take() {
1665 order.expressions = order
1666 .expressions
1667 .into_iter()
1668 .map(|o| {
1669 let mut o = o;
1670 let original = o.this.clone();
1671 o.this =
1672 transform_recursive(o.this, transform_fn).unwrap_or(original);
1673 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1674 Ok(Expression::Ordered(transformed)) => *transformed,
1675 Ok(_) | Err(_) => o,
1676 }
1677 })
1678 .collect();
1679 select.order_by = Some(order);
1680 }
1681
1682 if let Some(ref mut windows) = select.windows {
1683 for nw in windows.iter_mut() {
1684 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1685 .into_iter()
1686 .map(|o| {
1687 let mut o = o;
1688 let original = o.this.clone();
1689 o.this = transform_recursive(o.this, transform_fn)
1690 .unwrap_or(original);
1691 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1692 Ok(Expression::Ordered(transformed)) => *transformed,
1693 Ok(_) | Err(_) => o,
1694 }
1695 })
1696 .collect();
1697 }
1698 }
1699
1700 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1701 }
1702 FinishTask::SetOp(expr) => {
1703 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1704 let left = children.next().expect("left child");
1705 let right = children.next().expect("right child");
1706
1707 let rebuilt = match expr {
1708 Expression::Union(mut union) => {
1709 union.left = left;
1710 union.right = right;
1711 if let Some(mut order) = union.order_by.take() {
1712 order.expressions = order
1713 .expressions
1714 .into_iter()
1715 .map(|o| {
1716 let mut o = o;
1717 let original = o.this.clone();
1718 o.this = transform_recursive(o.this, transform_fn)
1719 .unwrap_or(original);
1720 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1721 {
1722 Ok(Expression::Ordered(transformed)) => *transformed,
1723 Ok(_) | Err(_) => o,
1724 }
1725 })
1726 .collect();
1727 union.order_by = Some(order);
1728 }
1729 if let Some(mut with) = union.with.take() {
1730 with.ctes = with
1731 .ctes
1732 .into_iter()
1733 .map(|mut cte| {
1734 let original = cte.this.clone();
1735 cte.this = transform_recursive(cte.this, transform_fn)
1736 .unwrap_or(original);
1737 cte
1738 })
1739 .collect();
1740 union.with = Some(with);
1741 }
1742 Expression::Union(union)
1743 }
1744 Expression::Intersect(mut intersect) => {
1745 intersect.left = left;
1746 intersect.right = right;
1747 if let Some(mut order) = intersect.order_by.take() {
1748 order.expressions = order
1749 .expressions
1750 .into_iter()
1751 .map(|o| {
1752 let mut o = o;
1753 let original = o.this.clone();
1754 o.this = transform_recursive(o.this, transform_fn)
1755 .unwrap_or(original);
1756 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1757 {
1758 Ok(Expression::Ordered(transformed)) => *transformed,
1759 Ok(_) | Err(_) => o,
1760 }
1761 })
1762 .collect();
1763 intersect.order_by = Some(order);
1764 }
1765 if let Some(mut with) = intersect.with.take() {
1766 with.ctes = with
1767 .ctes
1768 .into_iter()
1769 .map(|mut cte| {
1770 let original = cte.this.clone();
1771 cte.this = transform_recursive(cte.this, transform_fn)
1772 .unwrap_or(original);
1773 cte
1774 })
1775 .collect();
1776 intersect.with = Some(with);
1777 }
1778 Expression::Intersect(intersect)
1779 }
1780 Expression::Except(mut except) => {
1781 except.left = left;
1782 except.right = right;
1783 if let Some(mut order) = except.order_by.take() {
1784 order.expressions = order
1785 .expressions
1786 .into_iter()
1787 .map(|o| {
1788 let mut o = o;
1789 let original = o.this.clone();
1790 o.this = transform_recursive(o.this, transform_fn)
1791 .unwrap_or(original);
1792 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1793 {
1794 Ok(Expression::Ordered(transformed)) => *transformed,
1795 Ok(_) | Err(_) => o,
1796 }
1797 })
1798 .collect();
1799 except.order_by = Some(order);
1800 }
1801 if let Some(mut with) = except.with.take() {
1802 with.ctes = with
1803 .ctes
1804 .into_iter()
1805 .map(|mut cte| {
1806 let original = cte.this.clone();
1807 cte.this = transform_recursive(cte.this, transform_fn)
1808 .unwrap_or(original);
1809 cte
1810 })
1811 .collect();
1812 except.with = Some(with);
1813 }
1814 Expression::Except(except)
1815 }
1816 _ => {
1817 return Err(crate::error::Error::Internal(
1818 "unexpected set-op transform task".to_string(),
1819 ));
1820 }
1821 };
1822 results.push(transform_fn(rebuilt)?);
1823 }
1824 },
1825 }
1826 }
1827
1828 match results.len() {
1829 1 => Ok(results.pop().expect("single transform result")),
1830 _ => Err(crate::error::Error::Internal(
1831 "unexpected transform result stack size".to_string(),
1832 )),
1833 }
1834}
1835
/// Runs `transform_recursive` over a `TableRef`.
///
/// The table reference is wrapped in `Expression::Table` so it can go through
/// the generic expression transform, then unwrapped again afterwards.
///
/// # Errors
///
/// Propagates any error from `transform_recursive`. Also returns a parse error
/// when the transformed expression is no longer an `Expression::Table`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_table_ref_recursive<F>(table: TableRef, transform_fn: &F) -> Result<TableRef>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let wrapped = Expression::Table(Box::new(table));
    if let Expression::Table(rewritten) = transform_recursive(wrapped, transform_fn)? {
        return Ok(*rewritten);
    }
    // The transform replaced the table with a different expression variant,
    // which cannot be stored back into a `TableRef` slot.
    Err(crate::error::Error::parse(
        "TableRef transformation returned non-table expression",
        0,
        0,
        0,
        0,
    ))
}
1857
/// Runs `transform_recursive` over a `From` clause.
///
/// The clause is boxed into `Expression::From`, transformed as an ordinary
/// expression, and then unboxed back into a `From`.
///
/// # Errors
///
/// Propagates any error from `transform_recursive`. Also returns a parse error
/// when the transformed expression is not an `Expression::From`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_from_recursive<F>(from: From, transform_fn: &F) -> Result<From>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let wrapped = Expression::From(Box::new(from));
    let transformed = transform_recursive(wrapped, transform_fn)?;
    if let Expression::From(rewritten) = transformed {
        return Ok(*rewritten);
    }
    // Any other variant cannot be placed back where a `From` is required.
    Err(crate::error::Error::parse(
        "FROM transformation returned non-FROM expression",
        0,
        0,
        0,
        0,
    ))
}
1879
/// Transforms a `Join` and the expressions it carries.
///
/// Children are rewritten with `transform_recursive` in this order: the joined
/// expression (`this`), the optional `on` condition, the optional
/// `match_condition`, and then each entry of `pivots`. Finally the rebuilt join
/// is wrapped in `Expression::Join` and passed to `transform_fn` itself.
///
/// # Errors
///
/// Propagates the first error from any child transform or from `transform_fn`.
/// Also returns a parse error when `transform_fn` yields something other than
/// an `Expression::Join`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_join_recursive<F>(mut join: Join, transform_fn: &F) -> Result<Join>
where
    F: Fn(Expression) -> Result<Expression>,
{
    join.this = transform_recursive(join.this, transform_fn)?;

    // Optional children: transform when present, leave `None` untouched.
    join.on = join
        .on
        .take()
        .map(|condition| transform_recursive(condition, transform_fn))
        .transpose()?;
    join.match_condition = join
        .match_condition
        .take()
        .map(|condition| transform_recursive(condition, transform_fn))
        .transpose()?;

    let mut rewritten_pivots = Vec::with_capacity(join.pivots.len());
    for pivot in std::mem::take(&mut join.pivots) {
        rewritten_pivots.push(transform_recursive(pivot, transform_fn)?);
    }
    join.pivots = rewritten_pivots;

    // Give the callback a chance to rewrite the join node itself.
    let rebuilt = transform_fn(Expression::Join(Box::new(join)))?;
    match rebuilt {
        Expression::Join(boxed) => Ok(*boxed),
        _ => Err(crate::error::Error::parse(
            "Join transformation returned non-join expression",
            0,
            0,
            0,
            0,
        )),
    }
}
1914
/// Transforms the expressions held by an [`OutputClause`].
///
/// Every entry of `columns` is rewritten with `transform_recursive`, followed by the optional
/// `into_table` target. The clause itself is never passed to `transform_fn`; only its
/// children are visited.
///
/// # Errors
///
/// Propagates the first error returned by `transform_recursive`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_output_clause_recursive<F>(
    mut output: OutputClause,
    transform_fn: &F,
) -> Result<OutputClause>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut columns = Vec::with_capacity(output.columns.len());
    for column in output.columns {
        columns.push(transform_recursive(column, transform_fn)?);
    }
    output.columns = columns;

    output.into_table = match output.into_table.take() {
        Some(target) => Some(transform_recursive(target, transform_fn)?),
        None => None,
    };

    Ok(output)
}
1938
/// Transforms the contents of a [`With`] clause.
///
/// The body (`this`) of every CTE is rewritten with `transform_recursive`, in declaration
/// order, followed by the optional boxed `search` expression. Neither the CTE nodes nor the
/// `With` node itself are passed to `transform_fn` here.
///
/// # Errors
///
/// Propagates the first error returned by `transform_recursive`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_with_recursive<F>(mut with: With, transform_fn: &F) -> Result<With>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut ctes = Vec::with_capacity(with.ctes.len());
    for mut cte in with.ctes {
        cte.this = transform_recursive(cte.this, transform_fn)?;
        ctes.push(cte);
    }
    with.ctes = ctes;

    with.search = with
        .search
        .take()
        .map(|search| transform_recursive(*search, transform_fn).map(Box::new))
        .transpose()?;

    Ok(with)
}
1962
/// Transforms every item of an [`OrderBy`] clause on a best-effort basis.
///
/// For each ordered item:
///
/// 1. The sort key (`this`) is rewritten with `transform_recursive`. If that fails, the
///    original key is kept.
/// 2. The item is wrapped in `Expression::Ordered` and passed to `transform_fn`. If the
///    callback fails, or returns anything other than `Expression::Ordered`, the item from
///    step 1 is kept.
///
/// Failures are swallowed rather than propagated, so as written this function always returns
/// `Ok`.
// NOTE(review): the error swallowing matches the ORDER BY handling elsewhere in this file and
// is presumably deliberate; confirm before tightening it.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_order_by_recursive<F>(mut order: OrderBy, transform_fn: &F) -> Result<OrderBy>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut rewritten = Vec::with_capacity(order.expressions.len());

    for mut item in order.expressions {
        // Keep a copy of the key so a failed child transform can fall back to it.
        let fallback_key = item.this.clone();
        item.this = match transform_recursive(item.this, transform_fn) {
            Ok(key) => key,
            Err(_) => fallback_key,
        };

        // The callback consumes its argument, so it receives a clone and `item` stays
        // available as the fallback.
        let candidate = Expression::Ordered(Box::new(item.clone()));
        let item = if let Ok(Expression::Ordered(replaced)) = transform_fn(candidate) {
            *replaced
        } else {
            item
        };

        rewritten.push(item);
    }

    order.expressions = rewritten;
    Ok(order)
}
1987
1988#[cfg(any(
1989 feature = "transpile",
1990 feature = "ast-tools",
1991 feature = "generate",
1992 feature = "semantic"
1993))]
1994fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1995where
1996 F: Fn(Expression) -> Result<Expression>,
1997{
1998 use crate::expressions::BinaryOp;
1999
2000 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
2001 macro_rules! recurse_agg {
2002 ($variant:ident, $f:expr) => {{
2003 let mut f = $f;
2004 f.this = transform_recursive(f.this, transform_fn)?;
2005 if let Some(filter) = f.filter.take() {
2006 f.filter = Some(transform_recursive(filter, transform_fn)?);
2007 }
2008 for ord in &mut f.order_by {
2009 ord.this = transform_recursive(
2010 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
2011 transform_fn,
2012 )?;
2013 }
2014 if let Some((ref mut expr, _)) = f.having_max {
2015 *expr = Box::new(transform_recursive(
2016 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
2017 transform_fn,
2018 )?);
2019 }
2020 if let Some(limit) = f.limit.take() {
2021 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
2022 }
2023 Expression::$variant(f)
2024 }};
2025 }
2026
2027 // Helper macro to transform binary ops with Box<BinaryOp>
2028 macro_rules! transform_binary {
2029 ($variant:ident, $op:expr) => {{
2030 let left = transform_recursive($op.left, transform_fn)?;
2031 let right = transform_recursive($op.right, transform_fn)?;
2032 Expression::$variant(Box::new(BinaryOp {
2033 left,
2034 right,
2035 left_comments: $op.left_comments,
2036 operator_comments: $op.operator_comments,
2037 trailing_comments: $op.trailing_comments,
2038 inferred_type: $op.inferred_type,
2039 }))
2040 }};
2041 }
2042
2043 // Fast path: leaf nodes never need child traversal, apply transform directly
2044 if matches!(
2045 &expr,
2046 Expression::Literal(_)
2047 | Expression::Boolean(_)
2048 | Expression::Null(_)
2049 | Expression::Identifier(_)
2050 | Expression::Star(_)
2051 | Expression::Parameter(_)
2052 | Expression::Placeholder(_)
2053 | Expression::SessionParameter(_)
2054 ) {
2055 return transform_fn(expr);
2056 }
2057
2058 // First recursively transform children, then apply the transform function
2059 let expr = match expr {
2060 Expression::Select(mut select) => {
2061 select.expressions = select
2062 .expressions
2063 .into_iter()
2064 .map(|e| transform_recursive(e, transform_fn))
2065 .collect::<Result<Vec<_>>>()?;
2066
2067 // Transform FROM clause
2068 if let Some(mut from) = select.from.take() {
2069 from.expressions = from
2070 .expressions
2071 .into_iter()
2072 .map(|e| transform_recursive(e, transform_fn))
2073 .collect::<Result<Vec<_>>>()?;
2074 select.from = Some(from);
2075 }
2076
2077 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
2078 select.joins = select
2079 .joins
2080 .into_iter()
2081 .map(|mut join| {
2082 join.this = transform_recursive(join.this, transform_fn)?;
2083 if let Some(on) = join.on.take() {
2084 join.on = Some(transform_recursive(on, transform_fn)?);
2085 }
2086 // Wrap join in Expression::Join to allow transform_fn to transform it
2087 match transform_fn(Expression::Join(Box::new(join)))? {
2088 Expression::Join(j) => Ok(*j),
2089 _ => Err(crate::error::Error::parse(
2090 "Join transformation returned non-join expression",
2091 0,
2092 0,
2093 0,
2094 0,
2095 )),
2096 }
2097 })
2098 .collect::<Result<Vec<_>>>()?;
2099
2100 // Transform LATERAL VIEW expressions (Hive/Spark)
2101 select.lateral_views = select
2102 .lateral_views
2103 .into_iter()
2104 .map(|mut lv| {
2105 lv.this = transform_recursive(lv.this, transform_fn)?;
2106 Ok(lv)
2107 })
2108 .collect::<Result<Vec<_>>>()?;
2109
2110 // Transform WHERE clause
2111 if let Some(mut where_clause) = select.where_clause.take() {
2112 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2113 select.where_clause = Some(where_clause);
2114 }
2115
2116 // Transform GROUP BY
2117 if let Some(mut group_by) = select.group_by.take() {
2118 group_by.expressions = group_by
2119 .expressions
2120 .into_iter()
2121 .map(|e| transform_recursive(e, transform_fn))
2122 .collect::<Result<Vec<_>>>()?;
2123 select.group_by = Some(group_by);
2124 }
2125
2126 // Transform HAVING
2127 if let Some(mut having) = select.having.take() {
2128 having.this = transform_recursive(having.this, transform_fn)?;
2129 select.having = Some(having);
2130 }
2131
2132 // Transform WITH (CTEs)
2133 if let Some(mut with) = select.with.take() {
2134 with.ctes = with
2135 .ctes
2136 .into_iter()
2137 .map(|mut cte| {
2138 let original = cte.this.clone();
2139 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2140 cte
2141 })
2142 .collect();
2143 select.with = Some(with);
2144 }
2145
2146 // Transform ORDER BY
2147 if let Some(mut order) = select.order_by.take() {
2148 order.expressions = order
2149 .expressions
2150 .into_iter()
2151 .map(|o| {
2152 let mut o = o;
2153 let original = o.this.clone();
2154 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2155 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
2156 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2157 Ok(Expression::Ordered(transformed)) => *transformed,
2158 Ok(_) | Err(_) => o,
2159 }
2160 })
2161 .collect();
2162 select.order_by = Some(order);
2163 }
2164
2165 // Transform WINDOW clause order_by
2166 if let Some(ref mut windows) = select.windows {
2167 for nw in windows.iter_mut() {
2168 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
2169 .into_iter()
2170 .map(|o| {
2171 let mut o = o;
2172 let original = o.this.clone();
2173 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2174 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2175 Ok(Expression::Ordered(transformed)) => *transformed,
2176 Ok(_) | Err(_) => o,
2177 }
2178 })
2179 .collect();
2180 }
2181 }
2182
2183 // Transform QUALIFY
2184 if let Some(mut qual) = select.qualify.take() {
2185 qual.this = transform_recursive(qual.this, transform_fn)?;
2186 select.qualify = Some(qual);
2187 }
2188
2189 Expression::Select(select)
2190 }
2191 Expression::Function(mut f) => {
2192 f.args = f
2193 .args
2194 .into_iter()
2195 .map(|e| transform_recursive(e, transform_fn))
2196 .collect::<Result<Vec<_>>>()?;
2197 Expression::Function(f)
2198 }
2199 Expression::AggregateFunction(mut f) => {
2200 f.args = f
2201 .args
2202 .into_iter()
2203 .map(|e| transform_recursive(e, transform_fn))
2204 .collect::<Result<Vec<_>>>()?;
2205 if let Some(filter) = f.filter {
2206 f.filter = Some(transform_recursive(filter, transform_fn)?);
2207 }
2208 Expression::AggregateFunction(f)
2209 }
2210 Expression::WindowFunction(mut wf) => {
2211 wf.this = transform_recursive(wf.this, transform_fn)?;
2212 wf.over.partition_by = wf
2213 .over
2214 .partition_by
2215 .into_iter()
2216 .map(|e| transform_recursive(e, transform_fn))
2217 .collect::<Result<Vec<_>>>()?;
2218 // Transform order_by items through Expression::Ordered wrapper
2219 wf.over.order_by = wf
2220 .over
2221 .order_by
2222 .into_iter()
2223 .map(|o| {
2224 let mut o = o;
2225 o.this = transform_recursive(o.this, transform_fn)?;
2226 match transform_fn(Expression::Ordered(Box::new(o)))? {
2227 Expression::Ordered(transformed) => Ok(*transformed),
2228 _ => Err(crate::error::Error::parse(
2229 "Ordered transformation returned non-Ordered expression",
2230 0,
2231 0,
2232 0,
2233 0,
2234 )),
2235 }
2236 })
2237 .collect::<Result<Vec<_>>>()?;
2238 Expression::WindowFunction(wf)
2239 }
2240 Expression::Alias(mut a) => {
2241 a.this = transform_recursive(a.this, transform_fn)?;
2242 Expression::Alias(a)
2243 }
2244 Expression::Cast(mut c) => {
2245 c.this = transform_recursive(c.this, transform_fn)?;
2246 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
2247 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2248 Expression::Cast(c)
2249 }
2250 Expression::And(op) => transform_binary!(And, *op),
2251 Expression::Or(op) => transform_binary!(Or, *op),
2252 Expression::Add(op) => transform_binary!(Add, *op),
2253 Expression::Sub(op) => transform_binary!(Sub, *op),
2254 Expression::Mul(op) => transform_binary!(Mul, *op),
2255 Expression::Div(op) => transform_binary!(Div, *op),
2256 Expression::Eq(op) => transform_binary!(Eq, *op),
2257 Expression::Lt(op) => transform_binary!(Lt, *op),
2258 Expression::Gt(op) => transform_binary!(Gt, *op),
2259 Expression::Paren(mut p) => {
2260 p.this = transform_recursive(p.this, transform_fn)?;
2261 Expression::Paren(p)
2262 }
2263 Expression::Coalesce(mut f) => {
2264 f.expressions = f
2265 .expressions
2266 .into_iter()
2267 .map(|e| transform_recursive(e, transform_fn))
2268 .collect::<Result<Vec<_>>>()?;
2269 Expression::Coalesce(f)
2270 }
2271 Expression::IfNull(mut f) => {
2272 f.this = transform_recursive(f.this, transform_fn)?;
2273 f.expression = transform_recursive(f.expression, transform_fn)?;
2274 Expression::IfNull(f)
2275 }
2276 Expression::Nvl(mut f) => {
2277 f.this = transform_recursive(f.this, transform_fn)?;
2278 f.expression = transform_recursive(f.expression, transform_fn)?;
2279 Expression::Nvl(f)
2280 }
2281 Expression::In(mut i) => {
2282 i.this = transform_recursive(i.this, transform_fn)?;
2283 i.expressions = i
2284 .expressions
2285 .into_iter()
2286 .map(|e| transform_recursive(e, transform_fn))
2287 .collect::<Result<Vec<_>>>()?;
2288 if let Some(query) = i.query {
2289 i.query = Some(transform_recursive(query, transform_fn)?);
2290 }
2291 Expression::In(i)
2292 }
2293 Expression::Not(mut n) => {
2294 n.this = transform_recursive(n.this, transform_fn)?;
2295 Expression::Not(n)
2296 }
2297 Expression::ArraySlice(mut s) => {
2298 s.this = transform_recursive(s.this, transform_fn)?;
2299 if let Some(start) = s.start {
2300 s.start = Some(transform_recursive(start, transform_fn)?);
2301 }
2302 if let Some(end) = s.end {
2303 s.end = Some(transform_recursive(end, transform_fn)?);
2304 }
2305 Expression::ArraySlice(s)
2306 }
2307 Expression::Subscript(mut s) => {
2308 s.this = transform_recursive(s.this, transform_fn)?;
2309 s.index = transform_recursive(s.index, transform_fn)?;
2310 Expression::Subscript(s)
2311 }
2312 Expression::Array(mut a) => {
2313 a.expressions = a
2314 .expressions
2315 .into_iter()
2316 .map(|e| transform_recursive(e, transform_fn))
2317 .collect::<Result<Vec<_>>>()?;
2318 Expression::Array(a)
2319 }
2320 Expression::Struct(mut s) => {
2321 let mut new_fields = Vec::new();
2322 for (name, expr) in s.fields {
2323 let transformed = transform_recursive(expr, transform_fn)?;
2324 new_fields.push((name, transformed));
2325 }
2326 s.fields = new_fields;
2327 Expression::Struct(s)
2328 }
2329 Expression::NamedArgument(mut na) => {
2330 na.value = transform_recursive(na.value, transform_fn)?;
2331 Expression::NamedArgument(na)
2332 }
2333 Expression::MapFunc(mut m) => {
2334 m.keys = m
2335 .keys
2336 .into_iter()
2337 .map(|e| transform_recursive(e, transform_fn))
2338 .collect::<Result<Vec<_>>>()?;
2339 m.values = m
2340 .values
2341 .into_iter()
2342 .map(|e| transform_recursive(e, transform_fn))
2343 .collect::<Result<Vec<_>>>()?;
2344 Expression::MapFunc(m)
2345 }
2346 Expression::ArrayFunc(mut a) => {
2347 a.expressions = a
2348 .expressions
2349 .into_iter()
2350 .map(|e| transform_recursive(e, transform_fn))
2351 .collect::<Result<Vec<_>>>()?;
2352 Expression::ArrayFunc(a)
2353 }
2354 Expression::Lambda(mut l) => {
2355 l.body = transform_recursive(l.body, transform_fn)?;
2356 Expression::Lambda(l)
2357 }
2358 Expression::JsonExtract(mut f) => {
2359 f.this = transform_recursive(f.this, transform_fn)?;
2360 f.path = transform_recursive(f.path, transform_fn)?;
2361 Expression::JsonExtract(f)
2362 }
2363 Expression::JsonExtractScalar(mut f) => {
2364 f.this = transform_recursive(f.this, transform_fn)?;
2365 f.path = transform_recursive(f.path, transform_fn)?;
2366 Expression::JsonExtractScalar(f)
2367 }
2368
2369 // ===== UnaryFunc-based expressions =====
2370 // These all have a single `this: Expression` child
2371 Expression::Length(mut f) => {
2372 f.this = transform_recursive(f.this, transform_fn)?;
2373 Expression::Length(f)
2374 }
2375 Expression::Upper(mut f) => {
2376 f.this = transform_recursive(f.this, transform_fn)?;
2377 Expression::Upper(f)
2378 }
2379 Expression::Lower(mut f) => {
2380 f.this = transform_recursive(f.this, transform_fn)?;
2381 Expression::Lower(f)
2382 }
2383 Expression::LTrim(mut f) => {
2384 f.this = transform_recursive(f.this, transform_fn)?;
2385 Expression::LTrim(f)
2386 }
2387 Expression::RTrim(mut f) => {
2388 f.this = transform_recursive(f.this, transform_fn)?;
2389 Expression::RTrim(f)
2390 }
2391 Expression::Reverse(mut f) => {
2392 f.this = transform_recursive(f.this, transform_fn)?;
2393 Expression::Reverse(f)
2394 }
2395 Expression::Abs(mut f) => {
2396 f.this = transform_recursive(f.this, transform_fn)?;
2397 Expression::Abs(f)
2398 }
2399 Expression::Ceil(mut f) => {
2400 f.this = transform_recursive(f.this, transform_fn)?;
2401 Expression::Ceil(f)
2402 }
2403 Expression::Floor(mut f) => {
2404 f.this = transform_recursive(f.this, transform_fn)?;
2405 Expression::Floor(f)
2406 }
2407 Expression::Sign(mut f) => {
2408 f.this = transform_recursive(f.this, transform_fn)?;
2409 Expression::Sign(f)
2410 }
2411 Expression::Sqrt(mut f) => {
2412 f.this = transform_recursive(f.this, transform_fn)?;
2413 Expression::Sqrt(f)
2414 }
2415 Expression::Cbrt(mut f) => {
2416 f.this = transform_recursive(f.this, transform_fn)?;
2417 Expression::Cbrt(f)
2418 }
2419 Expression::Ln(mut f) => {
2420 f.this = transform_recursive(f.this, transform_fn)?;
2421 Expression::Ln(f)
2422 }
2423 Expression::Log(mut f) => {
2424 f.this = transform_recursive(f.this, transform_fn)?;
2425 if let Some(base) = f.base {
2426 f.base = Some(transform_recursive(base, transform_fn)?);
2427 }
2428 Expression::Log(f)
2429 }
2430 Expression::Exp(mut f) => {
2431 f.this = transform_recursive(f.this, transform_fn)?;
2432 Expression::Exp(f)
2433 }
2434 Expression::Date(mut f) => {
2435 f.this = transform_recursive(f.this, transform_fn)?;
2436 Expression::Date(f)
2437 }
2438 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2439 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2440 Expression::Variance(f) => recurse_agg!(Variance, f),
2441
2442 // ===== BinaryFunc-based expressions =====
2443 Expression::ModFunc(mut f) => {
2444 f.this = transform_recursive(f.this, transform_fn)?;
2445 f.expression = transform_recursive(f.expression, transform_fn)?;
2446 Expression::ModFunc(f)
2447 }
2448 Expression::Power(mut f) => {
2449 f.this = transform_recursive(f.this, transform_fn)?;
2450 f.expression = transform_recursive(f.expression, transform_fn)?;
2451 Expression::Power(f)
2452 }
2453 Expression::MapFromArrays(mut f) => {
2454 f.this = transform_recursive(f.this, transform_fn)?;
2455 f.expression = transform_recursive(f.expression, transform_fn)?;
2456 Expression::MapFromArrays(f)
2457 }
2458 Expression::ElementAt(mut f) => {
2459 f.this = transform_recursive(f.this, transform_fn)?;
2460 f.expression = transform_recursive(f.expression, transform_fn)?;
2461 Expression::ElementAt(f)
2462 }
2463 Expression::MapContainsKey(mut f) => {
2464 f.this = transform_recursive(f.this, transform_fn)?;
2465 f.expression = transform_recursive(f.expression, transform_fn)?;
2466 Expression::MapContainsKey(f)
2467 }
2468 Expression::Left(mut f) => {
2469 f.this = transform_recursive(f.this, transform_fn)?;
2470 f.length = transform_recursive(f.length, transform_fn)?;
2471 Expression::Left(f)
2472 }
2473 Expression::Right(mut f) => {
2474 f.this = transform_recursive(f.this, transform_fn)?;
2475 f.length = transform_recursive(f.length, transform_fn)?;
2476 Expression::Right(f)
2477 }
2478 Expression::Repeat(mut f) => {
2479 f.this = transform_recursive(f.this, transform_fn)?;
2480 f.times = transform_recursive(f.times, transform_fn)?;
2481 Expression::Repeat(f)
2482 }
2483
2484 // ===== Complex function expressions =====
2485 Expression::Substring(mut f) => {
2486 f.this = transform_recursive(f.this, transform_fn)?;
2487 f.start = transform_recursive(f.start, transform_fn)?;
2488 if let Some(len) = f.length {
2489 f.length = Some(transform_recursive(len, transform_fn)?);
2490 }
2491 Expression::Substring(f)
2492 }
2493 Expression::Replace(mut f) => {
2494 f.this = transform_recursive(f.this, transform_fn)?;
2495 f.old = transform_recursive(f.old, transform_fn)?;
2496 f.new = transform_recursive(f.new, transform_fn)?;
2497 Expression::Replace(f)
2498 }
2499 Expression::ConcatWs(mut f) => {
2500 f.separator = transform_recursive(f.separator, transform_fn)?;
2501 f.expressions = f
2502 .expressions
2503 .into_iter()
2504 .map(|e| transform_recursive(e, transform_fn))
2505 .collect::<Result<Vec<_>>>()?;
2506 Expression::ConcatWs(f)
2507 }
2508 Expression::Trim(mut f) => {
2509 f.this = transform_recursive(f.this, transform_fn)?;
2510 if let Some(chars) = f.characters {
2511 f.characters = Some(transform_recursive(chars, transform_fn)?);
2512 }
2513 Expression::Trim(f)
2514 }
2515 Expression::Split(mut f) => {
2516 f.this = transform_recursive(f.this, transform_fn)?;
2517 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2518 Expression::Split(f)
2519 }
2520 Expression::Lpad(mut f) => {
2521 f.this = transform_recursive(f.this, transform_fn)?;
2522 f.length = transform_recursive(f.length, transform_fn)?;
2523 if let Some(fill) = f.fill {
2524 f.fill = Some(transform_recursive(fill, transform_fn)?);
2525 }
2526 Expression::Lpad(f)
2527 }
2528 Expression::Rpad(mut f) => {
2529 f.this = transform_recursive(f.this, transform_fn)?;
2530 f.length = transform_recursive(f.length, transform_fn)?;
2531 if let Some(fill) = f.fill {
2532 f.fill = Some(transform_recursive(fill, transform_fn)?);
2533 }
2534 Expression::Rpad(f)
2535 }
2536
2537 // ===== Conditional expressions =====
2538 Expression::Case(mut c) => {
2539 if let Some(operand) = c.operand {
2540 c.operand = Some(transform_recursive(operand, transform_fn)?);
2541 }
2542 c.whens = c
2543 .whens
2544 .into_iter()
2545 .map(|(cond, then)| {
2546 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2547 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2548 (new_cond, new_then)
2549 })
2550 .collect();
2551 if let Some(else_expr) = c.else_ {
2552 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2553 }
2554 Expression::Case(c)
2555 }
2556 Expression::IfFunc(mut f) => {
2557 f.condition = transform_recursive(f.condition, transform_fn)?;
2558 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2559 if let Some(false_val) = f.false_value {
2560 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2561 }
2562 Expression::IfFunc(f)
2563 }
2564
2565 // ===== Date/Time expressions =====
2566 Expression::DateAdd(mut f) => {
2567 f.this = transform_recursive(f.this, transform_fn)?;
2568 f.interval = transform_recursive(f.interval, transform_fn)?;
2569 Expression::DateAdd(f)
2570 }
2571 Expression::DateSub(mut f) => {
2572 f.this = transform_recursive(f.this, transform_fn)?;
2573 f.interval = transform_recursive(f.interval, transform_fn)?;
2574 Expression::DateSub(f)
2575 }
2576 Expression::DateDiff(mut f) => {
2577 f.this = transform_recursive(f.this, transform_fn)?;
2578 f.expression = transform_recursive(f.expression, transform_fn)?;
2579 Expression::DateDiff(f)
2580 }
2581 Expression::DateTrunc(mut f) => {
2582 f.this = transform_recursive(f.this, transform_fn)?;
2583 Expression::DateTrunc(f)
2584 }
2585 Expression::Extract(mut f) => {
2586 f.this = transform_recursive(f.this, transform_fn)?;
2587 Expression::Extract(f)
2588 }
2589
2590 // ===== JSON expressions =====
2591 Expression::JsonObject(mut f) => {
2592 f.pairs = f
2593 .pairs
2594 .into_iter()
2595 .map(|(k, v)| {
2596 let new_k = transform_recursive(k, transform_fn)?;
2597 let new_v = transform_recursive(v, transform_fn)?;
2598 Ok((new_k, new_v))
2599 })
2600 .collect::<Result<Vec<_>>>()?;
2601 Expression::JsonObject(f)
2602 }
2603
2604 // ===== Subquery expressions =====
2605 Expression::Subquery(mut s) => {
2606 s.this = transform_recursive(s.this, transform_fn)?;
2607 Expression::Subquery(s)
2608 }
2609 Expression::Exists(mut e) => {
2610 e.this = transform_recursive(e.this, transform_fn)?;
2611 Expression::Exists(e)
2612 }
2613 Expression::Describe(mut d) => {
2614 d.target = transform_recursive(d.target, transform_fn)?;
2615 Expression::Describe(d)
2616 }
2617
2618 // ===== Set operations =====
2619 Expression::Union(mut u) => {
2620 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2621 u.left = transform_recursive(left, transform_fn)?;
2622 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2623 u.right = transform_recursive(right, transform_fn)?;
2624 if let Some(mut order) = u.order_by.take() {
2625 order.expressions = order
2626 .expressions
2627 .into_iter()
2628 .map(|o| {
2629 let mut o = o;
2630 let original = o.this.clone();
2631 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2632 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2633 Ok(Expression::Ordered(transformed)) => *transformed,
2634 Ok(_) | Err(_) => o,
2635 }
2636 })
2637 .collect();
2638 u.order_by = Some(order);
2639 }
2640 if let Some(mut with) = u.with.take() {
2641 with.ctes = with
2642 .ctes
2643 .into_iter()
2644 .map(|mut cte| {
2645 let original = cte.this.clone();
2646 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2647 cte
2648 })
2649 .collect();
2650 u.with = Some(with);
2651 }
2652 Expression::Union(u)
2653 }
2654 Expression::Intersect(mut i) => {
2655 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2656 i.left = transform_recursive(left, transform_fn)?;
2657 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2658 i.right = transform_recursive(right, transform_fn)?;
2659 if let Some(mut order) = i.order_by.take() {
2660 order.expressions = order
2661 .expressions
2662 .into_iter()
2663 .map(|o| {
2664 let mut o = o;
2665 let original = o.this.clone();
2666 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2667 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2668 Ok(Expression::Ordered(transformed)) => *transformed,
2669 Ok(_) | Err(_) => o,
2670 }
2671 })
2672 .collect();
2673 i.order_by = Some(order);
2674 }
2675 if let Some(mut with) = i.with.take() {
2676 with.ctes = with
2677 .ctes
2678 .into_iter()
2679 .map(|mut cte| {
2680 let original = cte.this.clone();
2681 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2682 cte
2683 })
2684 .collect();
2685 i.with = Some(with);
2686 }
2687 Expression::Intersect(i)
2688 }
2689 Expression::Except(mut e) => {
2690 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2691 e.left = transform_recursive(left, transform_fn)?;
2692 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2693 e.right = transform_recursive(right, transform_fn)?;
2694 if let Some(mut order) = e.order_by.take() {
2695 order.expressions = order
2696 .expressions
2697 .into_iter()
2698 .map(|o| {
2699 let mut o = o;
2700 let original = o.this.clone();
2701 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2702 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2703 Ok(Expression::Ordered(transformed)) => *transformed,
2704 Ok(_) | Err(_) => o,
2705 }
2706 })
2707 .collect();
2708 e.order_by = Some(order);
2709 }
2710 if let Some(mut with) = e.with.take() {
2711 with.ctes = with
2712 .ctes
2713 .into_iter()
2714 .map(|mut cte| {
2715 let original = cte.this.clone();
2716 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2717 cte
2718 })
2719 .collect();
2720 e.with = Some(with);
2721 }
2722 Expression::Except(e)
2723 }
2724
2725 // ===== DML expressions =====
2726 Expression::Insert(mut ins) => {
2727 // Transform VALUES clause expressions
2728 let mut new_values = Vec::new();
2729 for row in ins.values {
2730 let mut new_row = Vec::new();
2731 for e in row {
2732 new_row.push(transform_recursive(e, transform_fn)?);
2733 }
2734 new_values.push(new_row);
2735 }
2736 ins.values = new_values;
2737
2738 // Transform query (for INSERT ... SELECT)
2739 if let Some(query) = ins.query {
2740 ins.query = Some(transform_recursive(query, transform_fn)?);
2741 }
2742
2743 // Transform RETURNING clause
2744 let mut new_returning = Vec::new();
2745 for e in ins.returning {
2746 new_returning.push(transform_recursive(e, transform_fn)?);
2747 }
2748 ins.returning = new_returning;
2749
2750 // Transform ON CONFLICT clause
2751 if let Some(on_conflict) = ins.on_conflict {
2752 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2753 }
2754
2755 Expression::Insert(ins)
2756 }
2757 Expression::Update(mut upd) => {
2758 upd.table = transform_table_ref_recursive(upd.table, transform_fn)?;
2759 upd.extra_tables = upd
2760 .extra_tables
2761 .into_iter()
2762 .map(|table| transform_table_ref_recursive(table, transform_fn))
2763 .collect::<Result<Vec<_>>>()?;
2764 upd.table_joins = upd
2765 .table_joins
2766 .into_iter()
2767 .map(|join| transform_join_recursive(join, transform_fn))
2768 .collect::<Result<Vec<_>>>()?;
2769 upd.set = upd
2770 .set
2771 .into_iter()
2772 .map(|(id, val)| {
2773 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2774 (id, new_val)
2775 })
2776 .collect();
2777 if let Some(from_clause) = upd.from_clause.take() {
2778 upd.from_clause = Some(transform_from_recursive(from_clause, transform_fn)?);
2779 }
2780 upd.from_joins = upd
2781 .from_joins
2782 .into_iter()
2783 .map(|join| transform_join_recursive(join, transform_fn))
2784 .collect::<Result<Vec<_>>>()?;
2785 if let Some(mut where_clause) = upd.where_clause.take() {
2786 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2787 upd.where_clause = Some(where_clause);
2788 }
2789 upd.returning = upd
2790 .returning
2791 .into_iter()
2792 .map(|expr| transform_recursive(expr, transform_fn))
2793 .collect::<Result<Vec<_>>>()?;
2794 if let Some(output) = upd.output.take() {
2795 upd.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2796 }
2797 if let Some(with) = upd.with.take() {
2798 upd.with = Some(transform_with_recursive(with, transform_fn)?);
2799 }
2800 if let Some(limit) = upd.limit.take() {
2801 upd.limit = Some(transform_recursive(limit, transform_fn)?);
2802 }
2803 if let Some(order_by) = upd.order_by.take() {
2804 upd.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2805 }
2806 Expression::Update(upd)
2807 }
2808 Expression::Delete(mut del) => {
2809 del.table = transform_table_ref_recursive(del.table, transform_fn)?;
2810 del.using = del
2811 .using
2812 .into_iter()
2813 .map(|table| transform_table_ref_recursive(table, transform_fn))
2814 .collect::<Result<Vec<_>>>()?;
2815 if let Some(mut where_clause) = del.where_clause.take() {
2816 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2817 del.where_clause = Some(where_clause);
2818 }
2819 if let Some(output) = del.output.take() {
2820 del.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2821 }
2822 if let Some(with) = del.with.take() {
2823 del.with = Some(transform_with_recursive(with, transform_fn)?);
2824 }
2825 if let Some(limit) = del.limit.take() {
2826 del.limit = Some(transform_recursive(limit, transform_fn)?);
2827 }
2828 if let Some(order_by) = del.order_by.take() {
2829 del.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2830 }
2831 del.returning = del
2832 .returning
2833 .into_iter()
2834 .map(|expr| transform_recursive(expr, transform_fn))
2835 .collect::<Result<Vec<_>>>()?;
2836 del.tables = del
2837 .tables
2838 .into_iter()
2839 .map(|table| transform_table_ref_recursive(table, transform_fn))
2840 .collect::<Result<Vec<_>>>()?;
2841 del.joins = del
2842 .joins
2843 .into_iter()
2844 .map(|join| transform_join_recursive(join, transform_fn))
2845 .collect::<Result<Vec<_>>>()?;
2846 Expression::Delete(del)
2847 }
2848
2849 // ===== CTE expressions =====
2850 Expression::With(mut w) => {
2851 w.ctes = w
2852 .ctes
2853 .into_iter()
2854 .map(|mut cte| {
2855 let original = cte.this.clone();
2856 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2857 cte
2858 })
2859 .collect();
2860 Expression::With(w)
2861 }
2862 Expression::Cte(mut c) => {
2863 c.this = transform_recursive(c.this, transform_fn)?;
2864 Expression::Cte(c)
2865 }
2866
2867 // ===== Order expressions =====
2868 Expression::Ordered(mut o) => {
2869 o.this = transform_recursive(o.this, transform_fn)?;
2870 Expression::Ordered(o)
2871 }
2872
2873 // ===== Negation =====
2874 Expression::Neg(mut n) => {
2875 n.this = transform_recursive(n.this, transform_fn)?;
2876 Expression::Neg(n)
2877 }
2878
2879 // ===== Between =====
2880 Expression::Between(mut b) => {
2881 b.this = transform_recursive(b.this, transform_fn)?;
2882 b.low = transform_recursive(b.low, transform_fn)?;
2883 b.high = transform_recursive(b.high, transform_fn)?;
2884 Expression::Between(b)
2885 }
2886 Expression::IsNull(mut i) => {
2887 i.this = transform_recursive(i.this, transform_fn)?;
2888 Expression::IsNull(i)
2889 }
2890 Expression::IsTrue(mut i) => {
2891 i.this = transform_recursive(i.this, transform_fn)?;
2892 Expression::IsTrue(i)
2893 }
2894 Expression::IsFalse(mut i) => {
2895 i.this = transform_recursive(i.this, transform_fn)?;
2896 Expression::IsFalse(i)
2897 }
2898
2899 // ===== Like expressions =====
2900 Expression::Like(mut l) => {
2901 l.left = transform_recursive(l.left, transform_fn)?;
2902 l.right = transform_recursive(l.right, transform_fn)?;
2903 Expression::Like(l)
2904 }
2905 Expression::ILike(mut l) => {
2906 l.left = transform_recursive(l.left, transform_fn)?;
2907 l.right = transform_recursive(l.right, transform_fn)?;
2908 Expression::ILike(l)
2909 }
2910
2911 // ===== Additional binary ops not covered by macro =====
2912 Expression::Neq(op) => transform_binary!(Neq, *op),
2913 Expression::Lte(op) => transform_binary!(Lte, *op),
2914 Expression::Gte(op) => transform_binary!(Gte, *op),
2915 Expression::Mod(op) => transform_binary!(Mod, *op),
2916 Expression::Concat(op) => transform_binary!(Concat, *op),
2917 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2918 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2919 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2920 Expression::Is(op) => transform_binary!(Is, *op),
2921
2922 // ===== TryCast / SafeCast =====
2923 Expression::TryCast(mut c) => {
2924 c.this = transform_recursive(c.this, transform_fn)?;
2925 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2926 Expression::TryCast(c)
2927 }
2928 Expression::SafeCast(mut c) => {
2929 c.this = transform_recursive(c.this, transform_fn)?;
2930 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2931 Expression::SafeCast(c)
2932 }
2933
2934 // ===== Misc =====
2935 Expression::Unnest(mut f) => {
2936 f.this = transform_recursive(f.this, transform_fn)?;
2937 f.expressions = f
2938 .expressions
2939 .into_iter()
2940 .map(|e| transform_recursive(e, transform_fn))
2941 .collect::<Result<Vec<_>>>()?;
2942 Expression::Unnest(f)
2943 }
2944 Expression::Explode(mut f) => {
2945 f.this = transform_recursive(f.this, transform_fn)?;
2946 Expression::Explode(f)
2947 }
2948 Expression::GroupConcat(mut f) => {
2949 f.this = transform_recursive(f.this, transform_fn)?;
2950 Expression::GroupConcat(f)
2951 }
2952 Expression::StringAgg(mut f) => {
2953 f.this = transform_recursive(f.this, transform_fn)?;
2954 if let Some(order_by) = f.order_by.take() {
2955 f.order_by = Some(
2956 order_by
2957 .into_iter()
2958 .map(|mut ordered| {
2959 let original = ordered.this.clone();
2960 ordered.this =
2961 transform_recursive(ordered.this, transform_fn).unwrap_or(original);
2962 match transform_fn(Expression::Ordered(Box::new(ordered.clone()))) {
2963 Ok(Expression::Ordered(transformed)) => Ok(*transformed),
2964 Ok(_) | Err(_) => Ok(ordered),
2965 }
2966 })
2967 .collect::<Result<Vec<_>>>()?,
2968 );
2969 }
2970 Expression::StringAgg(f)
2971 }
2972 Expression::ListAgg(mut f) => {
2973 f.this = transform_recursive(f.this, transform_fn)?;
2974 Expression::ListAgg(f)
2975 }
2976 Expression::ArrayAgg(mut f) => {
2977 f.this = transform_recursive(f.this, transform_fn)?;
2978 Expression::ArrayAgg(f)
2979 }
2980 Expression::ParseJson(mut f) => {
2981 f.this = transform_recursive(f.this, transform_fn)?;
2982 Expression::ParseJson(f)
2983 }
2984 Expression::ToJson(mut f) => {
2985 f.this = transform_recursive(f.this, transform_fn)?;
2986 Expression::ToJson(f)
2987 }
2988 Expression::JSONExtract(mut e) => {
2989 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2990 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2991 Expression::JSONExtract(e)
2992 }
2993 Expression::JSONExtractScalar(mut e) => {
2994 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2995 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2996 Expression::JSONExtractScalar(e)
2997 }
2998
2999 // StrToTime: recurse into this
3000 Expression::StrToTime(mut e) => {
3001 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
3002 Expression::StrToTime(e)
3003 }
3004
3005 // UnixToTime: recurse into this
3006 Expression::UnixToTime(mut e) => {
3007 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
3008 Expression::UnixToTime(e)
3009 }
3010
3011 // CreateTable: recurse into column defaults, on_update expressions, and data types
3012 Expression::CreateTable(mut ct) => {
3013 for col in &mut ct.columns {
3014 if let Some(default_expr) = col.default.take() {
3015 col.default = Some(transform_recursive(default_expr, transform_fn)?);
3016 }
3017 if let Some(on_update_expr) = col.on_update.take() {
3018 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
3019 }
3020 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
3021 // are NOT applied here because per-dialect transforms are designed for CAST/expression
3022 // contexts and may not produce correct results for DDL column definitions.
3023 // The DDL type mappings would need dedicated handling per source/target pair.
3024 }
3025 if let Some(as_select) = ct.as_select.take() {
3026 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
3027 }
3028 Expression::CreateTable(ct)
3029 }
3030
3031 // CreateView: recurse into the view body query
3032 Expression::CreateView(mut cv) => {
3033 cv.query = transform_recursive(cv.query, transform_fn)?;
3034 Expression::CreateView(cv)
3035 }
3036
3037 // CreateTask: recurse into the task body
3038 Expression::CreateTask(mut ct) => {
3039 ct.body = transform_recursive(ct.body, transform_fn)?;
3040 Expression::CreateTask(ct)
3041 }
3042
3043 // Prepare: recurse into the prepared statement body
3044 Expression::Prepare(mut prepare) => {
3045 prepare.statement = transform_recursive(prepare.statement, transform_fn)?;
3046 Expression::Prepare(prepare)
3047 }
3048
3049 // Execute: recurse into procedure/prepared name and argument values
3050 Expression::Execute(mut execute) => {
3051 execute.this = transform_recursive(execute.this, transform_fn)?;
3052 execute.arguments = execute
3053 .arguments
3054 .into_iter()
3055 .map(|argument| transform_recursive(argument, transform_fn))
3056 .collect::<Result<Vec<_>>>()?;
3057 execute.parameters = execute
3058 .parameters
3059 .into_iter()
3060 .map(|mut parameter| {
3061 parameter.value = transform_recursive(parameter.value, transform_fn)?;
3062 Ok(parameter)
3063 })
3064 .collect::<Result<Vec<_>>>()?;
3065 Expression::Execute(execute)
3066 }
3067
3068 // CreateProcedure: recurse into body expressions
3069 Expression::CreateProcedure(mut cp) => {
3070 if let Some(body) = cp.body.take() {
3071 cp.body = Some(match body {
3072 FunctionBody::Expression(expr) => {
3073 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
3074 }
3075 FunctionBody::Return(expr) => {
3076 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
3077 }
3078 FunctionBody::Statements(stmts) => {
3079 let transformed_stmts = stmts
3080 .into_iter()
3081 .map(|s| transform_recursive(s, transform_fn))
3082 .collect::<Result<Vec<_>>>()?;
3083 FunctionBody::Statements(transformed_stmts)
3084 }
3085 other => other,
3086 });
3087 }
3088 Expression::CreateProcedure(cp)
3089 }
3090
3091 // CreateFunction: recurse into body expressions
3092 Expression::CreateFunction(mut cf) => {
3093 if let Some(body) = cf.body.take() {
3094 cf.body = Some(match body {
3095 FunctionBody::Expression(expr) => {
3096 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
3097 }
3098 FunctionBody::Return(expr) => {
3099 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
3100 }
3101 FunctionBody::Statements(stmts) => {
3102 let transformed_stmts = stmts
3103 .into_iter()
3104 .map(|s| transform_recursive(s, transform_fn))
3105 .collect::<Result<Vec<_>>>()?;
3106 FunctionBody::Statements(transformed_stmts)
3107 }
3108 other => other,
3109 });
3110 }
3111 Expression::CreateFunction(cf)
3112 }
3113
3114 // MemberOf: recurse into left and right operands
3115 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
3116 // ArrayContainsAll (@>): recurse into left and right operands
3117 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
3118 // ArrayContainedBy (<@): recurse into left and right operands
3119 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
3120 // ArrayOverlaps (&&): recurse into left and right operands
3121 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
3122 // TsMatch (@@): recurse into left and right operands
3123 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
3124 // Adjacent (-|-): recurse into left and right operands
3125 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
3126
3127 // Table: recurse into when (HistoricalData) and changes fields
3128 Expression::Table(mut t) => {
3129 if let Some(when) = t.when.take() {
3130 let transformed =
3131 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
3132 if let Expression::HistoricalData(hd) = transformed {
3133 t.when = Some(hd);
3134 }
3135 }
3136 if let Some(changes) = t.changes.take() {
3137 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
3138 if let Expression::Changes(c) = transformed {
3139 t.changes = Some(c);
3140 }
3141 }
3142 Expression::Table(t)
3143 }
3144
3145 // HistoricalData (Snowflake time travel): recurse into expression
3146 Expression::HistoricalData(mut hd) => {
3147 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
3148 Expression::HistoricalData(hd)
3149 }
3150
3151 // Changes (Snowflake CHANGES clause): recurse into at_before and end
3152 Expression::Changes(mut c) => {
3153 if let Some(at_before) = c.at_before.take() {
3154 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
3155 }
3156 if let Some(end) = c.end.take() {
3157 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
3158 }
3159 Expression::Changes(c)
3160 }
3161
3162 // TableArgument: TABLE(expr) or MODEL(expr)
3163 Expression::TableArgument(mut ta) => {
3164 ta.this = transform_recursive(ta.this, transform_fn)?;
3165 Expression::TableArgument(ta)
3166 }
3167
3168 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
3169 Expression::JoinedTable(mut jt) => {
3170 jt.left = transform_recursive(jt.left, transform_fn)?;
3171 for join in &mut jt.joins {
3172 join.this = transform_recursive(
3173 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
3174 transform_fn,
3175 )?;
3176 if let Some(on) = join.on.take() {
3177 join.on = Some(transform_recursive(on, transform_fn)?);
3178 }
3179 }
3180 jt.lateral_views = jt
3181 .lateral_views
3182 .into_iter()
3183 .map(|mut lv| {
3184 lv.this = transform_recursive(lv.this, transform_fn)?;
3185 Ok(lv)
3186 })
3187 .collect::<Result<Vec<_>>>()?;
3188 Expression::JoinedTable(jt)
3189 }
3190
3191 // Lateral: LATERAL func() - recurse into the function expression
3192 Expression::Lateral(mut lat) => {
3193 *lat.this = transform_recursive(*lat.this, transform_fn)?;
3194 Expression::Lateral(lat)
3195 }
3196
3197 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
3198 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
3199 // as a unit together with the WithinGroup wrapper
3200 Expression::WithinGroup(mut wg) => {
3201 wg.order_by = wg
3202 .order_by
3203 .into_iter()
3204 .map(|mut o| {
3205 let original = o.this.clone();
3206 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
3207 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
3208 Ok(Expression::Ordered(transformed)) => *transformed,
3209 Ok(_) | Err(_) => o,
3210 }
3211 })
3212 .collect();
3213 Expression::WithinGroup(wg)
3214 }
3215
3216 // Filter: recurse into both the aggregate and the filter condition
3217 Expression::Filter(mut f) => {
3218 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
3219 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
3220 Expression::Filter(f)
3221 }
3222
3223 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
3224 // filter, order_by, having_max, and limit.
3225 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
3226 Expression::Sum(f) => recurse_agg!(Sum, f),
3227 Expression::Avg(f) => recurse_agg!(Avg, f),
3228 Expression::Min(f) => recurse_agg!(Min, f),
3229 Expression::Max(f) => recurse_agg!(Max, f),
3230 Expression::CountIf(f) => recurse_agg!(CountIf, f),
3231 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
3232 Expression::VarPop(f) => recurse_agg!(VarPop, f),
3233 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
3234 Expression::Median(f) => recurse_agg!(Median, f),
3235 Expression::Mode(f) => recurse_agg!(Mode, f),
3236 Expression::First(f) => recurse_agg!(First, f),
3237 Expression::Last(f) => recurse_agg!(Last, f),
3238 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
3239 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
3240 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
3241 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
3242 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
3243 Expression::Skewness(f) => recurse_agg!(Skewness, f),
3244 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
3245 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
3246 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
3247 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
3248 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
3249 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
3250
3251 // Count has its own struct with an Option<Expression> `this` field
3252 Expression::Count(mut c) => {
3253 if let Some(this) = c.this.take() {
3254 c.this = Some(transform_recursive(this, transform_fn)?);
3255 }
3256 if let Some(filter) = c.filter.take() {
3257 c.filter = Some(transform_recursive(filter, transform_fn)?);
3258 }
3259 Expression::Count(c)
3260 }
3261
3262 Expression::PipeOperator(mut pipe) => {
3263 pipe.this = transform_recursive(pipe.this, transform_fn)?;
3264 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
3265 Expression::PipeOperator(pipe)
3266 }
3267
3268 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
3269 Expression::ArrayExcept(mut f) => {
3270 f.this = transform_recursive(f.this, transform_fn)?;
3271 f.expression = transform_recursive(f.expression, transform_fn)?;
3272 Expression::ArrayExcept(f)
3273 }
3274 Expression::ArrayContains(mut f) => {
3275 f.this = transform_recursive(f.this, transform_fn)?;
3276 f.expression = transform_recursive(f.expression, transform_fn)?;
3277 Expression::ArrayContains(f)
3278 }
3279 Expression::ArrayDistinct(mut f) => {
3280 f.this = transform_recursive(f.this, transform_fn)?;
3281 Expression::ArrayDistinct(f)
3282 }
3283 Expression::ArrayPosition(mut f) => {
3284 f.this = transform_recursive(f.this, transform_fn)?;
3285 f.expression = transform_recursive(f.expression, transform_fn)?;
3286 Expression::ArrayPosition(f)
3287 }
3288
3289 // Pass through leaf nodes unchanged
3290 other => other,
3291 };
3292
3293 // Then apply the transform function
3294 transform_fn(expr)
3295}
3296
// NOTE: `configs_for_dialect_type` (defined below) returns the tokenizer config,
// generator config, and expression transform closure for a built-in dialect type.
// It is the shared implementation used by both `Dialect::get()` and custom dialect
// construction.
3300// ---------------------------------------------------------------------------
3301// Cached dialect configurations
3302// ---------------------------------------------------------------------------
3303
3304/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
3305/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
3306struct CachedDialectConfig {
3307 tokenizer_config: TokenizerConfig,
3308 #[cfg(feature = "generate")]
3309 generator_config: Arc<GeneratorConfig>,
3310}
3311
3312struct DialectConfigs {
3313 tokenizer_config: TokenizerConfig,
3314 #[cfg(feature = "generate")]
3315 generator_config: Arc<GeneratorConfig>,
3316 #[cfg(feature = "transpile")]
3317 transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
3318}
3319
/// Define a feature-gated `LazyLock<CachedDialectConfig>` static for one dialect.
///
/// Arguments, in order: the identifier of the static to create, an expression that
/// yields the dialect value, and the Cargo feature literal that gates the static.
macro_rules! cached_dialect {
    ($cache:ident, $dialect:expr, $gate:literal) => {
        #[cfg(feature = $gate)]
        static $cache: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            // Evaluate the dialect expression once and query it for both configs.
            let dialect = $dialect;
            CachedDialectConfig {
                tokenizer_config: dialect.tokenizer_config(),
                #[cfg(feature = "generate")]
                generator_config: Arc::new(dialect.generator_config()),
            }
        });
    };
}
3334
3335static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
3336 let d = GenericDialect;
3337 CachedDialectConfig {
3338 tokenizer_config: d.tokenizer_config(),
3339 #[cfg(feature = "generate")]
3340 generator_config: Arc::new(d.generator_config()),
3341 }
3342});
3343
3344cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
3345cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
3346cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
3347cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
3348cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
3349cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
3350cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
3351cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
3352cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
3353cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
3354cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
3355cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
3356cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
3357cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
3358cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
3359cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
3360cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
3361cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
3362cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
3363cached_dialect!(
3364 CACHED_MATERIALIZE,
3365 MaterializeDialect,
3366 "dialect-materialize"
3367);
3368cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
3369cached_dialect!(
3370 CACHED_SINGLESTORE,
3371 SingleStoreDialect,
3372 "dialect-singlestore"
3373);
3374cached_dialect!(
3375 CACHED_COCKROACHDB,
3376 CockroachDBDialect,
3377 "dialect-cockroachdb"
3378);
3379cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
3380cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
3381cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
3382cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
3383cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
3384cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
3385cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
3386cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
3387cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
3388cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
3389
3390fn configs_for_dialect_type(dt: DialectType) -> DialectConfigs {
3391 /// Clone configs from a cached static and pair with a fresh transform closure.
3392 macro_rules! from_cache {
3393 ($cache:expr, $dialect_struct:expr) => {{
3394 let c = &*$cache;
3395 DialectConfigs {
3396 tokenizer_config: c.tokenizer_config.clone(),
3397 #[cfg(feature = "generate")]
3398 generator_config: c.generator_config.clone(),
3399 #[cfg(feature = "transpile")]
3400 transformer: Box::new(move |e| $dialect_struct.transform_expr(e)),
3401 }
3402 }};
3403 }
3404 match dt {
3405 #[cfg(feature = "dialect-postgresql")]
3406 DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
3407 #[cfg(feature = "dialect-mysql")]
3408 DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
3409 #[cfg(feature = "dialect-bigquery")]
3410 DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
3411 #[cfg(feature = "dialect-snowflake")]
3412 DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
3413 #[cfg(feature = "dialect-duckdb")]
3414 DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
3415 #[cfg(feature = "dialect-tsql")]
3416 DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
3417 #[cfg(feature = "dialect-oracle")]
3418 DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
3419 #[cfg(feature = "dialect-hive")]
3420 DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
3421 #[cfg(feature = "dialect-spark")]
3422 DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
3423 #[cfg(feature = "dialect-sqlite")]
3424 DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
3425 #[cfg(feature = "dialect-presto")]
3426 DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
3427 #[cfg(feature = "dialect-trino")]
3428 DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
3429 #[cfg(feature = "dialect-redshift")]
3430 DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
3431 #[cfg(feature = "dialect-clickhouse")]
3432 DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
3433 #[cfg(feature = "dialect-databricks")]
3434 DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
3435 #[cfg(feature = "dialect-athena")]
3436 DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
3437 #[cfg(feature = "dialect-teradata")]
3438 DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
3439 #[cfg(feature = "dialect-doris")]
3440 DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
3441 #[cfg(feature = "dialect-starrocks")]
3442 DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
3443 #[cfg(feature = "dialect-materialize")]
3444 DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
3445 #[cfg(feature = "dialect-risingwave")]
3446 DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
3447 #[cfg(feature = "dialect-singlestore")]
3448 DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
3449 #[cfg(feature = "dialect-cockroachdb")]
3450 DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
3451 #[cfg(feature = "dialect-tidb")]
3452 DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
3453 #[cfg(feature = "dialect-druid")]
3454 DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
3455 #[cfg(feature = "dialect-solr")]
3456 DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
3457 #[cfg(feature = "dialect-tableau")]
3458 DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
3459 #[cfg(feature = "dialect-dune")]
3460 DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
3461 #[cfg(feature = "dialect-fabric")]
3462 DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
3463 #[cfg(feature = "dialect-drill")]
3464 DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
3465 #[cfg(feature = "dialect-dremio")]
3466 DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
3467 #[cfg(feature = "dialect-exasol")]
3468 DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
3469 #[cfg(feature = "dialect-datafusion")]
3470 DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
3471 _ => from_cache!(CACHED_GENERIC, GenericDialect),
3472 }
3473}
3474
3475// ---------------------------------------------------------------------------
3476// Custom dialect registry
3477// ---------------------------------------------------------------------------
3478
3479static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
3480 LazyLock::new(|| RwLock::new(HashMap::new()));
3481
3482struct CustomDialectConfig {
3483 name: String,
3484 base_dialect: DialectType,
3485 tokenizer_config: TokenizerConfig,
3486 #[cfg(feature = "generate")]
3487 generator_config: GeneratorConfig,
3488 #[cfg(feature = "transpile")]
3489 transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3490 #[cfg(feature = "transpile")]
3491 preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3492}
3493
3494/// Fluent builder for creating and registering custom SQL dialects.
3495///
3496/// A custom dialect is based on an existing built-in dialect and allows selective
3497/// overrides of tokenizer configuration, generator configuration, and expression
3498/// transforms.
3499///
3500/// # Example
3501///
3502/// ```rust,ignore
3503/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
3504/// use polyglot_sql::generator::NormalizeFunctions;
3505///
3506/// CustomDialectBuilder::new("my_postgres")
3507/// .based_on(DialectType::PostgreSQL)
3508/// .generator_config_modifier(|gc| {
3509/// gc.normalize_functions = NormalizeFunctions::Lower;
3510/// })
3511/// .register()
3512/// .unwrap();
3513///
3514/// let d = Dialect::get_by_name("my_postgres").unwrap();
3515/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
3516/// let sql = d.generate(&exprs[0]).unwrap();
3517/// assert_eq!(sql, "select count(*)");
3518///
3519/// polyglot_sql::unregister_custom_dialect("my_postgres");
3520/// ```
3521pub struct CustomDialectBuilder {
3522 name: String,
3523 base_dialect: DialectType,
3524 tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
3525 #[cfg(feature = "generate")]
3526 generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
3527 #[cfg(feature = "transpile")]
3528 transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3529 #[cfg(feature = "transpile")]
3530 preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
3531}
3532
3533impl CustomDialectBuilder {
3534 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3535 pub fn new(name: impl Into<String>) -> Self {
3536 Self {
3537 name: name.into(),
3538 base_dialect: DialectType::Generic,
3539 tokenizer_modifier: None,
3540 #[cfg(feature = "generate")]
3541 generator_modifier: None,
3542 #[cfg(feature = "transpile")]
3543 transform: None,
3544 #[cfg(feature = "transpile")]
3545 preprocess: None,
3546 }
3547 }
3548
3549 /// Set the base built-in dialect to inherit configuration from.
3550 pub fn based_on(mut self, dialect: DialectType) -> Self {
3551 self.base_dialect = dialect;
3552 self
3553 }
3554
3555 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3556 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3557 where
3558 F: FnOnce(&mut TokenizerConfig) + 'static,
3559 {
3560 self.tokenizer_modifier = Some(Box::new(f));
3561 self
3562 }
3563
3564 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3565 #[cfg(feature = "generate")]
3566 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3567 where
3568 F: FnOnce(&mut GeneratorConfig) + 'static,
3569 {
3570 self.generator_modifier = Some(Box::new(f));
3571 self
3572 }
3573
3574 /// Set a custom per-node expression transform function.
3575 ///
3576 /// This replaces the base dialect's transform. It is called on every expression
3577 /// node during the recursive transform pass.
3578 #[cfg(feature = "transpile")]
3579 pub fn transform_fn<F>(mut self, f: F) -> Self
3580 where
3581 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3582 {
3583 self.transform = Some(Arc::new(f));
3584 self
3585 }
3586
3587 /// Set a custom whole-tree preprocessing function.
3588 ///
3589 /// This replaces the base dialect's built-in preprocessing. It is called once
3590 /// on the entire expression tree before the recursive per-node transform.
3591 #[cfg(feature = "transpile")]
3592 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3593 where
3594 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3595 {
3596 self.preprocess = Some(Arc::new(f));
3597 self
3598 }
3599
3600 /// Build the custom dialect configuration and register it in the global registry.
3601 ///
3602 /// Returns an error if:
3603 /// - The name collides with a built-in dialect name
3604 /// - A custom dialect with the same name is already registered
3605 pub fn register(self) -> Result<()> {
3606 // Reject names that collide with built-in dialects
3607 if DialectType::from_str(&self.name).is_ok() {
3608 return Err(crate::error::Error::parse(
3609 format!(
3610 "Cannot register custom dialect '{}': name collides with built-in dialect",
3611 self.name
3612 ),
3613 0,
3614 0,
3615 0,
3616 0,
3617 ));
3618 }
3619
3620 // Get base configs
3621 let base_configs = configs_for_dialect_type(self.base_dialect);
3622 let mut tok_config = base_configs.tokenizer_config;
3623 #[cfg(feature = "generate")]
3624 let mut gen_config = (*base_configs.generator_config).clone();
3625
3626 // Apply modifiers
3627 if let Some(tok_mod) = self.tokenizer_modifier {
3628 tok_mod(&mut tok_config);
3629 }
3630 #[cfg(feature = "generate")]
3631 if let Some(gen_mod) = self.generator_modifier {
3632 gen_mod(&mut gen_config);
3633 }
3634
3635 let config = CustomDialectConfig {
3636 name: self.name.clone(),
3637 base_dialect: self.base_dialect,
3638 tokenizer_config: tok_config,
3639 #[cfg(feature = "generate")]
3640 generator_config: gen_config,
3641 #[cfg(feature = "transpile")]
3642 transform: self.transform,
3643 #[cfg(feature = "transpile")]
3644 preprocess: self.preprocess,
3645 };
3646
3647 register_custom_dialect(config)
3648 }
3649}
3650
3651use std::str::FromStr;
3652
3653fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3654 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3655 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3656 })?;
3657
3658 if registry.contains_key(&config.name) {
3659 return Err(crate::error::Error::parse(
3660 format!("Custom dialect '{}' is already registered", config.name),
3661 0,
3662 0,
3663 0,
3664 0,
3665 ));
3666 }
3667
3668 registry.insert(config.name.clone(), Arc::new(config));
3669 Ok(())
3670}
3671
3672/// Remove a custom dialect from the global registry.
3673///
3674/// Returns `true` if a dialect with that name was found and removed,
3675/// `false` if no such custom dialect existed.
3676pub fn unregister_custom_dialect(name: &str) -> bool {
3677 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3678 registry.remove(name).is_some()
3679 } else {
3680 false
3681 }
3682}
3683
3684fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3685 CUSTOM_DIALECT_REGISTRY
3686 .read()
3687 .ok()
3688 .and_then(|registry| registry.get(name).cloned())
3689}
3690
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Dialect identity. It is handed to the parser configuration and selects the
    /// built-in preprocessing pipeline. For dialects built from a custom
    /// configuration this holds the configuration's base dialect.
    dialect_type: DialectType,
    /// Tokenizer constructed from this dialect's tokenizer configuration.
    tokenizer: Tokenizer,
    /// Shared generator configuration, used whenever no per-expression override applies.
    #[cfg(feature = "generate")]
    generator_config: Arc<GeneratorConfig>,
    /// Per-node rewrite passed to the recursive pass in [`Dialect::transform`].
    #[cfg(feature = "transpile")]
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    /// `None` for every dialect built from a custom configuration.
    #[cfg(feature = "generate")]
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    /// Always `None` for dialects created through [`Dialect::get`].
    #[cfg(feature = "transpile")]
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3727
/// Options for [`Dialect::transpile_with`].
///
/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
/// The struct is marked `#[non_exhaustive]` so new fields can be added without
/// breaking the API.
///
/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
/// Because of the container-level `#[serde(default)]`, fields missing from the
/// input are filled in from the `Default` implementation.
#[cfg(feature = "transpile")]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL. Defaults to `false`.
    pub pretty: bool,
    /// How unsupported target-dialect constructs should be handled.
    ///
    /// The default is [`UnsupportedLevel::Warn`], which preserves the current
    /// compatibility behavior and continues transpilation.
    pub unsupported_level: UnsupportedLevel,
    /// Maximum number of unsupported diagnostics to include in raised errors.
    ///
    /// Defaults to `3`. The transpile path raises a value of `0` to `1` before
    /// handing it to the generator.
    pub max_unsupported: usize,
}
3751
#[cfg(feature = "transpile")]
impl Default for TranspileOptions {
    /// Baseline options: compact output, unsupported constructs reported at
    /// the `Warn` level, and at most three diagnostics in raised errors.
    fn default() -> Self {
        const DEFAULT_MAX_UNSUPPORTED: usize = 3;

        let pretty = false;
        let unsupported_level = UnsupportedLevel::Warn;
        Self {
            pretty,
            unsupported_level,
            max_unsupported: DEFAULT_MAX_UNSUPPORTED,
        }
    }
}
3762
#[cfg(feature = "transpile")]
impl TranspileOptions {
    /// Default options, except that pretty-printing is switched on.
    pub fn pretty() -> Self {
        Self {
            pretty: true,
            ..Self::default()
        }
    }

    /// Default options, except that remaining known-unsupported constructs
    /// raise instead of warn.
    pub fn strict() -> Self {
        Self::default().with_unsupported_level(UnsupportedLevel::Raise)
    }

    /// Builder-style setter for how unsupported target-dialect constructs are handled.
    pub fn with_unsupported_level(self, level: UnsupportedLevel) -> Self {
        Self {
            unsupported_level: level,
            ..self
        }
    }

    /// Builder-style setter for the maximum number of unsupported diagnostics
    /// included in raised errors.
    pub fn with_max_unsupported(self, max: usize) -> Self {
        Self {
            max_unsupported: max,
            ..self
        }
    }
}
3793
/// A value that can be used as the target dialect in [`Dialect::transpile`] /
/// [`Dialect::transpile_with`].
///
/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
/// dialect handle, including custom ones). End users do not normally need to
/// implement this trait themselves.
#[cfg(feature = "transpile")]
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// The target is consumed by the call, and whatever `f` returns is passed
    /// back to the caller unchanged.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
3805
#[cfg(feature = "transpile")]
impl TranspileTarget for DialectType {
    /// Builds the built-in dialect for this variant via [`Dialect::get`] and
    /// lends it to `f` for the duration of the call.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        let resolved = Dialect::get(self);
        f(&resolved)
    }
}
3812
#[cfg(feature = "transpile")]
impl TranspileTarget for &Dialect {
    /// Passes the borrowed dialect handle straight to `f`; nothing is constructed.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        let dialect: &Dialect = self;
        f(dialect)
    }
}
3819
3820impl Dialect {
3821 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3822 ///
3823 /// This is the primary constructor. It initializes the tokenizer, generator config,
3824 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3825 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3826 /// config routing.
3827 pub fn get(dialect_type: DialectType) -> Self {
3828 let configs = configs_for_dialect_type(dialect_type);
3829 let tokenizer_config = configs.tokenizer_config;
3830 #[cfg(feature = "generate")]
3831 let generator_config = configs.generator_config;
3832 #[cfg(feature = "transpile")]
3833 let transformer = configs.transformer;
3834
3835 // Set up expression-specific generator config for hybrid dialects
3836 #[cfg(feature = "generate")]
3837 let generator_config_for_expr: Option<
3838 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3839 > = match dialect_type {
3840 #[cfg(feature = "dialect-athena")]
3841 DialectType::Athena => Some(Box::new(|expr| {
3842 AthenaDialect.generator_config_for_expr(expr)
3843 })),
3844 _ => None,
3845 };
3846
3847 Self {
3848 dialect_type,
3849 tokenizer: Tokenizer::new(tokenizer_config),
3850 #[cfg(feature = "generate")]
3851 generator_config,
3852 #[cfg(feature = "transpile")]
3853 transformer,
3854 #[cfg(feature = "generate")]
3855 generator_config_for_expr,
3856 #[cfg(feature = "transpile")]
3857 custom_preprocess: None,
3858 }
3859 }
3860
3861 /// Look up a dialect by string name.
3862 ///
3863 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3864 /// falls back to the custom dialect registry. Returns `None` if no dialect
3865 /// with the given name exists.
3866 pub fn get_by_name(name: &str) -> Option<Self> {
3867 // Try built-in first
3868 if let Ok(dt) = DialectType::from_str(name) {
3869 return Some(Self::get(dt));
3870 }
3871
3872 // Try custom registry
3873 let config = get_custom_dialect_config(name)?;
3874 Some(Self::from_custom_config(&config))
3875 }
3876
3877 /// Construct a `Dialect` from a custom dialect configuration.
3878 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3879 // Build the transformer: use custom if provided, else use base dialect's
3880 #[cfg(feature = "transpile")]
3881 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3882 if let Some(ref custom_transform) = config.transform {
3883 let t = Arc::clone(custom_transform);
3884 Box::new(move |e| t(e))
3885 } else {
3886 configs_for_dialect_type(config.base_dialect).transformer
3887 };
3888
3889 // Build the custom preprocess: use custom if provided
3890 #[cfg(feature = "transpile")]
3891 let custom_preprocess: Option<
3892 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
3893 > = config.preprocess.as_ref().map(|p| {
3894 let p = Arc::clone(p);
3895 Box::new(move |e: Expression| p(e))
3896 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3897 });
3898
3899 Self {
3900 dialect_type: config.base_dialect,
3901 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3902 #[cfg(feature = "generate")]
3903 generator_config: Arc::new(config.generator_config.clone()),
3904 #[cfg(feature = "transpile")]
3905 transformer,
3906 #[cfg(feature = "generate")]
3907 generator_config_for_expr: None,
3908 #[cfg(feature = "transpile")]
3909 custom_preprocess,
3910 }
3911 }
3912
/// Get the dialect type.
///
/// For a dialect built from a custom configuration this is the configuration's
/// base dialect, not a distinct custom identifier.
pub fn dialect_type(&self) -> DialectType {
    self.dialect_type
}
3917
/// Get the generator configuration.
///
/// This is the dialect-wide configuration. It does not reflect any
/// per-expression override that a hybrid dialect may apply while generating.
#[cfg(feature = "generate")]
pub fn generator_config(&self) -> &GeneratorConfig {
    &self.generator_config
}
3923
3924 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3925 ///
3926 /// The input may contain multiple semicolon-separated statements; each one
3927 /// produces a separate element in the returned vector. Tokenization uses
3928 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3929 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3930 let tokens = self.tokenizer.tokenize(sql)?;
3931 let config = crate::parser::ParserConfig {
3932 dialect: Some(self.dialect_type),
3933 ..Default::default()
3934 };
3935 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3936 parser.parse()
3937 }
3938
3939 /// Parse a standalone SQL data type using this dialect's tokenizer and parser.
3940 ///
3941 /// This accepts type strings such as `DECIMAL(10, 2)`, `INT[]`, or
3942 /// `STRUCT(a INT, b VARCHAR)` without requiring a surrounding statement.
3943 pub fn parse_data_type(&self, sql: &str) -> Result<DataType> {
3944 let tokens = self.tokenizer.tokenize(sql)?;
3945 let config = crate::parser::ParserConfig {
3946 dialect: Some(self.dialect_type),
3947 ..Default::default()
3948 };
3949 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3950 parser.parse_standalone_data_type()
3951 }
3952
/// Tokenize SQL using this dialect's tokenizer configuration.
///
/// Only lexing is performed; no parsing happens. Tokenizer errors are
/// returned unchanged.
pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
    self.tokenizer.tokenize(sql)
}
3957
/// Resolve the generator config to use for `expr` (supports hybrid dialects).
///
/// Uses the per-expression router when one is installed. Otherwise it clones
/// the dialect-wide config. In both cases the caller receives an owned
/// `GeneratorConfig` that can be mutated before generation.
#[cfg(feature = "generate")]
fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
    match self.generator_config_for_expr.as_ref() {
        Some(route) => route(expr),
        None => GeneratorConfig::clone(&self.generator_config),
    }
}
3968
/// Render an [`Expression`] AST node as SQL text.
///
/// The output uses this dialect's generator configuration for identifier quoting,
/// keyword casing, function name normalization, and syntax style. The result is
/// a single-line (non-pretty) SQL string.
#[cfg(feature = "generate")]
pub fn generate(&self, expr: &Expression) -> Result<String> {
    let mut generator = if self.generator_config_for_expr.is_some() {
        // Hybrid dialect: the config depends on the expression, so build an owned one.
        Generator::with_config(self.get_config_for_expr(expr))
    } else {
        // Common case: share the dialect-wide config through the `Arc`.
        Generator::with_arc_config(Arc::clone(&self.generator_config))
    };
    generator.generate(expr)
}
3985
/// Render an expression as SQL with pretty printing switched on.
///
/// Starts from the config resolved for `expr` and sets only `pretty`.
#[cfg(feature = "generate")]
pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
    self.generate_with_overrides(expr, |config| config.pretty = true)
}
3994
/// Render an expression as SQL while recording the source dialect (for transpilation).
///
/// Starts from the config resolved for `expr` and sets only `source_dialect`.
#[cfg(feature = "generate")]
pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
    self.generate_with_overrides(expr, |config| config.source_dialect = Some(source))
}
4003
/// Render an expression as pretty-printed SQL while recording the source dialect.
///
/// Starts from the config resolved for `expr` and sets `pretty` and
/// `source_dialect`.
#[cfg(feature = "generate")]
pub fn generate_pretty_with_source(
    &self,
    expr: &Expression,
    source: DialectType,
) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.pretty = true;
        config.source_dialect = Some(source);
    })
}
4017
/// Render an expression as SQL using the source dialect and the caller's
/// [`TranspileOptions`].
///
/// `opts.max_unsupported` is raised to at least `1` before it reaches the
/// generator. The other option fields are copied through as-is.
#[cfg(all(feature = "generate", feature = "transpile"))]
fn generate_with_transpile_options(
    &self,
    expr: &Expression,
    source: DialectType,
    opts: &TranspileOptions,
) -> Result<String> {
    let max_unsupported = opts.max_unsupported.max(1);
    self.generate_with_overrides(expr, |config| {
        config.source_dialect = Some(source);
        config.pretty = opts.pretty;
        config.unsupported_level = opts.unsupported_level;
        config.max_unsupported = max_unsupported;
    })
}
4034
/// Render an expression as SQL with every identifier quoted (identify=True).
///
/// Starts from the config resolved for `expr` and sets only
/// `always_quote_identifiers`.
#[cfg(feature = "generate")]
pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
    self.generate_with_overrides(expr, |config| config.always_quote_identifiers = true)
}
4043
/// Generate SQL from an expression with pretty printing and forced identifier quoting.
///
/// Starts from the generator config resolved for `expr`, then enables `pretty`
/// and `always_quote_identifiers`. Resolving the config per expression keeps
/// this method consistent with the other `generate_*` variants, so hybrid
/// dialects with a per-expression config router are honoured here too. For
/// dialects without a router the resolved config is a clone of the
/// dialect-wide config, so their output is unchanged.
#[cfg(feature = "generate")]
pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
    let mut config = self.get_config_for_expr(expr);
    config.pretty = true;
    config.always_quote_identifiers = true;
    let mut generator = Generator::with_config(config);
    generator.generate(expr)
}
4053
/// Render an expression as SQL after letting the caller adjust the generator config.
///
/// `overrides` receives a mutable, owned copy of the config resolved for
/// `expr`. The dialect's own configuration is never modified.
#[cfg(feature = "generate")]
pub fn generate_with_overrides(
    &self,
    expr: &Expression,
    overrides: impl FnOnce(&mut GeneratorConfig),
) -> Result<String> {
    let config = {
        let mut base = self.get_config_for_expr(expr);
        overrides(&mut base);
        base
    };
    Generator::with_config(config).generate(expr)
}
4066
/// Rewrite an expression tree so it conforms to this dialect's syntax and semantics.
///
/// Two phases run in order:
/// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
///    ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
/// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
///    that applies this dialect's [`DialectImpl::transform_expr`] to every node.
///
/// An error in preprocessing is returned immediately and the recursive pass is skipped.
///
/// This method is used both during transpilation (to rewrite an AST for a target dialect)
/// and for identity transforms (normalizing SQL within the same dialect).
#[cfg(feature = "transpile")]
pub fn transform(&self, expr: Expression) -> Result<Expression> {
    self.preprocess(expr)
        .and_then(|preprocessed| transform_recursive(preprocessed, &self.transformer))
}
4084
/// Apply dialect-specific preprocessing transforms.
///
/// When a custom preprocess hook is installed it replaces the built-in logic
/// entirely. Otherwise the rewrite pipeline is selected by `self.dialect_type`.
/// Each step consumes the tree and yields a new one, and the first failing step
/// aborts the pipeline through `?`. Dialects without a dedicated arm, or whose
/// arm is compiled out by its cargo feature, get the expression back unchanged.
///
/// The order of the steps inside each arm is significant and must be preserved.
#[cfg(feature = "transpile")]
fn preprocess(&self, expr: Expression) -> Result<Expression> {
    // If a custom preprocess function is set, use it instead of the built-in logic
    if let Some(ref custom_preprocess) = self.custom_preprocess {
        return custom_preprocess(expr);
    }

    // The feature list mirrors exactly the arms below that reference `transforms`;
    // presumably this keeps the import from being flagged as unused when none of
    // those dialects is compiled in.
    #[cfg(any(
        feature = "dialect-mysql",
        feature = "dialect-postgresql",
        feature = "dialect-bigquery",
        feature = "dialect-snowflake",
        feature = "dialect-tsql",
        feature = "dialect-spark",
        feature = "dialect-databricks",
        feature = "dialect-hive",
        feature = "dialect-sqlite",
        feature = "dialect-trino",
        feature = "dialect-presto",
        feature = "dialect-duckdb",
        feature = "dialect-redshift",
        feature = "dialect-starrocks",
        feature = "dialect-oracle",
        feature = "dialect-clickhouse",
    ))]
    use crate::transforms;

    match self.dialect_type {
        // MySQL: rewrite QUALIFY, FULL OUTER JOIN and SEMI/ANTI joins
        // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::eliminate_full_outer_join(expr)?;
            let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
            let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
            Ok(expr)
        }
        // PostgreSQL doesn't support QUALIFY; SEMI/ANTI joins are rewritten as well
        // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
        // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
            let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
            // Normalize SET ... TO to SET ... = in CREATE FUNCTION
            // Only normalize when sqlglot would fully parse (no body) —
            // sqlglot falls back to Command for complex function bodies,
            // preserving the original text including TO.
            // Only a top-level CREATE FUNCTION node is inspected here; nested
            // occurrences are not visited by this step.
            let expr = if let Expression::CreateFunction(mut cf) = expr {
                if cf.body.is_none() {
                    for opt in &mut cf.set_options {
                        if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
                            &mut opt.value
                        {
                            *use_to = false;
                        }
                    }
                }
                Expression::CreateFunction(cf)
            } else {
                expr
            };
            Ok(expr)
        }
        // BigQuery: rewrite SEMI/ANTI joins, push CTE column names down into the
        // CTE bodies, and turn projection-level explode into UNNEST
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => {
            let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
            let expr = transforms::pushdown_cte_column_names(expr)?;
            let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
            Ok(expr)
        }
        // Snowflake: rewrite SEMI/ANTI joins and the WINDOW clause, then apply
        // the Snowflake-specific FLATTEN projection rewrite
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => {
            let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
            let expr = transforms::eliminate_window_clause(expr)?;
            let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
            Ok(expr)
        }
        // TSQL doesn't support QUALIFY
        // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
        // TSQL doesn't support CTEs in subqueries (hoist to top level)
        // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
        // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
            let expr = transforms::ensure_bools(expr)?;
            let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
            let expr = transforms::move_ctes_to_top_level(expr)?;
            let expr = transforms::qualify_derived_table_outputs(expr)?;
            Ok(expr)
        }
        // Spark doesn't support QUALIFY (but Databricks does)
        // Spark doesn't support CTEs in subqueries (hoist to top level)
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::add_auto_table_alias(expr)?;
            let expr = transforms::simplify_nested_paren_values(expr)?;
            let expr = transforms::move_ctes_to_top_level(expr)?;
            Ok(expr)
        }
        // Databricks supports QUALIFY natively
        // Databricks doesn't support CTEs in subqueries (hoist to top level)
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => {
            let expr = transforms::add_auto_table_alias(expr)?;
            let expr = transforms::simplify_nested_paren_values(expr)?;
            let expr = transforms::move_ctes_to_top_level(expr)?;
            Ok(expr)
        }
        // Hive doesn't support QUALIFY or CTEs in subqueries
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::move_ctes_to_top_level(expr)?;
            Ok(expr)
        }
        // SQLite doesn't support QUALIFY
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => {
            let expr = transforms::eliminate_qualify(expr)?;
            Ok(expr)
        }
        // Trino doesn't support QUALIFY; projection-level explode becomes UNNEST
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
            Ok(expr)
        }
        // Presto doesn't support QUALIFY or WINDOW clause; projection-level
        // explode becomes UNNEST
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::eliminate_window_clause(expr)?;
            let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
            Ok(expr)
        }
        // DuckDB supports QUALIFY - no elimination needed
        // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
        // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => {
            let expr = transforms::expand_posexplode_duckdb(expr)?;
            let expr = transforms::expand_like_any(expr)?;
            Ok(expr)
        }
        // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::eliminate_window_clause(expr)?;
            let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
            Ok(expr)
        }
        // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY;
        // DISTINCT ON and GENERATE_DATE_ARRAY are rewritten as well
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => {
            let expr = transforms::eliminate_qualify(expr)?;
            let expr = transforms::expand_between_in_delete(expr)?;
            let expr = transforms::eliminate_distinct_on_for_dialect(
                expr,
                Some(DialectType::StarRocks),
                Some(DialectType::StarRocks),
            )?;
            let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
            Ok(expr)
        }
        // DataFusion supports QUALIFY and semi/anti joins natively
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => Ok(expr),
        // Oracle doesn't support QUALIFY
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => {
            let expr = transforms::eliminate_qualify(expr)?;
            Ok(expr)
        }
        // Drill - no special preprocessing needed
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => Ok(expr),
        // Teradata - no special preprocessing needed
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => Ok(expr),
        // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => {
            let expr = transforms::no_limit_order_by_union(expr)?;
            Ok(expr)
        }
        // Other dialects - no preprocessing
        _ => Ok(expr),
    }
}
4285
/// Transpile SQL written in this dialect into the given target dialect,
/// using default [`TranspileOptions`].
///
/// The target may be either a built-in [`DialectType`] enum variant or a
/// reference to a [`Dialect`] handle (built-in or custom). Both work:
///
/// ```rust,ignore
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// pg.transpile("SELECT NOW()", DialectType::BigQuery)?;  // enum
/// pg.transpile("SELECT NOW()", &custom_dialect)?;        // handle
/// ```
///
/// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
#[cfg(feature = "transpile")]
pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
    let default_opts = TranspileOptions::default();
    self.transpile_with(sql, target, default_opts)
}
4302
/// Transpile SQL into the target dialect with caller-supplied
/// [`TranspileOptions`] (e.g. pretty-printing).
///
/// The target is resolved to a `&Dialect` through
/// [`TranspileTarget::with_dialect`] and the actual work is delegated to the
/// internal transpile routine.
#[cfg(feature = "transpile")]
pub fn transpile_with<T: TranspileTarget>(
    &self,
    sql: &str,
    target: T,
    opts: TranspileOptions,
) -> Result<Vec<String>> {
    let run = |resolved: &Dialect| self.transpile_inner(sql, resolved, &opts);
    target.with_dialect(run)
}
4313
4314 #[cfg(feature = "transpile")]
4315 fn transpile_inner(
4316 &self,
4317 sql: &str,
4318 target_dialect: &Dialect,
4319 opts: &TranspileOptions,
4320 ) -> Result<Vec<String>> {
4321 let target = target_dialect.dialect_type;
4322 if matches!(self.dialect_type, DialectType::PostgreSQL)
4323 && matches!(target, DialectType::SQLite)
4324 {
4325 self.reject_pgvector_distance_operators_for_sqlite(sql)?;
4326 }
4327 let expressions = self.parse(sql)?;
4328 let generic_identity =
4329 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
4330
4331 if generic_identity {
4332 return expressions
4333 .into_iter()
4334 .map(|expr| {
4335 Self::reject_strict_unsupported(&expr, self.dialect_type, target, opts)?;
4336 target_dialect.generate_with_transpile_options(&expr, self.dialect_type, opts)
4337 })
4338 .collect();
4339 }
4340
4341 expressions
4342 .into_iter()
4343 .map(|expr| {
4344 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
4345 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
4346 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
4347 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
4348 use crate::expressions::DataType as DT;
4349 transform_recursive(expr, &|e| match e {
4350 Expression::DataType(DT::VarChar { .. }) => {
4351 Ok(Expression::DataType(DT::Text))
4352 }
4353 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
4354 _ => Ok(e),
4355 })?
4356 } else {
4357 expr
4358 };
4359
4360 // When source and target differ, first normalize the source dialect's
4361 // AST constructs to standard SQL, so that the target dialect can handle them.
4362 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
4363 let normalized =
4364 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
4365 self.transform(expr)?
4366 } else {
4367 expr
4368 };
4369
4370 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
4371 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
4372 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
4373 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
4374 let normalized =
4375 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4376 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4377 {
4378 transform_recursive(normalized, &|e| {
4379 if let Expression::Function(ref f) = e {
4380 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
4381 // Check if first arg is JSON_QUERY and second is JSON_VALUE
4382 if let (
4383 Expression::Function(ref jq),
4384 Expression::Function(ref jv),
4385 ) = (&f.args[0], &f.args[1])
4386 {
4387 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
4388 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
4389 {
4390 // Unwrap: return just JSON_QUERY(...)
4391 return Ok(f.args[0].clone());
4392 }
4393 }
4394 }
4395 }
4396 Ok(e)
4397 })?
4398 } else {
4399 normalized
4400 };
4401
4402 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
4403 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
4404 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
4405 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4406 && !matches!(target, DialectType::Snowflake)
4407 {
4408 transform_recursive(normalized, &|e| {
4409 if let Expression::Function(ref f) = e {
4410 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
4411 return Ok(Expression::Localtime(Box::new(
4412 crate::expressions::Localtime { this: None },
4413 )));
4414 }
4415 }
4416 Ok(e)
4417 })?
4418 } else {
4419 normalized
4420 };
4421
4422 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
4423 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
4424 // transform. DuckDB requires the count argument to be BIGINT.
4425 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4426 && matches!(target, DialectType::DuckDB)
4427 {
4428 transform_recursive(normalized, &|e| {
4429 if let Expression::Function(ref f) = e {
4430 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
4431 // Check if first arg is space string literal
4432 if let Expression::Literal(ref lit) = f.args[0] {
4433 if let crate::expressions::Literal::String(ref s) = lit.as_ref()
4434 {
4435 if s == " " {
4436 // Wrap second arg in CAST(... AS BIGINT) if not already
4437 if !matches!(f.args[1], Expression::Cast(_)) {
4438 let mut new_args = f.args.clone();
4439 new_args[1] = Expression::Cast(Box::new(
4440 crate::expressions::Cast {
4441 this: new_args[1].clone(),
4442 to: crate::expressions::DataType::BigInt {
4443 length: None,
4444 },
4445 trailing_comments: Vec::new(),
4446 double_colon_syntax: false,
4447 format: None,
4448 default: None,
4449 inferred_type: None,
4450 },
4451 ));
4452 return Ok(Expression::Function(Box::new(
4453 crate::expressions::Function {
4454 name: f.name.clone(),
4455 args: new_args,
4456 distinct: f.distinct,
4457 trailing_comments: f
4458 .trailing_comments
4459 .clone(),
4460 use_bracket_syntax: f.use_bracket_syntax,
4461 no_parens: f.no_parens,
4462 quoted: f.quoted,
4463 span: None,
4464 inferred_type: None,
4465 },
4466 )));
4467 }
4468 }
4469 }
4470 }
4471 }
4472 }
4473 Ok(e)
4474 })?
4475 } else {
4476 normalized
4477 };
4478
4479 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
4480 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
4481 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4482 && !matches!(target, DialectType::BigQuery)
4483 {
4484 crate::transforms::propagate_struct_field_names(normalized)?
4485 } else {
4486 normalized
4487 };
4488
4489 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
4490 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
4491 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
4492 // functions handle their generator args differently (as float seeds).
4493 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4494 && matches!(target, DialectType::DuckDB)
4495 {
4496 fn make_scaled_random() -> Expression {
4497 let lower =
4498 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4499 "-9.223372036854776E+18".to_string(),
4500 )));
4501 let upper =
4502 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4503 "9.223372036854776e+18".to_string(),
4504 )));
4505 let random_call = Expression::Random(crate::expressions::Random);
4506 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
4507 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
4508 left: upper,
4509 right: lower.clone(),
4510 left_comments: vec![],
4511 operator_comments: vec![],
4512 trailing_comments: vec![],
4513 inferred_type: None,
4514 })),
4515 trailing_comments: vec![],
4516 }));
4517 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
4518 left: random_call,
4519 right: range_size,
4520 left_comments: vec![],
4521 operator_comments: vec![],
4522 trailing_comments: vec![],
4523 inferred_type: None,
4524 }));
4525 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
4526 left: lower,
4527 right: scaled,
4528 left_comments: vec![],
4529 operator_comments: vec![],
4530 trailing_comments: vec![],
4531 inferred_type: None,
4532 }));
4533 Expression::Cast(Box::new(crate::expressions::Cast {
4534 this: shifted,
4535 to: crate::expressions::DataType::BigInt { length: None },
4536 trailing_comments: vec![],
4537 double_colon_syntax: false,
4538 format: None,
4539 default: None,
4540 inferred_type: None,
4541 }))
4542 }
4543
4544 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
4545 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
4546 // This prevents transform_recursive (which is bottom-up) from expanding
4547 // seeded RANDOM into make_scaled_random() and losing the seed value.
4548 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
4549 // and then un-expanded back to Expression::Random by the code below.
4550 let normalized = transform_recursive(normalized, &|e| {
4551 if let Expression::Function(ref f) = e {
4552 let n = f.name.to_ascii_uppercase();
4553 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
4554 if let Expression::Function(mut f) = e {
4555 for arg in f.args.iter_mut() {
4556 if let Expression::Rand(ref r) = arg {
4557 if r.lower.is_none() && r.upper.is_none() {
4558 if let Some(ref seed) = r.seed {
4559 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
4560 // so it won't be expanded by the RANDOM expansion below
4561 *arg = Expression::Function(Box::new(
4562 crate::expressions::Function::new(
4563 "RANDOM".to_string(),
4564 vec![*seed.clone()],
4565 ),
4566 ));
4567 }
4568 }
4569 }
4570 }
4571 return Ok(Expression::Function(f));
4572 }
4573 }
4574 }
4575 Ok(e)
4576 })?;
4577
4578 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
4579 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
4580 // we see the parent. We detect this and undo the expansion by replacing
4581 // the expanded pattern back with Expression::Random.
4582 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
4583 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
4584 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
4585 transform_recursive(normalized, &|e| {
4586 if let Expression::Function(ref f) = e {
4587 let n = f.name.to_ascii_uppercase();
4588 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
4589 if let Expression::Function(mut f) = e {
4590 for arg in f.args.iter_mut() {
4591 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
4592 if let Expression::Cast(ref cast) = arg {
4593 if matches!(
4594 cast.to,
4595 crate::expressions::DataType::BigInt { .. }
4596 ) {
4597 if let Expression::Add(ref add) = cast.this {
4598 if let Expression::Literal(ref lit) = add.left {
4599 if let crate::expressions::Literal::Number(
4600 ref num,
4601 ) = lit.as_ref()
4602 {
4603 if num == "-9.223372036854776E+18" {
4604 *arg = Expression::Random(
4605 crate::expressions::Random,
4606 );
4607 }
4608 }
4609 }
4610 }
4611 }
4612 }
4613 }
4614 return Ok(Expression::Function(f));
4615 }
4616 return Ok(e);
4617 }
4618 }
4619 match e {
4620 Expression::Random(_) => Ok(make_scaled_random()),
4621 // Rand(seed) with no bounds: drop seed and expand
4622 // (DuckDB RANDOM doesn't support seeds)
4623 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
4624 Ok(make_scaled_random())
4625 }
4626 _ => Ok(e),
4627 }
4628 })?
4629 } else {
4630 normalized
4631 };
4632
4633 // Apply cross-dialect semantic normalizations
4634 let normalized =
4635 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
4636
4637 let normalized = if matches!(self.dialect_type, DialectType::SQLite)
4638 && !matches!(target, DialectType::SQLite)
4639 {
4640 Self::normalize_sqlite_double_quoted_defaults(normalized)?
4641 } else {
4642 normalized
4643 };
4644
4645 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4646 && matches!(target, DialectType::SQLite)
4647 {
4648 Self::normalize_postgres_to_sqlite_types(normalized)?
4649 } else {
4650 normalized
4651 };
4652
4653 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4654 && matches!(target, DialectType::Fabric)
4655 {
4656 Self::normalize_postgres_to_fabric_decimal_types(normalized)?
4657 } else {
4658 normalized
4659 };
4660
4661 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
4662 // (SELECT UNNEST(..., max_depth => 2)) subquery
4663 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
4664 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4665 && matches!(target, DialectType::DuckDB)
4666 {
4667 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
4668 } else {
4669 normalized
4670 };
4671
4672 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
4673 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
4674 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4675 && matches!(
4676 target,
4677 DialectType::DuckDB
4678 | DialectType::Presto
4679 | DialectType::Trino
4680 | DialectType::Athena
4681 | DialectType::Spark
4682 | DialectType::Databricks
4683 ) {
4684 crate::transforms::unnest_alias_to_column_alias(normalized)?
4685 } else if matches!(self.dialect_type, DialectType::BigQuery)
4686 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
4687 {
4688 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
4689 // but don't convert alias format (no _t0 wrapper)
4690 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
4691 // For Redshift: strip UNNEST when arg is a column reference path
4692 if matches!(target, DialectType::Redshift) {
4693 crate::transforms::strip_unnest_column_refs(result)?
4694 } else {
4695 result
4696 }
4697 } else {
4698 normalized
4699 };
4700
4701 // For Presto/Trino targets from PostgreSQL/Redshift source:
4702 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
4703 let normalized = if matches!(
4704 self.dialect_type,
4705 DialectType::PostgreSQL | DialectType::Redshift
4706 ) && matches!(
4707 target,
4708 DialectType::Presto | DialectType::Trino | DialectType::Athena
4709 ) {
4710 crate::transforms::wrap_unnest_join_aliases(normalized)?
4711 } else {
4712 normalized
4713 };
4714
4715 // Eliminate DISTINCT ON with target-dialect awareness
4716 // This must happen after source transform (which may produce DISTINCT ON)
4717 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
4718 let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
4719 normalized,
4720 Some(target),
4721 Some(self.dialect_type),
4722 )?;
4723
4724 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
4725 let normalized = if matches!(target, DialectType::Snowflake) {
4726 Self::transform_generate_date_array_snowflake(normalized)?
4727 } else {
4728 normalized
4729 };
4730
4731 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
4732 let normalized = if matches!(
4733 target,
4734 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4735 ) {
4736 crate::transforms::unnest_to_explode_select(normalized)?
4737 } else {
4738 normalized
4739 };
4740
4741 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
4742 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
4743 crate::transforms::no_limit_order_by_union(normalized)?
4744 } else {
4745 normalized
4746 };
4747
4748 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
4749 // Python sqlglot does this in the TSQL generator, but we can't do it there
4750 // because it would break TSQL -> TSQL identity
4751 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4752 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4753 {
4754 transform_recursive(normalized, &|e| {
4755 if let Expression::Count(ref c) = e {
4756 // Build COUNT_BIG(...) as an AggregateFunction
4757 let args = if c.star {
4758 vec![Expression::Star(crate::expressions::Star {
4759 table: None,
4760 except: None,
4761 replace: None,
4762 rename: None,
4763 trailing_comments: Vec::new(),
4764 span: None,
4765 })]
4766 } else if let Some(ref this) = c.this {
4767 vec![this.clone()]
4768 } else {
4769 vec![]
4770 };
4771 Ok(Expression::AggregateFunction(Box::new(
4772 crate::expressions::AggregateFunction {
4773 name: "COUNT_BIG".to_string(),
4774 args,
4775 distinct: c.distinct,
4776 filter: c.filter.clone(),
4777 order_by: Vec::new(),
4778 limit: None,
4779 ignore_nulls: None,
4780 inferred_type: None,
4781 },
4782 )))
4783 } else {
4784 Ok(e)
4785 }
4786 })?
4787 } else {
4788 normalized
4789 };
4790
4791 // T-SQL/Fabric do not have a scalar boolean type. Keep predicate
4792 // contexts intact, but materialize boolean-valued expressions used
4793 // as values before target transforms add ORDER BY null sort keys.
4794 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4795 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4796 {
4797 Self::rewrite_boolean_values_for_tsql(normalized)?
4798 } else {
4799 normalized
4800 };
4801
4802 let transformed = target_dialect.transform(normalized)?;
4803
4804 // T-SQL and Fabric do not support aggregate FILTER clauses. Rewrite any
4805 // remaining filters after target transforms so special aggregate rewrites
4806 // (for example BOOL_OR/BOOL_AND) can consume their filters first.
4807 let transformed = if matches!(target, DialectType::TSQL | DialectType::Fabric) {
4808 Self::rewrite_aggregate_filters_for_tsql(transformed)?
4809 } else {
4810 transformed
4811 };
4812
4813 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
4814 let transformed = if matches!(target, DialectType::DuckDB) {
4815 Self::seq_rownum_to_range(transformed)?
4816 } else {
4817 transformed
4818 };
4819
4820 Self::reject_strict_unsupported(&transformed, self.dialect_type, target, opts)?;
4821
4822 let mut sql = target_dialect.generate_with_transpile_options(
4823 &transformed,
4824 self.dialect_type,
4825 opts,
4826 )?;
4827
4828 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
4829 if opts.pretty && target == DialectType::Snowflake {
4830 sql = Self::normalize_snowflake_pretty(sql);
4831 }
4832
4833 Ok(sql)
4834 })
4835 .collect()
4836 }
4837}
4838
4839// Transpile-only methods: cross-dialect normalization and helpers
4840#[cfg(feature = "transpile")]
4841impl Dialect {
4842 fn reject_strict_unsupported(
4843 expr: &Expression,
4844 source: DialectType,
4845 target: DialectType,
4846 opts: &TranspileOptions,
4847 ) -> Result<()> {
4848 if !matches!(
4849 opts.unsupported_level,
4850 UnsupportedLevel::Raise | UnsupportedLevel::Immediate
4851 ) {
4852 return Ok(());
4853 }
4854
4855 let mut diagnostics = Vec::new();
4856
4857 for node in expr.dfs() {
4858 if matches!(target, DialectType::Fabric | DialectType::Hive)
4859 && Self::node_has_recursive_with(node)
4860 {
4861 Self::push_unsupported_diagnostic(&mut diagnostics, "recursive CTEs");
4862 }
4863
4864 if matches!(target, DialectType::TSQL | DialectType::Fabric)
4865 && Self::node_has_lateral(node)
4866 {
4867 Self::push_unsupported_diagnostic(&mut diagnostics, "LATERAL joins and subqueries");
4868 }
4869
4870 if !Self::target_supports_remaining_unnest(target) && Self::node_is_unnest(node) {
4871 Self::push_unsupported_diagnostic(&mut diagnostics, "UNNEST");
4872 }
4873
4874 if !Self::target_supports_remaining_explode(target) && Self::node_is_explode(node) {
4875 Self::push_unsupported_diagnostic(&mut diagnostics, "EXPLODE");
4876 }
4877
4878 if Self::target_lacks_array_agg(target) && Self::node_is_array_agg(node) {
4879 Self::push_unsupported_diagnostic(&mut diagnostics, "ARRAY_AGG");
4880 }
4881
4882 if matches!(source, DialectType::PostgreSQL | DialectType::CockroachDB)
4883 && !matches!(target, DialectType::PostgreSQL | DialectType::CockroachDB)
4884 {
4885 if Self::node_is_function_named(node, "JSONB_BUILD_OBJECT") {
4886 Self::push_unsupported_diagnostic(
4887 &mut diagnostics,
4888 "PostgreSQL JSONB_BUILD_OBJECT",
4889 );
4890 }
4891 if Self::node_is_function_named(node, "TO_TSVECTOR") {
4892 Self::push_unsupported_diagnostic(&mut diagnostics, "PostgreSQL TO_TSVECTOR");
4893 }
4894 }
4895
4896 if opts.unsupported_level == UnsupportedLevel::Immediate && !diagnostics.is_empty() {
4897 break;
4898 }
4899 }
4900
4901 if diagnostics.is_empty() {
4902 return Ok(());
4903 }
4904
4905 let limit = if opts.unsupported_level == UnsupportedLevel::Immediate {
4906 1
4907 } else {
4908 opts.max_unsupported.max(1)
4909 };
4910 let mut messages = diagnostics.iter().take(limit).cloned().collect::<Vec<_>>();
4911 if diagnostics.len() > limit {
4912 messages.push(format!("... and {} more", diagnostics.len() - limit));
4913 }
4914
4915 Err(crate::error::Error::unsupported(
4916 messages.join("; "),
4917 target.to_string(),
4918 ))
4919 }
4920
/// Appends `message` to `diagnostics` unless an identical entry is already
/// present, so each unsupported feature is reported once, in first-seen order.
fn push_unsupported_diagnostic(diagnostics: &mut Vec<String>, message: &str) {
    let already_recorded = diagnostics.iter().any(|known| known.as_str() == message);
    if already_recorded {
        return;
    }
    diagnostics.push(message.to_owned());
}
4926
4927 fn node_has_recursive_with(expr: &Expression) -> bool {
4928 fn recursive(with: &Option<With>) -> bool {
4929 with.as_ref().is_some_and(|with| with.recursive)
4930 }
4931
4932 match expr {
4933 Expression::With(with) => with.recursive,
4934 Expression::Select(select) => recursive(&select.with),
4935 Expression::Union(union) => recursive(&union.with),
4936 Expression::Intersect(intersect) => recursive(&intersect.with),
4937 Expression::Except(except) => recursive(&except.with),
4938 Expression::Pivot(pivot) => recursive(&pivot.with),
4939 Expression::Insert(insert) => recursive(&insert.with),
4940 Expression::Update(update) => recursive(&update.with),
4941 Expression::Delete(delete) => recursive(&delete.with),
4942 _ => false,
4943 }
4944 }
4945
4946 fn node_has_lateral(expr: &Expression) -> bool {
4947 fn joins_have_lateral(joins: &[Join]) -> bool {
4948 joins.iter().any(|join| {
4949 matches!(
4950 join.kind,
4951 crate::expressions::JoinKind::Lateral
4952 | crate::expressions::JoinKind::LeftLateral
4953 )
4954 })
4955 }
4956
4957 match expr {
4958 Expression::Subquery(subquery) => subquery.lateral,
4959 Expression::Lateral(_) | Expression::LateralView(_) => true,
4960 Expression::Join(join) => matches!(
4961 join.kind,
4962 crate::expressions::JoinKind::Lateral | crate::expressions::JoinKind::LeftLateral
4963 ),
4964 Expression::Select(select) => {
4965 !select.lateral_views.is_empty() || joins_have_lateral(&select.joins)
4966 }
4967 Expression::JoinedTable(joined) => {
4968 !joined.lateral_views.is_empty() || joins_have_lateral(&joined.joins)
4969 }
4970 Expression::Update(update) => {
4971 joins_have_lateral(&update.table_joins) || joins_have_lateral(&update.from_joins)
4972 }
4973 _ => false,
4974 }
4975 }
4976
4977 fn target_supports_remaining_unnest(target: DialectType) -> bool {
4978 matches!(
4979 target,
4980 DialectType::PostgreSQL
4981 | DialectType::BigQuery
4982 | DialectType::DuckDB
4983 | DialectType::Presto
4984 | DialectType::Trino
4985 | DialectType::Athena
4986 )
4987 }
4988
4989 fn target_supports_remaining_explode(target: DialectType) -> bool {
4990 matches!(
4991 target,
4992 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4993 )
4994 }
4995
4996 fn target_lacks_array_agg(target: DialectType) -> bool {
4997 matches!(
4998 target,
4999 DialectType::Fabric
5000 | DialectType::TSQL
5001 | DialectType::MySQL
5002 | DialectType::SQLite
5003 | DialectType::Oracle
5004 )
5005 }
5006
5007 fn node_is_unnest(expr: &Expression) -> bool {
5008 matches!(expr, Expression::Unnest(_)) || Self::node_is_function_named(expr, "UNNEST")
5009 }
5010
5011 fn node_is_explode(expr: &Expression) -> bool {
5012 matches!(expr, Expression::Explode(_) | Expression::ExplodeOuter(_))
5013 || Self::node_is_function_named(expr, "EXPLODE")
5014 || Self::node_is_function_named(expr, "EXPLODE_OUTER")
5015 }
5016
5017 fn node_is_array_agg(expr: &Expression) -> bool {
5018 matches!(expr, Expression::ArrayAgg(_)) || Self::node_is_function_named(expr, "ARRAY_AGG")
5019 }
5020
5021 fn node_is_function_named(expr: &Expression, name: &str) -> bool {
5022 match expr {
5023 Expression::Function(function) => function.name.eq_ignore_ascii_case(name),
5024 Expression::AggregateFunction(function) => function.name.eq_ignore_ascii_case(name),
5025 _ => false,
5026 }
5027 }
5028
5029 fn rewrite_boolean_values_for_tsql(expr: Expression) -> Result<Expression> {
5030 match expr {
5031 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5032 Expression::Subquery(mut subquery) => {
5033 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5034 Ok(Expression::Subquery(subquery))
5035 }
5036 Expression::Union(mut union) => {
5037 let left = std::mem::replace(&mut union.left, Expression::null());
5038 let right = std::mem::replace(&mut union.right, Expression::null());
5039 union.left = Self::rewrite_boolean_values_for_tsql(left)?;
5040 union.right = Self::rewrite_boolean_values_for_tsql(right)?;
5041 if let Some(mut with) = union.with.take() {
5042 with.ctes = with
5043 .ctes
5044 .into_iter()
5045 .map(|mut cte| {
5046 cte.this = Self::rewrite_boolean_values_for_tsql(cte.this)?;
5047 Ok(cte)
5048 })
5049 .collect::<Result<Vec<_>>>()?;
5050 union.with = Some(with);
5051 }
5052 Ok(Expression::Union(union))
5053 }
5054 Expression::Intersect(mut intersect) => {
5055 let left = std::mem::replace(&mut intersect.left, Expression::null());
5056 let right = std::mem::replace(&mut intersect.right, Expression::null());
5057 intersect.left = Self::rewrite_boolean_values_for_tsql(left)?;
5058 intersect.right = Self::rewrite_boolean_values_for_tsql(right)?;
5059 Ok(Expression::Intersect(intersect))
5060 }
5061 Expression::Except(mut except) => {
5062 let left = std::mem::replace(&mut except.left, Expression::null());
5063 let right = std::mem::replace(&mut except.right, Expression::null());
5064 except.left = Self::rewrite_boolean_values_for_tsql(left)?;
5065 except.right = Self::rewrite_boolean_values_for_tsql(right)?;
5066 Ok(Expression::Except(except))
5067 }
5068 other => Self::rewrite_tsql_boolean_embedded_queries(other),
5069 }
5070 }
5071
/// Rewrites one SELECT for T-SQL/Fabric, clause by clause.
///
/// Clauses fall into three groups, each handled by a different helper:
/// * value positions (projection, GROUP BY, DISTRIBUTE BY, LIMIT BY,
///   DISTINCT ON, and the keys of ORDER BY / CLUSTER BY / SORT BY / named
///   windows) go through `rewrite_tsql_boolean_scalar_value`;
/// * predicate positions (JOIN ON / match condition, PREWHERE, WHERE, HAVING,
///   QUALIFY) go through `rewrite_tsql_boolean_predicate_context`;
/// * everything else (FROM items, join targets and pivots, lateral views,
///   sample arguments, SETTINGS, FORMAT) only has embedded queries rewritten.
///
/// CTE bodies in the SELECT's WITH clause are rewritten as full queries.
/// Returns the rebuilt `Expression::Select`, or the first error from a nested
/// rewrite.
fn rewrite_boolean_values_in_tsql_select(
    mut select: Box<crate::expressions::Select>,
) -> Result<Expression> {
    // CTE bodies are complete queries, so they restart at the top-level entry point.
    if let Some(mut with) = select.with.take() {
        with.ctes = with
            .ctes
            .into_iter()
            .map(|mut cte| {
                cte.this = Self::rewrite_boolean_values_for_tsql(cte.this)?;
                Ok(cte)
            })
            .collect::<Result<Vec<_>>>()?;
        select.with = Some(with);
    }

    // Projection items are values.
    select.expressions = select
        .expressions
        .into_iter()
        .map(Self::rewrite_tsql_boolean_scalar_value)
        .collect::<Result<Vec<_>>>()?;

    // FROM items are relations; only queries nested in them are rewritten.
    if let Some(mut from) = select.from.take() {
        from.expressions = from
            .expressions
            .into_iter()
            .map(Self::rewrite_tsql_boolean_embedded_queries)
            .collect::<Result<Vec<_>>>()?;
        select.from = Some(from);
    }

    // Joins: the target and pivots are relations, ON / match condition are predicates.
    select.joins = select
        .joins
        .into_iter()
        .map(|mut join| {
            join.this = Self::rewrite_tsql_boolean_embedded_queries(join.this)?;
            if let Some(on) = join.on.take() {
                join.on = Some(Self::rewrite_tsql_boolean_predicate_context(on)?);
            }
            if let Some(match_condition) = join.match_condition.take() {
                join.match_condition = Some(Self::rewrite_tsql_boolean_predicate_context(
                    match_condition,
                )?);
            }
            join.pivots = join
                .pivots
                .into_iter()
                .map(Self::rewrite_tsql_boolean_embedded_queries)
                .collect::<Result<Vec<_>>>()?;
            Ok(join)
        })
        .collect::<Result<Vec<_>>>()?;

    select.lateral_views = select
        .lateral_views
        .into_iter()
        .map(|mut lateral_view| {
            lateral_view.this = Self::rewrite_tsql_boolean_embedded_queries(lateral_view.this)?;
            Ok(lateral_view)
        })
        .collect::<Result<Vec<_>>>()?;

    // Filtering clauses are predicate contexts.
    if let Some(prewhere) = select.prewhere.take() {
        select.prewhere = Some(Self::rewrite_tsql_boolean_predicate_context(prewhere)?);
    }

    if let Some(mut where_clause) = select.where_clause.take() {
        where_clause.this = Self::rewrite_tsql_boolean_predicate_context(where_clause.this)?;
        select.where_clause = Some(where_clause);
    }

    // Grouping keys are values.
    if let Some(mut group_by) = select.group_by.take() {
        group_by.expressions = group_by
            .expressions
            .into_iter()
            .map(Self::rewrite_tsql_boolean_scalar_value)
            .collect::<Result<Vec<_>>>()?;
        select.group_by = Some(group_by);
    }

    if let Some(mut having) = select.having.take() {
        having.this = Self::rewrite_tsql_boolean_predicate_context(having.this)?;
        select.having = Some(having);
    }

    if let Some(mut qualify) = select.qualify.take() {
        qualify.this = Self::rewrite_tsql_boolean_predicate_context(qualify.this)?;
        select.qualify = Some(qualify);
    }

    // Ordering-style clauses: each key is a value.
    if let Some(mut order_by) = select.order_by.take() {
        order_by.expressions = Self::rewrite_tsql_boolean_ordered_values(order_by.expressions)?;
        select.order_by = Some(order_by);
    }

    if let Some(mut distribute_by) = select.distribute_by.take() {
        distribute_by.expressions = distribute_by
            .expressions
            .into_iter()
            .map(Self::rewrite_tsql_boolean_scalar_value)
            .collect::<Result<Vec<_>>>()?;
        select.distribute_by = Some(distribute_by);
    }

    if let Some(mut cluster_by) = select.cluster_by.take() {
        cluster_by.expressions =
            Self::rewrite_tsql_boolean_ordered_values(cluster_by.expressions)?;
        select.cluster_by = Some(cluster_by);
    }

    if let Some(mut sort_by) = select.sort_by.take() {
        sort_by.expressions = Self::rewrite_tsql_boolean_ordered_values(sort_by.expressions)?;
        select.sort_by = Some(sort_by);
    }

    if let Some(limit_by) = select.limit_by.take() {
        select.limit_by = Some(
            limit_by
                .into_iter()
                .map(Self::rewrite_tsql_boolean_scalar_value)
                .collect::<Result<Vec<_>>>()?,
        );
    }

    if let Some(distinct_on) = select.distinct_on.take() {
        select.distinct_on = Some(
            distinct_on
                .into_iter()
                .map(Self::rewrite_tsql_boolean_scalar_value)
                .collect::<Result<Vec<_>>>()?,
        );
    }

    // Sample arguments only have embedded queries rewritten.
    if let Some(mut sample) = select.sample.take() {
        sample.size = Self::rewrite_tsql_boolean_embedded_queries(sample.size)?;
        if let Some(offset) = sample.offset.take() {
            sample.offset = Some(Self::rewrite_tsql_boolean_embedded_queries(offset)?);
        }
        if let Some(bucket_numerator) = sample.bucket_numerator.take() {
            sample.bucket_numerator = Some(Box::new(
                Self::rewrite_tsql_boolean_embedded_queries(*bucket_numerator)?,
            ));
        }
        if let Some(bucket_denominator) = sample.bucket_denominator.take() {
            sample.bucket_denominator = Some(Box::new(
                Self::rewrite_tsql_boolean_embedded_queries(*bucket_denominator)?,
            ));
        }
        if let Some(bucket_field) = sample.bucket_field.take() {
            sample.bucket_field = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
                *bucket_field,
            )?));
        }
        select.sample = Some(sample);
    }

    if let Some(settings) = select.settings.take() {
        select.settings = Some(
            settings
                .into_iter()
                .map(Self::rewrite_tsql_boolean_embedded_queries)
                .collect::<Result<Vec<_>>>()?,
        );
    }

    if let Some(format) = select.format.take() {
        select.format = Some(Self::rewrite_tsql_boolean_embedded_queries(format)?);
    }

    // Named windows: PARTITION BY / ORDER BY keys are values.
    if let Some(mut windows) = select.windows.take() {
        for window in windows.iter_mut() {
            Self::rewrite_tsql_boolean_over_values(&mut window.spec)?;
        }
        select.windows = Some(windows);
    }

    Ok(Expression::Select(select))
}
5249
5250 fn rewrite_tsql_boolean_scalar_value(expr: Expression) -> Result<Expression> {
5251 if Self::is_tsql_boolean_value_expression(&expr) {
5252 return Ok(Self::tsql_boolean_value_case(expr));
5253 }
5254
5255 match expr {
5256 Expression::Alias(mut alias) => {
5257 alias.this = Self::rewrite_tsql_boolean_scalar_value(alias.this)?;
5258 Ok(Expression::Alias(alias))
5259 }
5260 Expression::Paren(mut paren) => {
5261 paren.this = Self::rewrite_tsql_boolean_scalar_value(paren.this)?;
5262 Ok(Expression::Paren(paren))
5263 }
5264 Expression::Cast(mut cast) => {
5265 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5266 if let Some(format) = cast.format.take() {
5267 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5268 *format,
5269 )?));
5270 }
5271 if let Some(default) = cast.default.take() {
5272 cast.default =
5273 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5274 }
5275 Ok(Expression::Cast(cast))
5276 }
5277 Expression::TryCast(mut cast) => {
5278 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5279 if let Some(format) = cast.format.take() {
5280 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5281 *format,
5282 )?));
5283 }
5284 if let Some(default) = cast.default.take() {
5285 cast.default =
5286 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5287 }
5288 Ok(Expression::TryCast(cast))
5289 }
5290 Expression::SafeCast(mut cast) => {
5291 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5292 if let Some(format) = cast.format.take() {
5293 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5294 *format,
5295 )?));
5296 }
5297 if let Some(default) = cast.default.take() {
5298 cast.default =
5299 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5300 }
5301 Ok(Expression::SafeCast(cast))
5302 }
5303 Expression::Case(mut case) => {
5304 if let Some(operand) = case.operand.take() {
5305 case.operand = Some(Self::rewrite_tsql_boolean_scalar_value(operand)?);
5306 }
5307 case.whens = case
5308 .whens
5309 .into_iter()
5310 .map(|(condition, result)| {
5311 Ok((
5312 Self::rewrite_tsql_boolean_predicate_context(condition)?,
5313 Self::rewrite_tsql_boolean_scalar_value(result)?,
5314 ))
5315 })
5316 .collect::<Result<Vec<_>>>()?;
5317 if let Some(else_) = case.else_.take() {
5318 case.else_ = Some(Self::rewrite_tsql_boolean_scalar_value(else_)?);
5319 }
5320 Ok(Expression::Case(case))
5321 }
5322 Expression::IfFunc(mut if_func) => {
5323 if_func.condition =
5324 Self::rewrite_tsql_boolean_predicate_context(if_func.condition)?;
5325 if_func.true_value = Self::rewrite_tsql_boolean_scalar_value(if_func.true_value)?;
5326 if let Some(false_value) = if_func.false_value.take() {
5327 if_func.false_value =
5328 Some(Self::rewrite_tsql_boolean_scalar_value(false_value)?);
5329 }
5330 Ok(Expression::IfFunc(if_func))
5331 }
5332 Expression::WindowFunction(mut window_function) => {
5333 window_function.this =
5334 Self::rewrite_tsql_boolean_embedded_queries(window_function.this)?;
5335 Self::rewrite_tsql_boolean_over_values(&mut window_function.over)?;
5336 if let Some(mut keep) = window_function.keep.take() {
5337 keep.order_by = Self::rewrite_tsql_boolean_ordered_values(keep.order_by)?;
5338 window_function.keep = Some(keep);
5339 }
5340 Ok(Expression::WindowFunction(window_function))
5341 }
5342 Expression::WithinGroup(mut within_group) => {
5343 within_group.this = Self::rewrite_tsql_boolean_embedded_queries(within_group.this)?;
5344 within_group.order_by =
5345 Self::rewrite_tsql_boolean_ordered_values(within_group.order_by)?;
5346 Ok(Expression::WithinGroup(within_group))
5347 }
5348 Expression::Subquery(mut subquery) => {
5349 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5350 Ok(Expression::Subquery(subquery))
5351 }
5352 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5353 other => Self::rewrite_tsql_boolean_embedded_queries(other),
5354 }
5355 }
5356
/// Rewrites an expression that sits in a predicate position (WHERE, HAVING,
/// JOIN ON, CASE WHEN, ...).
///
/// The predicate itself is left as-is; this currently just delegates to
/// `rewrite_tsql_boolean_embedded_queries`, so only queries nested inside the
/// predicate are rewritten.
fn rewrite_tsql_boolean_predicate_context(expr: Expression) -> Result<Expression> {
    Self::rewrite_tsql_boolean_embedded_queries(expr)
}
5360
5361 fn rewrite_tsql_boolean_embedded_queries(expr: Expression) -> Result<Expression> {
5362 transform_recursive(expr, &|e| match e {
5363 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5364 Expression::Subquery(mut subquery) => {
5365 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5366 Ok(Expression::Subquery(subquery))
5367 }
5368 Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_) => {
5369 Self::rewrite_boolean_values_for_tsql(e)
5370 }
5371 other => Ok(other),
5372 })
5373 }
5374
5375 fn rewrite_tsql_boolean_ordered_values(
5376 ordered: Vec<crate::expressions::Ordered>,
5377 ) -> Result<Vec<crate::expressions::Ordered>> {
5378 ordered
5379 .into_iter()
5380 .map(|mut ordered| {
5381 ordered.this = Self::rewrite_tsql_boolean_scalar_value(ordered.this)?;
5382 if let Some(with_fill) = ordered.with_fill.take() {
5383 ordered.with_fill = Some(Box::new(
5384 Self::rewrite_tsql_boolean_with_fill_values(*with_fill)?,
5385 ));
5386 }
5387 Ok(ordered)
5388 })
5389 .collect()
5390 }
5391
5392 fn rewrite_tsql_boolean_with_fill_values(
5393 mut with_fill: crate::expressions::WithFill,
5394 ) -> Result<crate::expressions::WithFill> {
5395 if let Some(from) = with_fill.from_.take() {
5396 with_fill.from_ = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*from)?));
5397 }
5398 if let Some(to) = with_fill.to.take() {
5399 with_fill.to = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*to)?));
5400 }
5401 if let Some(step) = with_fill.step.take() {
5402 with_fill.step = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*step)?));
5403 }
5404 if let Some(staleness) = with_fill.staleness.take() {
5405 with_fill.staleness = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(
5406 *staleness,
5407 )?));
5408 }
5409 if let Some(interpolate) = with_fill.interpolate.take() {
5410 with_fill.interpolate = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(
5411 *interpolate,
5412 )?));
5413 }
5414 Ok(with_fill)
5415 }
5416
5417 fn rewrite_tsql_boolean_over_values(over: &mut crate::expressions::Over) -> Result<()> {
5418 over.partition_by = std::mem::take(&mut over.partition_by)
5419 .into_iter()
5420 .map(Self::rewrite_tsql_boolean_scalar_value)
5421 .collect::<Result<Vec<_>>>()?;
5422 over.order_by =
5423 Self::rewrite_tsql_boolean_ordered_values(std::mem::take(&mut over.order_by))?;
5424 Ok(())
5425 }
5426
5427 fn is_tsql_boolean_value_expression(expr: &Expression) -> bool {
5428 match expr {
5429 Expression::Paren(paren) => Self::is_tsql_boolean_value_expression(&paren.this),
5430 Expression::Eq(_)
5431 | Expression::Neq(_)
5432 | Expression::Lt(_)
5433 | Expression::Lte(_)
5434 | Expression::Gt(_)
5435 | Expression::Gte(_)
5436 | Expression::Is(_)
5437 | Expression::IsNull(_)
5438 | Expression::IsTrue(_)
5439 | Expression::IsFalse(_)
5440 | Expression::Like(_)
5441 | Expression::ILike(_)
5442 | Expression::SimilarTo(_)
5443 | Expression::Glob(_)
5444 | Expression::RegexpLike(_)
5445 | Expression::In(_)
5446 | Expression::Between(_)
5447 | Expression::Exists(_)
5448 | Expression::And(_)
5449 | Expression::Or(_)
5450 | Expression::Not(_)
5451 | Expression::Any(_)
5452 | Expression::All(_)
5453 | Expression::EqualNull(_) => true,
5454 _ => false,
5455 }
5456 }
5457
5458 fn tsql_boolean_value_case(predicate: Expression) -> Expression {
5459 Expression::Case(Box::new(crate::expressions::Case {
5460 operand: None,
5461 whens: vec![
5462 (predicate.clone(), Expression::number(1)),
5463 (
5464 Expression::Not(Box::new(crate::expressions::UnaryOp {
5465 this: predicate,
5466 inferred_type: None,
5467 })),
5468 Expression::number(0),
5469 ),
5470 ],
5471 else_: None,
5472 comments: Vec::new(),
5473 inferred_type: None,
5474 }))
5475 }
5476
/// Applies `rewrite_aggregate_filter_for_tsql` to every node of `expr` via
/// `transform_recursive`, returning the rewritten tree or the first error.
fn rewrite_aggregate_filters_for_tsql(expr: Expression) -> Result<Expression> {
    transform_recursive(expr, &|e| Self::rewrite_aggregate_filter_for_tsql(e))
}
5480
5481 fn rewrite_aggregate_filter_for_tsql(expr: Expression) -> Result<Expression> {
5482 macro_rules! rewrite_agg_filter {
5483 ($variant:ident, $agg:expr) => {{
5484 let mut agg = $agg;
5485 if let Some(filter) = agg.filter.take() {
5486 let this = std::mem::replace(&mut agg.this, Expression::null());
5487 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5488 }
5489 Ok(Expression::$variant(agg))
5490 }};
5491 }
5492
5493 match expr {
5494 Expression::Filter(filter) => {
5495 let condition = match *filter.expression {
5496 Expression::Where(where_) => where_.this,
5497 other => other,
5498 };
5499 Ok(Self::push_filter_into_tsql_aggregate(
5500 *filter.this,
5501 condition,
5502 ))
5503 }
5504 Expression::AggregateFunction(mut agg) => {
5505 if let Some(filter) = agg.filter.take() {
5506 Self::rewrite_generic_aggregate_filter_for_tsql(&mut agg, filter);
5507 }
5508 Ok(Expression::AggregateFunction(agg))
5509 }
5510 Expression::Count(mut count) => {
5511 if let Some(filter) = count.filter.take() {
5512 let value = if count.star {
5513 Expression::number(1)
5514 } else {
5515 count.this.take().unwrap_or_else(|| Expression::number(1))
5516 };
5517 count.star = false;
5518 count.this = Some(Self::conditional_aggregate_value_for_tsql(filter, value));
5519 }
5520 Ok(Expression::Count(count))
5521 }
5522 Expression::Sum(agg) => rewrite_agg_filter!(Sum, agg),
5523 Expression::Avg(agg) => rewrite_agg_filter!(Avg, agg),
5524 Expression::Min(agg) => rewrite_agg_filter!(Min, agg),
5525 Expression::Max(agg) => rewrite_agg_filter!(Max, agg),
5526 Expression::ArrayAgg(agg) => rewrite_agg_filter!(ArrayAgg, agg),
5527 Expression::CountIf(agg) => Ok(Expression::CountIf(agg)),
5528 Expression::Stddev(agg) => rewrite_agg_filter!(Stddev, agg),
5529 Expression::StddevPop(agg) => rewrite_agg_filter!(StddevPop, agg),
5530 Expression::StddevSamp(agg) => rewrite_agg_filter!(StddevSamp, agg),
5531 Expression::Variance(agg) => rewrite_agg_filter!(Variance, agg),
5532 Expression::VarPop(agg) => rewrite_agg_filter!(VarPop, agg),
5533 Expression::VarSamp(agg) => rewrite_agg_filter!(VarSamp, agg),
5534 Expression::Median(agg) => rewrite_agg_filter!(Median, agg),
5535 Expression::Mode(agg) => rewrite_agg_filter!(Mode, agg),
5536 Expression::First(agg) => rewrite_agg_filter!(First, agg),
5537 Expression::Last(agg) => rewrite_agg_filter!(Last, agg),
5538 Expression::AnyValue(agg) => rewrite_agg_filter!(AnyValue, agg),
5539 Expression::ApproxDistinct(agg) => rewrite_agg_filter!(ApproxDistinct, agg),
5540 Expression::ApproxCountDistinct(agg) => {
5541 rewrite_agg_filter!(ApproxCountDistinct, agg)
5542 }
5543 Expression::LogicalAnd(agg) => rewrite_agg_filter!(LogicalAnd, agg),
5544 Expression::LogicalOr(agg) => rewrite_agg_filter!(LogicalOr, agg),
5545 Expression::Skewness(agg) => rewrite_agg_filter!(Skewness, agg),
5546 Expression::ArrayConcatAgg(agg) => rewrite_agg_filter!(ArrayConcatAgg, agg),
5547 Expression::ArrayUniqueAgg(agg) => rewrite_agg_filter!(ArrayUniqueAgg, agg),
5548 Expression::BoolXorAgg(agg) => rewrite_agg_filter!(BoolXorAgg, agg),
5549 Expression::BitwiseAndAgg(agg) => rewrite_agg_filter!(BitwiseAndAgg, agg),
5550 Expression::BitwiseOrAgg(agg) => rewrite_agg_filter!(BitwiseOrAgg, agg),
5551 Expression::BitwiseXorAgg(agg) => rewrite_agg_filter!(BitwiseXorAgg, agg),
5552 Expression::StringAgg(mut agg) => {
5553 if let Some(filter) = agg.filter.take() {
5554 let this = std::mem::replace(&mut agg.this, Expression::null());
5555 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5556 }
5557 Ok(Expression::StringAgg(agg))
5558 }
5559 Expression::GroupConcat(mut agg) => {
5560 if let Some(filter) = agg.filter.take() {
5561 let this = std::mem::replace(&mut agg.this, Expression::null());
5562 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5563 }
5564 Ok(Expression::GroupConcat(agg))
5565 }
5566 Expression::ListAgg(mut agg) => {
5567 if let Some(filter) = agg.filter.take() {
5568 let this = std::mem::replace(&mut agg.this, Expression::null());
5569 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5570 }
5571 Ok(Expression::ListAgg(agg))
5572 }
5573 Expression::WithinGroup(mut within_group) => {
5574 within_group.this = Self::rewrite_aggregate_filters_for_tsql(within_group.this)?;
5575 Ok(Expression::WithinGroup(within_group))
5576 }
5577 other => Ok(other),
5578 }
5579 }
5580
5581 fn push_filter_into_tsql_aggregate(expr: Expression, filter: Expression) -> Expression {
5582 macro_rules! push_agg_filter {
5583 ($variant:ident, $agg:expr) => {{
5584 let mut agg = $agg;
5585 let this = std::mem::replace(&mut agg.this, Expression::null());
5586 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5587 agg.filter = None;
5588 Expression::$variant(agg)
5589 }};
5590 }
5591
5592 match expr {
5593 Expression::AggregateFunction(mut agg) => {
5594 Self::rewrite_generic_aggregate_filter_for_tsql(&mut agg, filter);
5595 Expression::AggregateFunction(agg)
5596 }
5597 Expression::Count(mut count) => {
5598 let value = if count.star {
5599 Expression::number(1)
5600 } else {
5601 count.this.take().unwrap_or_else(|| Expression::number(1))
5602 };
5603 count.star = false;
5604 count.filter = None;
5605 count.this = Some(Self::conditional_aggregate_value_for_tsql(filter, value));
5606 Expression::Count(count)
5607 }
5608 Expression::Sum(agg) => push_agg_filter!(Sum, agg),
5609 Expression::Avg(agg) => push_agg_filter!(Avg, agg),
5610 Expression::Min(agg) => push_agg_filter!(Min, agg),
5611 Expression::Max(agg) => push_agg_filter!(Max, agg),
5612 Expression::ArrayAgg(agg) => push_agg_filter!(ArrayAgg, agg),
5613 Expression::CountIf(mut agg) => {
5614 agg.filter = Some(filter);
5615 Expression::CountIf(agg)
5616 }
5617 Expression::Stddev(agg) => push_agg_filter!(Stddev, agg),
5618 Expression::StddevPop(agg) => push_agg_filter!(StddevPop, agg),
5619 Expression::StddevSamp(agg) => push_agg_filter!(StddevSamp, agg),
5620 Expression::Variance(agg) => push_agg_filter!(Variance, agg),
5621 Expression::VarPop(agg) => push_agg_filter!(VarPop, agg),
5622 Expression::VarSamp(agg) => push_agg_filter!(VarSamp, agg),
5623 Expression::Median(agg) => push_agg_filter!(Median, agg),
5624 Expression::Mode(agg) => push_agg_filter!(Mode, agg),
5625 Expression::First(agg) => push_agg_filter!(First, agg),
5626 Expression::Last(agg) => push_agg_filter!(Last, agg),
5627 Expression::AnyValue(agg) => push_agg_filter!(AnyValue, agg),
5628 Expression::ApproxDistinct(agg) => push_agg_filter!(ApproxDistinct, agg),
5629 Expression::ApproxCountDistinct(agg) => {
5630 push_agg_filter!(ApproxCountDistinct, agg)
5631 }
5632 Expression::LogicalAnd(agg) => push_agg_filter!(LogicalAnd, agg),
5633 Expression::LogicalOr(agg) => push_agg_filter!(LogicalOr, agg),
5634 Expression::Skewness(agg) => push_agg_filter!(Skewness, agg),
5635 Expression::ArrayConcatAgg(agg) => push_agg_filter!(ArrayConcatAgg, agg),
5636 Expression::ArrayUniqueAgg(agg) => push_agg_filter!(ArrayUniqueAgg, agg),
5637 Expression::BoolXorAgg(agg) => push_agg_filter!(BoolXorAgg, agg),
5638 Expression::BitwiseAndAgg(agg) => push_agg_filter!(BitwiseAndAgg, agg),
5639 Expression::BitwiseOrAgg(agg) => push_agg_filter!(BitwiseOrAgg, agg),
5640 Expression::BitwiseXorAgg(agg) => push_agg_filter!(BitwiseXorAgg, agg),
5641 Expression::StringAgg(mut agg) => {
5642 let this = std::mem::replace(&mut agg.this, Expression::null());
5643 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5644 agg.filter = None;
5645 Expression::StringAgg(agg)
5646 }
5647 Expression::GroupConcat(mut agg) => {
5648 let this = std::mem::replace(&mut agg.this, Expression::null());
5649 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5650 agg.filter = None;
5651 Expression::GroupConcat(agg)
5652 }
5653 Expression::ListAgg(mut agg) => {
5654 let this = std::mem::replace(&mut agg.this, Expression::null());
5655 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5656 agg.filter = None;
5657 Expression::ListAgg(agg)
5658 }
5659 Expression::WithinGroup(mut within_group) => {
5660 within_group.this =
5661 Self::push_filter_into_tsql_aggregate(within_group.this, filter);
5662 Expression::WithinGroup(within_group)
5663 }
5664 other => Expression::Filter(Box::new(crate::expressions::Filter {
5665 this: Box::new(other),
5666 expression: Box::new(filter),
5667 })),
5668 }
5669 }
5670
5671 fn rewrite_generic_aggregate_filter_for_tsql(
5672 agg: &mut crate::expressions::AggregateFunction,
5673 filter: Expression,
5674 ) {
5675 let is_count =
5676 agg.name.eq_ignore_ascii_case("COUNT") || agg.name.eq_ignore_ascii_case("COUNT_BIG");
5677 let is_count_star = is_count
5678 && (agg.args.is_empty()
5679 || (agg.args.len() == 1 && matches!(agg.args[0], Expression::Star(_))));
5680
5681 if is_count_star {
5682 agg.args = vec![Self::conditional_aggregate_value_for_tsql(
5683 filter,
5684 Expression::number(1),
5685 )];
5686 } else if !agg.args.is_empty() {
5687 agg.args = agg
5688 .args
5689 .drain(..)
5690 .map(|arg| Self::conditional_aggregate_value_for_tsql(filter.clone(), arg))
5691 .collect();
5692 } else {
5693 agg.filter = Some(filter);
5694 }
5695 }
5696
5697 fn conditional_aggregate_value_for_tsql(filter: Expression, value: Expression) -> Expression {
5698 Expression::Case(Box::new(crate::expressions::Case {
5699 operand: None,
5700 whens: vec![(filter, value)],
5701 else_: None,
5702 comments: Vec::new(),
5703 inferred_type: None,
5704 }))
5705 }
5706
5707 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
5708 let tokens = self.tokenize(sql)?;
5709 for (i, token) in tokens.iter().enumerate() {
5710 if token.token_type == TokenType::NullsafeEq {
5711 return Err(crate::error::Error::unsupported(
5712 "PostgreSQL pgvector cosine distance operator <=>",
5713 "SQLite",
5714 ));
5715 }
5716 if token.token_type == TokenType::Lt
5717 && tokens
5718 .get(i + 1)
5719 .is_some_and(|token| token.token_type == TokenType::Tilde)
5720 && tokens
5721 .get(i + 2)
5722 .is_some_and(|token| token.token_type == TokenType::Gt)
5723 {
5724 return Err(crate::error::Error::unsupported(
5725 "PostgreSQL pgvector Hamming distance operator <~>",
5726 "SQLite",
5727 ));
5728 }
5729 }
5730 Ok(())
5731 }
5732
5733 fn normalize_sqlite_double_quoted_defaults(expr: Expression) -> Result<Expression> {
5734 fn normalize_default_expr(expr: Expression) -> Result<Expression> {
5735 transform_recursive(expr, &|e| match e {
5736 Expression::Column(col)
5737 if col.table.is_none() && col.name.quoted && !col.join_mark =>
5738 {
5739 Ok(Expression::Literal(Box::new(Literal::String(
5740 col.name.name,
5741 ))))
5742 }
5743 Expression::Identifier(id) if id.quoted => {
5744 Ok(Expression::Literal(Box::new(Literal::String(id.name))))
5745 }
5746 _ => Ok(e),
5747 })
5748 }
5749
5750 fn normalize_column_default(col: &mut crate::expressions::ColumnDef) -> Result<()> {
5751 if let Some(default) = col.default.take() {
5752 col.default = Some(normalize_default_expr(default)?);
5753 }
5754
5755 for constraint in &mut col.constraints {
5756 if let ColumnConstraint::Default(default) = constraint {
5757 *default = normalize_default_expr(default.clone())?;
5758 }
5759 }
5760
5761 Ok(())
5762 }
5763
5764 transform_recursive(expr, &|e| match e {
5765 Expression::CreateTable(mut ct) => {
5766 for column in &mut ct.columns {
5767 normalize_column_default(column)?;
5768 }
5769 Ok(Expression::CreateTable(ct))
5770 }
5771 Expression::ColumnDef(mut col) => {
5772 normalize_column_default(&mut col)?;
5773 Ok(Expression::ColumnDef(col))
5774 }
5775 _ => Ok(e),
5776 })
5777 }
5778
5779 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
5780 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
5781 use crate::expressions::DataType;
5782
5783 match dt {
5784 DataType::Bit { .. } => DataType::Int {
5785 length: None,
5786 integer_spelling: true,
5787 },
5788 DataType::TextWithLength { .. } => DataType::Text,
5789 DataType::VarChar { .. } => DataType::Text,
5790 DataType::Char { .. } => DataType::Text,
5791 DataType::Timestamp { timezone: true, .. } => DataType::Text,
5792 DataType::Custom { name } => {
5793 let base = name
5794 .split_once('(')
5795 .map_or(name.as_str(), |(base, _)| base)
5796 .trim();
5797 if base.eq_ignore_ascii_case("TSVECTOR")
5798 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
5799 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
5800 || base.eq_ignore_ascii_case("NVARCHAR")
5801 || base.eq_ignore_ascii_case("NCHAR")
5802 {
5803 DataType::Text
5804 } else {
5805 DataType::Custom { name }
5806 }
5807 }
5808 _ => dt,
5809 }
5810 }
5811
5812 transform_recursive(expr, &|e| match e {
5813 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
5814 Expression::CreateTable(mut ct) => {
5815 for column in &mut ct.columns {
5816 column.data_type = sqlite_type(column.data_type.clone());
5817 }
5818 Ok(Expression::CreateTable(ct))
5819 }
5820 _ => Ok(e),
5821 })
5822 }
5823
5824 fn normalize_postgres_to_fabric_decimal_types(expr: Expression) -> Result<Expression> {
5825 fn fabric_decimal_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
5826 use crate::expressions::DataType;
5827
5828 match dt {
5829 DataType::Decimal {
5830 precision: None,
5831 scale: None,
5832 } => DataType::Decimal {
5833 precision: Some(38),
5834 scale: Some(10),
5835 },
5836 _ => dt,
5837 }
5838 }
5839
5840 transform_recursive(expr, &|e| match e {
5841 Expression::DataType(dt) => Ok(Expression::DataType(fabric_decimal_type(dt))),
5842 Expression::CreateTable(mut ct) => {
5843 for column in &mut ct.columns {
5844 column.data_type = fabric_decimal_type(column.data_type.clone());
5845 }
5846 Ok(Expression::CreateTable(ct))
5847 }
5848 Expression::ColumnDef(mut col) => {
5849 col.data_type = fabric_decimal_type(col.data_type);
5850 Ok(Expression::ColumnDef(col))
5851 }
5852 _ => Ok(e),
5853 })
5854 }
5855
5856 /// For DuckDB target: when FROM clause contains RANGE(n), replace
5857 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
5858 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
5859 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
5860 if let Expression::Select(mut select) = expr {
5861 // Check if FROM contains a RANGE function
5862 let has_range_from = if let Some(ref from) = select.from {
5863 from.expressions.iter().any(|e| {
5864 // Check for direct RANGE(...) or aliased RANGE(...)
5865 match e {
5866 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
5867 Expression::Alias(a) => {
5868 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
5869 }
5870 _ => false,
5871 }
5872 })
5873 } else {
5874 false
5875 };
5876
5877 if has_range_from {
5878 // Replace the ROW_NUMBER pattern in select expressions
5879 select.expressions = select
5880 .expressions
5881 .into_iter()
5882 .map(|e| Self::replace_rownum_with_range(e))
5883 .collect();
5884 }
5885
5886 Ok(Expression::Select(select))
5887 } else {
5888 Ok(expr)
5889 }
5890 }
5891
5892 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
5893 fn replace_rownum_with_range(expr: Expression) -> Expression {
5894 match expr {
5895 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
5896 Expression::Mod(op) => {
5897 let new_left = Self::try_replace_rownum_paren(&op.left);
5898 Expression::Mod(Box::new(crate::expressions::BinaryOp {
5899 left: new_left,
5900 right: op.right,
5901 left_comments: op.left_comments,
5902 operator_comments: op.operator_comments,
5903 trailing_comments: op.trailing_comments,
5904 inferred_type: op.inferred_type,
5905 }))
5906 }
5907 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
5908 Expression::Paren(p) => {
5909 let inner = Self::replace_rownum_with_range(p.this);
5910 Expression::Paren(Box::new(crate::expressions::Paren {
5911 this: inner,
5912 trailing_comments: p.trailing_comments,
5913 }))
5914 }
5915 Expression::Case(mut c) => {
5916 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
5917 c.whens = c
5918 .whens
5919 .into_iter()
5920 .map(|(cond, then)| {
5921 (
5922 Self::replace_rownum_with_range(cond),
5923 Self::replace_rownum_with_range(then),
5924 )
5925 })
5926 .collect();
5927 if let Some(else_) = c.else_ {
5928 c.else_ = Some(Self::replace_rownum_with_range(else_));
5929 }
5930 Expression::Case(c)
5931 }
5932 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
5933 left: Self::replace_rownum_with_range(op.left),
5934 right: op.right,
5935 left_comments: op.left_comments,
5936 operator_comments: op.operator_comments,
5937 trailing_comments: op.trailing_comments,
5938 inferred_type: op.inferred_type,
5939 })),
5940 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
5941 left: Self::replace_rownum_with_range(op.left),
5942 right: op.right,
5943 left_comments: op.left_comments,
5944 operator_comments: op.operator_comments,
5945 trailing_comments: op.trailing_comments,
5946 inferred_type: op.inferred_type,
5947 })),
5948 Expression::Alias(mut a) => {
5949 a.this = Self::replace_rownum_with_range(a.this);
5950 Expression::Alias(a)
5951 }
5952 other => other,
5953 }
5954 }
5955
5956 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
5957 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
5958 if let Expression::Paren(ref p) = expr {
5959 if let Expression::Sub(ref sub) = p.this {
5960 if let Expression::WindowFunction(ref wf) = sub.left {
5961 if let Expression::Function(ref f) = wf.this {
5962 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
5963 if let Expression::Literal(ref lit) = sub.right {
5964 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
5965 if n == "1" {
5966 return Expression::column("range");
5967 }
5968 }
5969 }
5970 }
5971 }
5972 }
5973 }
5974 }
5975 expr.clone()
5976 }
5977
5978 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
5979 /// Converts:
5980 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
5981 /// To:
5982 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
5983 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
5984 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
5985 use crate::expressions::*;
5986 transform_recursive(expr, &|e| {
5987 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
5988 if let Expression::ArraySize(ref af) = e {
5989 if let Expression::Function(ref f) = af.this {
5990 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
5991 let result = Self::convert_array_size_gda_snowflake(f)?;
5992 return Ok(result);
5993 }
5994 }
5995 }
5996
5997 let Expression::Select(mut sel) = e else {
5998 return Ok(e);
5999 };
6000
6001 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
6002 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
6003 let mut gda_join_idx: Option<usize> = None;
6004
6005 for (idx, join) in sel.joins.iter().enumerate() {
6006 // The join.this may be:
6007 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
6008 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
6009 let (unnest_ref, alias_name) = match &join.this {
6010 Expression::Unnest(ref unnest) => {
6011 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
6012 (Some(unnest.as_ref()), alias)
6013 }
6014 Expression::Alias(ref a) => {
6015 if let Expression::Unnest(ref unnest) = a.this {
6016 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
6017 } else {
6018 (None, None)
6019 }
6020 }
6021 _ => (None, None),
6022 };
6023
6024 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
6025 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
6026 if let Expression::Function(ref f) = unnest.this {
6027 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
6028 let start_expr = f.args[0].clone();
6029 let end_expr = f.args[1].clone();
6030 let step = f.args.get(2).cloned();
6031
6032 // Extract unit from step interval
6033 let unit = if let Some(Expression::Interval(ref iv)) = step {
6034 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
6035 Some(format!("{:?}", unit).to_ascii_uppercase())
6036 } else if let Some(ref this) = iv.this {
6037 // The interval may be stored as a string like "1 MONTH"
6038 if let Expression::Literal(lit) = this {
6039 if let Literal::String(ref s) = lit.as_ref() {
6040 let parts: Vec<&str> = s.split_whitespace().collect();
6041 if parts.len() == 2 {
6042 Some(parts[1].to_ascii_uppercase())
6043 } else if parts.len() == 1 {
6044 // Single word like "MONTH" or just "1"
6045 let upper = parts[0].to_ascii_uppercase();
6046 if matches!(
6047 upper.as_str(),
6048 "YEAR"
6049 | "QUARTER"
6050 | "MONTH"
6051 | "WEEK"
6052 | "DAY"
6053 | "HOUR"
6054 | "MINUTE"
6055 | "SECOND"
6056 ) {
6057 Some(upper)
6058 } else {
6059 None
6060 }
6061 } else {
6062 None
6063 }
6064 } else {
6065 None
6066 }
6067 } else {
6068 None
6069 }
6070 } else {
6071 None
6072 }
6073 } else {
6074 None
6075 };
6076
6077 if let Some(unit_str) = unit {
6078 gda_info = Some((alias, start_expr, end_expr, unit_str));
6079 gda_join_idx = Some(idx);
6080 }
6081 }
6082 }
6083 }
6084 if gda_info.is_some() {
6085 break;
6086 }
6087 }
6088
6089 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
6090 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
6091 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
6092 let result = Self::try_transform_from_gda_snowflake(sel);
6093 return result;
6094 };
6095 let join_idx = gda_join_idx.unwrap();
6096
6097 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
6098 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
6099 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
6100 let datediff = Expression::Function(Box::new(Function::new(
6101 "DATEDIFF".to_string(),
6102 vec![
6103 Expression::boxed_column(Column {
6104 name: Identifier::new(&unit_str),
6105 table: None,
6106 join_mark: false,
6107 trailing_comments: vec![],
6108 span: None,
6109 inferred_type: None,
6110 }),
6111 start_expr.clone(),
6112 end_expr.clone(),
6113 ],
6114 )));
6115 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6116 left: datediff,
6117 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6118 left_comments: vec![],
6119 operator_comments: vec![],
6120 trailing_comments: vec![],
6121 inferred_type: None,
6122 }));
6123
6124 let array_gen_range = Expression::Function(Box::new(Function::new(
6125 "ARRAY_GENERATE_RANGE".to_string(),
6126 vec![
6127 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6128 datediff_plus_one,
6129 ],
6130 )));
6131
6132 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
6133 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6134 name: Identifier::new("INPUT"),
6135 value: array_gen_range,
6136 separator: crate::expressions::NamedArgSeparator::DArrow,
6137 }));
6138 let flatten = Expression::Function(Box::new(Function::new(
6139 "FLATTEN".to_string(),
6140 vec![flatten_input],
6141 )));
6142
6143 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
6144 let alias_table = Alias {
6145 this: flatten,
6146 alias: Identifier::new("_t0"),
6147 column_aliases: vec![
6148 Identifier::new("seq"),
6149 Identifier::new("key"),
6150 Identifier::new("path"),
6151 Identifier::new("index"),
6152 Identifier::new(&alias_name),
6153 Identifier::new("this"),
6154 ],
6155 alias_explicit_as: false,
6156 alias_keyword: None,
6157 pre_alias_comments: vec![],
6158 trailing_comments: vec![],
6159 inferred_type: None,
6160 };
6161 let lateral_expr = Expression::Lateral(Box::new(Lateral {
6162 this: Box::new(Expression::Alias(Box::new(alias_table))),
6163 view: None,
6164 outer: None,
6165 alias: None,
6166 alias_quoted: false,
6167 cross_apply: None,
6168 ordinality: None,
6169 column_aliases: vec![],
6170 }));
6171
6172 // Remove the original join and add to FROM expressions
6173 sel.joins.remove(join_idx);
6174 if let Some(ref mut from) = sel.from {
6175 from.expressions.push(lateral_expr);
6176 }
6177
6178 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
6179 let dateadd_expr = Expression::Function(Box::new(Function::new(
6180 "DATEADD".to_string(),
6181 vec![
6182 Expression::boxed_column(Column {
6183 name: Identifier::new(&unit_str),
6184 table: None,
6185 join_mark: false,
6186 trailing_comments: vec![],
6187 span: None,
6188 inferred_type: None,
6189 }),
6190 Expression::Cast(Box::new(Cast {
6191 this: Expression::boxed_column(Column {
6192 name: Identifier::new(&alias_name),
6193 table: None,
6194 join_mark: false,
6195 trailing_comments: vec![],
6196 span: None,
6197 inferred_type: None,
6198 }),
6199 to: DataType::Int {
6200 length: None,
6201 integer_spelling: false,
6202 },
6203 trailing_comments: vec![],
6204 double_colon_syntax: false,
6205 format: None,
6206 default: None,
6207 inferred_type: None,
6208 })),
6209 Expression::Cast(Box::new(Cast {
6210 this: start_expr.clone(),
6211 to: DataType::Date,
6212 trailing_comments: vec![],
6213 double_colon_syntax: false,
6214 format: None,
6215 default: None,
6216 inferred_type: None,
6217 })),
6218 ],
6219 )));
6220
6221 // Replace references to the alias in the SELECT list
6222 let new_exprs: Vec<Expression> = sel
6223 .expressions
6224 .iter()
6225 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
6226 .collect();
6227 sel.expressions = new_exprs;
6228
6229 Ok(Expression::Select(sel))
6230 })
6231 }
6232
6233 /// Helper: replace column references to `alias_name` with dateadd expression
6234 fn replace_column_ref_with_dateadd(
6235 expr: &Expression,
6236 alias_name: &str,
6237 dateadd: &Expression,
6238 ) -> Expression {
6239 use crate::expressions::*;
6240 match expr {
6241 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
6242 // Plain column reference -> DATEADD(...) AS alias_name
6243 Expression::Alias(Box::new(Alias {
6244 this: dateadd.clone(),
6245 alias: Identifier::new(alias_name),
6246 column_aliases: vec![],
6247 alias_explicit_as: false,
6248 alias_keyword: None,
6249 pre_alias_comments: vec![],
6250 trailing_comments: vec![],
6251 inferred_type: None,
6252 }))
6253 }
6254 Expression::Alias(a) => {
6255 // Check if the inner expression references the alias
6256 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
6257 Expression::Alias(Box::new(Alias {
6258 this: new_this,
6259 alias: a.alias.clone(),
6260 column_aliases: a.column_aliases.clone(),
6261 alias_explicit_as: false,
6262 alias_keyword: None,
6263 pre_alias_comments: a.pre_alias_comments.clone(),
6264 trailing_comments: a.trailing_comments.clone(),
6265 inferred_type: None,
6266 }))
6267 }
6268 _ => expr.clone(),
6269 }
6270 }
6271
6272 /// Helper: replace column references in inner expression (not top-level)
6273 fn replace_column_ref_inner(
6274 expr: &Expression,
6275 alias_name: &str,
6276 dateadd: &Expression,
6277 ) -> Expression {
6278 use crate::expressions::*;
6279 match expr {
6280 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
6281 dateadd.clone()
6282 }
6283 Expression::Add(op) => {
6284 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6285 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6286 Expression::Add(Box::new(BinaryOp {
6287 left,
6288 right,
6289 left_comments: op.left_comments.clone(),
6290 operator_comments: op.operator_comments.clone(),
6291 trailing_comments: op.trailing_comments.clone(),
6292 inferred_type: None,
6293 }))
6294 }
6295 Expression::Sub(op) => {
6296 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6297 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6298 Expression::Sub(Box::new(BinaryOp {
6299 left,
6300 right,
6301 left_comments: op.left_comments.clone(),
6302 operator_comments: op.operator_comments.clone(),
6303 trailing_comments: op.trailing_comments.clone(),
6304 inferred_type: None,
6305 }))
6306 }
6307 Expression::Mul(op) => {
6308 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6309 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6310 Expression::Mul(Box::new(BinaryOp {
6311 left,
6312 right,
6313 left_comments: op.left_comments.clone(),
6314 operator_comments: op.operator_comments.clone(),
6315 trailing_comments: op.trailing_comments.clone(),
6316 inferred_type: None,
6317 }))
6318 }
6319 _ => expr.clone(),
6320 }
6321 }
6322
6323 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
6324 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
6325 fn try_transform_from_gda_snowflake(
6326 mut sel: Box<crate::expressions::Select>,
6327 ) -> Result<Expression> {
6328 use crate::expressions::*;
6329
6330 // Extract GDA info from FROM clause
6331 let mut gda_info: Option<(
6332 usize,
6333 String,
6334 Expression,
6335 Expression,
6336 String,
6337 Option<(String, Vec<Identifier>)>,
6338 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
6339
6340 if let Some(ref from) = sel.from {
6341 for (idx, table_expr) in from.expressions.iter().enumerate() {
6342 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
6343 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
6344 let (unnest_opt, outer_alias_info) = match table_expr {
6345 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
6346 Expression::Alias(ref a) => {
6347 if let Expression::Unnest(ref unnest) = a.this {
6348 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
6349 (Some(unnest.as_ref()), Some(alias_info))
6350 } else {
6351 (None, None)
6352 }
6353 }
6354 _ => (None, None),
6355 };
6356
6357 if let Some(unnest) = unnest_opt {
6358 // Check for GENERATE_DATE_ARRAY function
6359 let func_opt = match &unnest.this {
6360 Expression::Function(ref f)
6361 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
6362 && f.args.len() >= 2 =>
6363 {
6364 Some(f)
6365 }
6366 // Also check for GenerateSeries (from earlier normalization)
6367 _ => None,
6368 };
6369
6370 if let Some(f) = func_opt {
6371 let start_expr = f.args[0].clone();
6372 let end_expr = f.args[1].clone();
6373 let step = f.args.get(2).cloned();
6374
6375 // Extract unit and column name
6376 let unit = Self::extract_interval_unit_str(&step);
6377 let col_name = outer_alias_info
6378 .as_ref()
6379 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
6380 .unwrap_or_else(|| "value".to_string());
6381
6382 if let Some(unit_str) = unit {
6383 gda_info = Some((
6384 idx,
6385 col_name,
6386 start_expr,
6387 end_expr,
6388 unit_str,
6389 outer_alias_info,
6390 ));
6391 break;
6392 }
6393 }
6394 }
6395 }
6396 }
6397
6398 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
6399 else {
6400 return Ok(Expression::Select(sel));
6401 };
6402
6403 // Build the Snowflake subquery:
6404 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
6405 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
6406
6407 // DATEDIFF(unit, start, end)
6408 let datediff = Expression::Function(Box::new(Function::new(
6409 "DATEDIFF".to_string(),
6410 vec![
6411 Expression::boxed_column(Column {
6412 name: Identifier::new(&unit_str),
6413 table: None,
6414 join_mark: false,
6415 trailing_comments: vec![],
6416 span: None,
6417 inferred_type: None,
6418 }),
6419 start_expr.clone(),
6420 end_expr.clone(),
6421 ],
6422 )));
6423 // DATEDIFF(...) + 1
6424 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6425 left: datediff,
6426 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6427 left_comments: vec![],
6428 operator_comments: vec![],
6429 trailing_comments: vec![],
6430 inferred_type: None,
6431 }));
6432
6433 let array_gen_range = Expression::Function(Box::new(Function::new(
6434 "ARRAY_GENERATE_RANGE".to_string(),
6435 vec![
6436 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6437 datediff_plus_one,
6438 ],
6439 )));
6440
6441 // TABLE(FLATTEN(INPUT => ...))
6442 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6443 name: Identifier::new("INPUT"),
6444 value: array_gen_range,
6445 separator: crate::expressions::NamedArgSeparator::DArrow,
6446 }));
6447 let flatten = Expression::Function(Box::new(Function::new(
6448 "FLATTEN".to_string(),
6449 vec![flatten_input],
6450 )));
6451
6452 // Determine alias name for the table: use outer alias or _t0
6453 let table_alias_name = outer_alias_info
6454 .as_ref()
6455 .map(|(name, _)| name.clone())
6456 .unwrap_or_else(|| "_t0".to_string());
6457
6458 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
6459 let table_func =
6460 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
6461 let flatten_aliased = Expression::Alias(Box::new(Alias {
6462 this: table_func,
6463 alias: Identifier::new(&table_alias_name),
6464 column_aliases: vec![
6465 Identifier::new("seq"),
6466 Identifier::new("key"),
6467 Identifier::new("path"),
6468 Identifier::new("index"),
6469 Identifier::new(&col_name),
6470 Identifier::new("this"),
6471 ],
6472 alias_explicit_as: false,
6473 alias_keyword: None,
6474 pre_alias_comments: vec![],
6475 trailing_comments: vec![],
6476 inferred_type: None,
6477 }));
6478
6479 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
6480 let dateadd_expr = Expression::Function(Box::new(Function::new(
6481 "DATEADD".to_string(),
6482 vec![
6483 Expression::boxed_column(Column {
6484 name: Identifier::new(&unit_str),
6485 table: None,
6486 join_mark: false,
6487 trailing_comments: vec![],
6488 span: None,
6489 inferred_type: None,
6490 }),
6491 Expression::Cast(Box::new(Cast {
6492 this: Expression::boxed_column(Column {
6493 name: Identifier::new(&col_name),
6494 table: None,
6495 join_mark: false,
6496 trailing_comments: vec![],
6497 span: None,
6498 inferred_type: None,
6499 }),
6500 to: DataType::Int {
6501 length: None,
6502 integer_spelling: false,
6503 },
6504 trailing_comments: vec![],
6505 double_colon_syntax: false,
6506 format: None,
6507 default: None,
6508 inferred_type: None,
6509 })),
6510 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
6511 start_expr.clone(),
6512 ],
6513 )));
6514 let dateadd_aliased = Expression::Alias(Box::new(Alias {
6515 this: dateadd_expr,
6516 alias: Identifier::new(&col_name),
6517 column_aliases: vec![],
6518 alias_explicit_as: false,
6519 alias_keyword: None,
6520 pre_alias_comments: vec![],
6521 trailing_comments: vec![],
6522 inferred_type: None,
6523 }));
6524
6525 // Build inner SELECT
6526 let mut inner_select = Select::new();
6527 inner_select.expressions = vec![dateadd_aliased];
6528 inner_select.from = Some(From {
6529 expressions: vec![flatten_aliased],
6530 });
6531
6532 let inner_select_expr = Expression::Select(Box::new(inner_select));
6533 let subquery = Expression::Subquery(Box::new(Subquery {
6534 this: inner_select_expr,
6535 alias: None,
6536 column_aliases: vec![],
6537 alias_explicit_as: false,
6538 alias_keyword: None,
6539 order_by: None,
6540 limit: None,
6541 offset: None,
6542 distribute_by: None,
6543 sort_by: None,
6544 cluster_by: None,
6545 lateral: false,
6546 modifiers_inside: false,
6547 trailing_comments: vec![],
6548 inferred_type: None,
6549 }));
6550
6551 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
6552 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
6553 Expression::Alias(Box::new(Alias {
6554 this: subquery,
6555 alias: Identifier::new(&alias_name),
6556 column_aliases: col_aliases,
6557 alias_explicit_as: false,
6558 alias_keyword: None,
6559 pre_alias_comments: vec![],
6560 trailing_comments: vec![],
6561 inferred_type: None,
6562 }))
6563 } else {
6564 subquery
6565 };
6566
6567 // Replace the FROM expression
6568 if let Some(ref mut from) = sel.from {
6569 from.expressions[from_idx] = replacement;
6570 }
6571
6572 Ok(Expression::Select(sel))
6573 }
6574
6575 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
6576 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
6577 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
6578 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
6579 use crate::expressions::*;
6580
6581 let start_expr = f.args[0].clone();
6582 let end_expr = f.args[1].clone();
6583 let step = f.args.get(2).cloned();
6584 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
6585 let col_name = "value";
6586
6587 // Build the inner subquery: same as try_transform_from_gda_snowflake
6588 let datediff = Expression::Function(Box::new(Function::new(
6589 "DATEDIFF".to_string(),
6590 vec![
6591 Expression::boxed_column(Column {
6592 name: Identifier::new(&unit_str),
6593 table: None,
6594 join_mark: false,
6595 trailing_comments: vec![],
6596 span: None,
6597 inferred_type: None,
6598 }),
6599 start_expr.clone(),
6600 end_expr.clone(),
6601 ],
6602 )));
6603 // DATEDIFF(...) + 1
6604 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6605 left: datediff,
6606 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6607 left_comments: vec![],
6608 operator_comments: vec![],
6609 trailing_comments: vec![],
6610 inferred_type: None,
6611 }));
6612
6613 let array_gen_range = Expression::Function(Box::new(Function::new(
6614 "ARRAY_GENERATE_RANGE".to_string(),
6615 vec![
6616 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6617 datediff_plus_one,
6618 ],
6619 )));
6620
6621 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6622 name: Identifier::new("INPUT"),
6623 value: array_gen_range,
6624 separator: crate::expressions::NamedArgSeparator::DArrow,
6625 }));
6626 let flatten = Expression::Function(Box::new(Function::new(
6627 "FLATTEN".to_string(),
6628 vec![flatten_input],
6629 )));
6630
6631 let table_func =
6632 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
6633 let flatten_aliased = Expression::Alias(Box::new(Alias {
6634 this: table_func,
6635 alias: Identifier::new("_t0"),
6636 column_aliases: vec![
6637 Identifier::new("seq"),
6638 Identifier::new("key"),
6639 Identifier::new("path"),
6640 Identifier::new("index"),
6641 Identifier::new(col_name),
6642 Identifier::new("this"),
6643 ],
6644 alias_explicit_as: false,
6645 alias_keyword: None,
6646 pre_alias_comments: vec![],
6647 trailing_comments: vec![],
6648 inferred_type: None,
6649 }));
6650
6651 let dateadd_expr = Expression::Function(Box::new(Function::new(
6652 "DATEADD".to_string(),
6653 vec![
6654 Expression::boxed_column(Column {
6655 name: Identifier::new(&unit_str),
6656 table: None,
6657 join_mark: false,
6658 trailing_comments: vec![],
6659 span: None,
6660 inferred_type: None,
6661 }),
6662 Expression::Cast(Box::new(Cast {
6663 this: Expression::boxed_column(Column {
6664 name: Identifier::new(col_name),
6665 table: None,
6666 join_mark: false,
6667 trailing_comments: vec![],
6668 span: None,
6669 inferred_type: None,
6670 }),
6671 to: DataType::Int {
6672 length: None,
6673 integer_spelling: false,
6674 },
6675 trailing_comments: vec![],
6676 double_colon_syntax: false,
6677 format: None,
6678 default: None,
6679 inferred_type: None,
6680 })),
6681 start_expr.clone(),
6682 ],
6683 )));
6684 let dateadd_aliased = Expression::Alias(Box::new(Alias {
6685 this: dateadd_expr,
6686 alias: Identifier::new(col_name),
6687 column_aliases: vec![],
6688 alias_explicit_as: false,
6689 alias_keyword: None,
6690 pre_alias_comments: vec![],
6691 trailing_comments: vec![],
6692 inferred_type: None,
6693 }));
6694
6695 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
6696 let mut inner_select = Select::new();
6697 inner_select.expressions = vec![dateadd_aliased];
6698 inner_select.from = Some(From {
6699 expressions: vec![flatten_aliased],
6700 });
6701
6702 // Wrap in subquery for the inner part
6703 let inner_subquery = Expression::Subquery(Box::new(Subquery {
6704 this: Expression::Select(Box::new(inner_select)),
6705 alias: None,
6706 column_aliases: vec![],
6707 alias_explicit_as: false,
6708 alias_keyword: None,
6709 order_by: None,
6710 limit: None,
6711 offset: None,
6712 distribute_by: None,
6713 sort_by: None,
6714 cluster_by: None,
6715 lateral: false,
6716 modifiers_inside: false,
6717 trailing_comments: vec![],
6718 inferred_type: None,
6719 }));
6720
6721 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
6722 let star = Expression::Star(Star {
6723 table: None,
6724 except: None,
6725 replace: None,
6726 rename: None,
6727 trailing_comments: vec![],
6728 span: None,
6729 });
6730 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
6731 this: star,
6732 distinct: false,
6733 filter: None,
6734 order_by: vec![],
6735 name: Some("ARRAY_AGG".to_string()),
6736 ignore_nulls: None,
6737 having_max: None,
6738 limit: None,
6739 inferred_type: None,
6740 }));
6741
6742 let mut outer_select = Select::new();
6743 outer_select.expressions = vec![array_agg];
6744 outer_select.from = Some(From {
6745 expressions: vec![inner_subquery],
6746 });
6747
6748 // Wrap in a subquery
6749 let outer_subquery = Expression::Subquery(Box::new(Subquery {
6750 this: Expression::Select(Box::new(outer_select)),
6751 alias: None,
6752 column_aliases: vec![],
6753 alias_explicit_as: false,
6754 alias_keyword: None,
6755 order_by: None,
6756 limit: None,
6757 offset: None,
6758 distribute_by: None,
6759 sort_by: None,
6760 cluster_by: None,
6761 lateral: false,
6762 modifiers_inside: false,
6763 trailing_comments: vec![],
6764 inferred_type: None,
6765 }));
6766
6767 // ARRAY_SIZE(subquery)
6768 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
6769 outer_subquery,
6770 ))))
6771 }
6772
6773 /// Extract interval unit string from an optional step expression.
6774 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
6775 use crate::expressions::*;
6776 if let Some(Expression::Interval(ref iv)) = step {
6777 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
6778 return Some(format!("{:?}", unit).to_ascii_uppercase());
6779 }
6780 if let Some(ref this) = iv.this {
6781 if let Expression::Literal(lit) = this {
6782 if let Literal::String(ref s) = lit.as_ref() {
6783 let parts: Vec<&str> = s.split_whitespace().collect();
6784 if parts.len() == 2 {
6785 return Some(parts[1].to_ascii_uppercase());
6786 } else if parts.len() == 1 {
6787 let upper = parts[0].to_ascii_uppercase();
6788 if matches!(
6789 upper.as_str(),
6790 "YEAR"
6791 | "QUARTER"
6792 | "MONTH"
6793 | "WEEK"
6794 | "DAY"
6795 | "HOUR"
6796 | "MINUTE"
6797 | "SECOND"
6798 ) {
6799 return Some(upper);
6800 }
6801 }
6802 }
6803 }
6804 }
6805 }
6806 // Default to DAY if no step or no interval
6807 if step.is_none() {
6808 return Some("DAY".to_string());
6809 }
6810 None
6811 }
6812
/// Re-wrap three known fragments of generated Snowflake SQL onto multiple lines.
///
/// The rewrites are only attempted when `sql` contains both marker substrings below;
/// otherwise the input is returned unchanged. Each rewrite is an exact substring
/// replacement, applied in order, and is a no-op when its fragment is absent.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    const LATERAL_MARKER: &str = "LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)";
    const RANGE_MARKER: &str = "ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)";

    // (single-line fragment, multi-line replacement)
    const REWRITES: [(&str, &str); 3] = [
        (
            "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
        ),
        (
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        ),
        (
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
        ),
    ];

    let applies = sql.contains(LATERAL_MARKER) && sql.contains(RANGE_MARKER);
    if !applies {
        return sql;
    }

    for (compact, expanded) in REWRITES.iter() {
        sql = sql.replace(*compact, expanded);
    }
    sql
}
6835
6836 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
6837 /// This handles cases where the same syntax has different semantics across dialects.
6838 fn cross_dialect_normalize(
6839 expr: Expression,
6840 source: DialectType,
6841 target: DialectType,
6842 ) -> Result<Expression> {
6843 use crate::expressions::{
6844 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
6845 Function, Identifier, IsNull, Literal, Null, Paren,
6846 };
6847
6848 // Helper to tag which kind of transform to apply
6849 #[derive(Debug)]
6850 enum Action {
6851 None,
6852 GreatestLeastNull,
6853 ArrayGenerateRange,
6854 Div0TypedDivision,
6855 ArrayAggCollectList,
6856 ArrayAggWithinGroupFilter,
6857 ArrayAggFilter,
6858 CastTimestampToDatetime,
6859 DateTruncWrapCast,
6860 ToDateToCast,
6861 ConvertTimezoneToExpr,
6862 SetToVariable,
6863 RegexpReplaceSnowflakeToDuckDB,
6864 BigQueryFunctionNormalize,
6865 BigQuerySafeDivide,
6866 BigQueryCastType,
6867 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
6868 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
6869 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
6870 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
6871 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
6872 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6873 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6874 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
6875 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
6876 EpochConvert, // Expression::Epoch -> target-specific epoch function
6877 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
6878 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
6879 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
6880 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
6881 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
6882 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
6883 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
6884 TempTableHash, // TSQL #table -> temp table normalization
6885 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
6886 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
6887 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
6888 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
6889 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
6890 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
6891 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6892 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6893 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
6894 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
6895 DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
6896 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
6897 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
6898 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
6899 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
6900 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
6901 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
6902 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
6903 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
6904 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
6905 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6906 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
6907 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
6908 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
6909 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
6910 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
6911 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
6912 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6913 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
6914 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
6915 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
6916 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
6917 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6918 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
6919 DollarParamConvert, // $foo -> @foo for BigQuery
6920 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
6921 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
6922 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
6923 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
6924 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6925 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6926 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
6927 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
6928 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
6929 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
6930 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6931 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
6932 RespectNullsConvert, // RESPECT NULLS window function handling
6933 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
6934 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
6935 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
6936 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
6937 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
6938 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
6939 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6940 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
6941 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
6942 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
6943 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
6944 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
6945 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
6946 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
6947 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
6948 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
6949 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
6950 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
6951 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
6952 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
6953 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
6954 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
6955 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
6956 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
6957 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
6958 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
6959 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
6960 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
6961 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
6962 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
6963 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6964 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6965 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
6966 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
6967 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
6968 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
6969 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6970 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6971 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
6972 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
6973 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
6974 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
6975 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
6976 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
6977 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
6978 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
6979 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
6980 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
6981 DecodeSimplify, // DECODE with null-safe -> simple = comparison
6982 ArraySumConvert, // ARRAY_SUM -> target-specific
6983 ArraySizeConvert, // ARRAY_SIZE -> target-specific
6984 ArrayAnyConvert, // ARRAY_ANY -> target-specific
6985 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
6986 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
6987 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
6988 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
6989 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
6990 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
6991 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
6992 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
6993 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
6994 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
6995 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
6996 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
6997 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
6998 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
6999 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
7000 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
7001 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
7002 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
7003 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
7004 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
7005 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7006 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
7007 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
7008 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
7009 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
7010 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
7011 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
7012 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
7013 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
7014 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
7015 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
7016 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
7017 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
7018 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
7019 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
7020 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
7021 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
7022 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
7023 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
7024 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
7025 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
7026 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
7027 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
7028 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7029 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7030 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
7031 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
7032 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
7033 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
7034 }
7035
7036 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
7037 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
7038 Self::transform_select_into(expr, source, target)
7039 } else {
7040 expr
7041 };
7042
7043 // Strip OFFSET ROWS for non-TSQL/Oracle targets
7044 let expr = if !matches!(
7045 target,
7046 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
7047 ) {
7048 if let Expression::Select(mut select) = expr {
7049 if let Some(ref mut offset) = select.offset {
7050 offset.rows = None;
7051 }
7052 Expression::Select(select)
7053 } else {
7054 expr
7055 }
7056 } else {
7057 expr
7058 };
7059
7060 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
7061 let expr = if matches!(target, DialectType::Oracle) {
7062 if let Expression::Select(mut select) = expr {
7063 if let Some(limit) = select.limit.take() {
7064 // Convert LIMIT to FETCH FIRST n ROWS ONLY
7065 select.fetch = Some(crate::expressions::Fetch {
7066 direction: "FIRST".to_string(),
7067 count: Some(limit.this),
7068 percent: false,
7069 rows: true,
7070 with_ties: false,
7071 });
7072 }
7073 // Add ROWS to OFFSET if present
7074 if let Some(ref mut offset) = select.offset {
7075 offset.rows = Some(true);
7076 }
7077 Expression::Select(select)
7078 } else {
7079 expr
7080 }
7081 } else {
7082 expr
7083 };
7084
7085 // Handle CreateTable WITH properties transformation before recursive transforms
7086 let expr = if let Expression::CreateTable(mut ct) = expr {
7087 Self::transform_create_table_properties(&mut ct, source, target);
7088
7089 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
7090 // When the PARTITIONED BY clause contains column definitions, merge them into the
7091 // main column list and adjust the PARTITIONED BY clause for the target dialect.
7092 if matches!(
7093 source,
7094 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7095 ) {
7096 let mut partition_col_names: Vec<String> = Vec::new();
7097 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
7098 let mut has_col_def_partitions = false;
7099
7100 // Check if any PARTITIONED BY property contains ColumnDef expressions
7101 for prop in &ct.properties {
7102 if let Expression::PartitionedByProperty(ref pbp) = prop {
7103 if let Expression::Tuple(ref tuple) = *pbp.this {
7104 for expr in &tuple.expressions {
7105 if let Expression::ColumnDef(ref cd) = expr {
7106 has_col_def_partitions = true;
7107 partition_col_names.push(cd.name.name.clone());
7108 partition_col_defs.push(*cd.clone());
7109 }
7110 }
7111 }
7112 }
7113 }
7114
7115 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
7116 // Merge partition columns into main column list
7117 for cd in partition_col_defs {
7118 ct.columns.push(cd);
7119 }
7120
7121 // Replace PARTITIONED BY property with column-name-only version
7122 ct.properties
7123 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
7124
7125 if matches!(
7126 target,
7127 DialectType::Presto | DialectType::Trino | DialectType::Athena
7128 ) {
7129 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
7130 let array_elements: Vec<String> = partition_col_names
7131 .iter()
7132 .map(|n| format!("'{}'", n))
7133 .collect();
7134 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
7135 ct.with_properties
7136 .push(("PARTITIONED_BY".to_string(), array_value));
7137 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7138 // Spark: PARTITIONED BY (y, z) - just column names
7139 let name_exprs: Vec<Expression> = partition_col_names
7140 .iter()
7141 .map(|n| {
7142 Expression::Column(Box::new(crate::expressions::Column {
7143 name: crate::expressions::Identifier::new(n.clone()),
7144 table: None,
7145 join_mark: false,
7146 trailing_comments: Vec::new(),
7147 span: None,
7148 inferred_type: None,
7149 }))
7150 })
7151 .collect();
7152 ct.properties.insert(
7153 0,
7154 Expression::PartitionedByProperty(Box::new(
7155 crate::expressions::PartitionedByProperty {
7156 this: Box::new(Expression::Tuple(Box::new(
7157 crate::expressions::Tuple {
7158 expressions: name_exprs,
7159 },
7160 ))),
7161 },
7162 )),
7163 );
7164 }
// For DuckDB and other targets, just drop the PARTITIONED BY (already removed by the retain above)
7166 }
7167
7168 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
7169 // are handled by transform_create_table_properties which runs first
7170 }
7171
// Strip LOCATION property for Presto/Trino/Athena (not supported)
7173 if matches!(
7174 target,
7175 DialectType::Presto | DialectType::Trino | DialectType::Athena
7176 ) {
7177 ct.properties
7178 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
7179 }
7180
7181 // Strip table-level constraints for Spark/Hive/Databricks
7182 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
7183 if matches!(
7184 target,
7185 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7186 ) {
7187 ct.constraints.retain(|c| {
7188 matches!(
7189 c,
7190 crate::expressions::TableConstraint::PrimaryKey { .. }
7191 | crate::expressions::TableConstraint::Like { .. }
7192 )
7193 });
7194 for constraint in &mut ct.constraints {
7195 if let crate::expressions::TableConstraint::PrimaryKey {
7196 columns,
7197 modifiers,
7198 ..
7199 } = constraint
7200 {
7201 // Strip ASC/DESC from column names
7202 for col in columns.iter_mut() {
7203 if col.name.ends_with(" ASC") {
7204 col.name = col.name[..col.name.len() - 4].to_string();
7205 } else if col.name.ends_with(" DESC") {
7206 col.name = col.name[..col.name.len() - 5].to_string();
7207 }
7208 }
7209 // Strip TSQL-specific modifiers
7210 modifiers.clustered = None;
7211 modifiers.with_options.clear();
7212 modifiers.on_filegroup = None;
7213 }
7214 }
7215 }
7216
7217 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
7218 if matches!(target, DialectType::Databricks) {
7219 for col in &mut ct.columns {
7220 if col.auto_increment {
7221 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
7222 col.data_type = crate::expressions::DataType::BigInt { length: None };
7223 }
7224 }
7225 }
7226 }
7227
7228 // Spark/Databricks: INTEGER -> INT in column definitions
7229 // Python sqlglot always outputs INT for Spark/Databricks
7230 if matches!(target, DialectType::Spark | DialectType::Databricks) {
7231 for col in &mut ct.columns {
7232 if let crate::expressions::DataType::Int {
7233 integer_spelling, ..
7234 } = &mut col.data_type
7235 {
7236 *integer_spelling = false;
7237 }
7238 }
7239 }
7240
7241 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
7242 if matches!(target, DialectType::Hive | DialectType::Spark) {
7243 for col in &mut ct.columns {
7244 // If nullable is explicitly true (NULL), change to None (omit it)
7245 if col.nullable == Some(true) {
7246 col.nullable = None;
7247 }
7248 // Also remove from constraints if stored there
7249 col.constraints
7250 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
7251 }
7252 }
7253
7254 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
7255 if ct.on_property.is_some()
7256 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7257 {
7258 ct.on_property = None;
7259 }
7260
7261 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
7262 // Snowflake doesn't support typed arrays in DDL
7263 if matches!(target, DialectType::Snowflake) {
7264 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
7265 if let crate::expressions::DataType::Array { .. } = dt {
7266 *dt = crate::expressions::DataType::Custom {
7267 name: "ARRAY".to_string(),
7268 };
7269 }
7270 }
7271 for col in &mut ct.columns {
7272 strip_array_type_params(&mut col.data_type);
7273 }
7274 }
7275
7276 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
7277 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
7278 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
7279 if matches!(target, DialectType::PostgreSQL) {
7280 for col in &mut ct.columns {
7281 if col.auto_increment && !col.constraint_order.is_empty() {
7282 use crate::expressions::ConstraintType;
7283 let has_explicit_not_null = col
7284 .constraint_order
7285 .iter()
7286 .any(|ct| *ct == ConstraintType::NotNull);
7287
7288 if has_explicit_not_null {
7289 // Source had explicit NOT NULL - preserve original order
7290 // Just ensure nullable is set
7291 if col.nullable != Some(false) {
7292 col.nullable = Some(false);
7293 }
7294 } else {
7295 // Source didn't have explicit NOT NULL - build order with
7296 // AutoIncrement + NotNull first, then remaining constraints
7297 let mut new_order = Vec::new();
7298 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
7299 new_order.push(ConstraintType::AutoIncrement);
7300 new_order.push(ConstraintType::NotNull);
7301 // Add remaining constraints in original order (except AutoIncrement)
7302 for ct_type in &col.constraint_order {
7303 if *ct_type != ConstraintType::AutoIncrement {
7304 new_order.push(ct_type.clone());
7305 }
7306 }
7307 col.constraint_order = new_order;
7308 col.nullable = Some(false);
7309 }
7310 }
7311 }
7312 }
7313
7314 Expression::CreateTable(ct)
7315 } else {
7316 expr
7317 };
7318
7319 // Handle CreateView column stripping for Presto/Trino target
7320 let expr = if let Expression::CreateView(mut cv) = expr {
7321 // Presto/Trino: drop column list when view has a SELECT body
7322 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
7323 {
7324 if !matches!(&cv.query, Expression::Null(_)) {
7325 cv.columns.clear();
7326 }
7327 }
7328 Expression::CreateView(cv)
7329 } else {
7330 expr
7331 };
7332
7333 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
7334 let expr = if !matches!(
7335 target,
7336 DialectType::Presto | DialectType::Trino | DialectType::Athena
7337 ) {
7338 if let Expression::Select(mut select) = expr {
7339 if let Some(ref mut with) = select.with {
7340 for cte in &mut with.ctes {
7341 if let Expression::Values(ref vals) = cte.this {
7342 // Build: SELECT * FROM (VALUES ...) AS _values
7343 let values_subquery =
7344 Expression::Subquery(Box::new(crate::expressions::Subquery {
7345 this: Expression::Values(vals.clone()),
7346 alias: Some(Identifier::new("_values".to_string())),
7347 column_aliases: Vec::new(),
7348 alias_explicit_as: false,
7349 alias_keyword: None,
7350 order_by: None,
7351 limit: None,
7352 offset: None,
7353 distribute_by: None,
7354 sort_by: None,
7355 cluster_by: None,
7356 lateral: false,
7357 modifiers_inside: false,
7358 trailing_comments: Vec::new(),
7359 inferred_type: None,
7360 }));
7361 let mut new_select = crate::expressions::Select::new();
7362 new_select.expressions =
7363 vec![Expression::Star(crate::expressions::Star {
7364 table: None,
7365 except: None,
7366 replace: None,
7367 rename: None,
7368 trailing_comments: Vec::new(),
7369 span: None,
7370 })];
7371 new_select.from = Some(crate::expressions::From {
7372 expressions: vec![values_subquery],
7373 });
7374 cte.this = Expression::Select(Box::new(new_select));
7375 }
7376 }
7377 }
7378 Expression::Select(select)
7379 } else {
7380 expr
7381 }
7382 } else {
7383 expr
7384 };
7385
7386 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
7387 let expr = if matches!(target, DialectType::PostgreSQL) {
7388 if let Expression::CreateIndex(mut ci) = expr {
7389 for col in &mut ci.columns {
7390 if col.nulls_first.is_none() {
7391 col.nulls_first = Some(true);
7392 }
7393 }
7394 Expression::CreateIndex(ci)
7395 } else {
7396 expr
7397 }
7398 } else {
7399 expr
7400 };
7401
7402 transform_recursive(expr, &|e| {
7403 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
7404 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
7405 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7406 if let Expression::Cast(ref c) = e {
7407 // Check if this is a CAST of an array to a struct array type
7408 let is_struct_array_cast =
7409 matches!(&c.to, crate::expressions::DataType::Array { .. });
7410 if is_struct_array_cast {
7411 let has_auto_named_structs = match &c.this {
7412 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
7413 if let Expression::Struct(s) = elem {
7414 s.fields.iter().all(|(name, _)| {
7415 name.as_ref().map_or(true, |n| {
7416 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
7417 })
7418 })
7419 } else {
7420 false
7421 }
7422 }),
7423 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
7424 if let Expression::Struct(s) = elem {
7425 s.fields.iter().all(|(name, _)| {
7426 name.as_ref().map_or(true, |n| {
7427 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
7428 })
7429 })
7430 } else {
7431 false
7432 }
7433 }),
7434 _ => false,
7435 };
7436 if has_auto_named_structs {
7437 let convert_struct_to_row = |elem: Expression| -> Expression {
7438 if let Expression::Struct(s) = elem {
7439 let row_args: Vec<Expression> =
7440 s.fields.into_iter().map(|(_, v)| v).collect();
7441 Expression::Function(Box::new(Function::new(
7442 "ROW".to_string(),
7443 row_args,
7444 )))
7445 } else {
7446 elem
7447 }
7448 };
7449 let mut c_clone = c.as_ref().clone();
7450 match &mut c_clone.this {
7451 Expression::Array(arr) => {
7452 arr.expressions = arr
7453 .expressions
7454 .drain(..)
7455 .map(convert_struct_to_row)
7456 .collect();
7457 }
7458 Expression::ArrayFunc(arr) => {
7459 arr.expressions = arr
7460 .expressions
7461 .drain(..)
7462 .map(convert_struct_to_row)
7463 .collect();
7464 }
7465 _ => {}
7466 }
7467 return Ok(Expression::Cast(Box::new(c_clone)));
7468 }
7469 }
7470 }
7471 }
7472
7473 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
7474 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7475 if let Expression::Select(ref sel) = e {
7476 if sel.kind.as_deref() == Some("STRUCT") {
7477 let mut fields = Vec::new();
7478 for expr in &sel.expressions {
7479 match expr {
7480 Expression::Alias(a) => {
7481 fields.push((Some(a.alias.name.clone()), a.this.clone()));
7482 }
7483 Expression::Column(c) => {
7484 fields.push((Some(c.name.name.clone()), expr.clone()));
7485 }
7486 _ => {
7487 fields.push((None, expr.clone()));
7488 }
7489 }
7490 }
7491 let struct_lit =
7492 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
7493 let mut new_select = sel.as_ref().clone();
7494 new_select.kind = None;
7495 new_select.expressions = vec![struct_lit];
7496 return Ok(Expression::Select(Box::new(new_select)));
7497 }
7498 }
7499 }
7500
7501 // Convert @variable -> ${variable} for Spark/Hive/Databricks
7502 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7503 && matches!(
7504 target,
7505 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7506 )
7507 {
7508 if let Expression::Parameter(ref p) = e {
7509 if p.style == crate::expressions::ParameterStyle::At {
7510 if let Some(ref name) = p.name {
7511 return Ok(Expression::Parameter(Box::new(
7512 crate::expressions::Parameter {
7513 name: Some(name.clone()),
7514 index: p.index,
7515 style: crate::expressions::ParameterStyle::DollarBrace,
7516 quoted: p.quoted,
7517 string_quoted: p.string_quoted,
7518 expression: None,
7519 },
7520 )));
7521 }
7522 }
7523 }
7524 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
7525 if let Expression::Column(ref col) = e {
7526 if col.name.name.starts_with('@') && col.table.is_none() {
7527 let var_name = col.name.name.trim_start_matches('@').to_string();
7528 return Ok(Expression::Parameter(Box::new(
7529 crate::expressions::Parameter {
7530 name: Some(var_name),
7531 index: None,
7532 style: crate::expressions::ParameterStyle::DollarBrace,
7533 quoted: false,
7534 string_quoted: false,
7535 expression: None,
7536 },
7537 )));
7538 }
7539 }
7540 }
7541
7542 // Convert @variable -> variable in SET statements for Spark/Databricks
7543 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7544 && matches!(target, DialectType::Spark | DialectType::Databricks)
7545 {
7546 if let Expression::SetStatement(ref s) = e {
7547 let mut new_items = s.items.clone();
7548 let mut changed = false;
7549 for item in &mut new_items {
7550 // Strip @ from the SET name (Parameter style)
7551 if let Expression::Parameter(ref p) = item.name {
7552 if p.style == crate::expressions::ParameterStyle::At {
7553 if let Some(ref name) = p.name {
7554 item.name = Expression::Identifier(Identifier::new(name));
7555 changed = true;
7556 }
7557 }
7558 }
7559 // Strip @ from the SET name (Identifier style - SET parser)
7560 if let Expression::Identifier(ref id) = item.name {
7561 if id.name.starts_with('@') {
7562 let var_name = id.name.trim_start_matches('@').to_string();
7563 item.name = Expression::Identifier(Identifier::new(&var_name));
7564 changed = true;
7565 }
7566 }
7567 // Strip @ from the SET name (Column style - alternative parsing)
7568 if let Expression::Column(ref col) = item.name {
7569 if col.name.name.starts_with('@') && col.table.is_none() {
7570 let var_name = col.name.name.trim_start_matches('@').to_string();
7571 item.name = Expression::Identifier(Identifier::new(&var_name));
7572 changed = true;
7573 }
7574 }
7575 }
7576 if changed {
7577 let mut new_set = (**s).clone();
7578 new_set.items = new_items;
7579 return Ok(Expression::SetStatement(Box::new(new_set)));
7580 }
7581 }
7582 }
7583
// Strip table hints (e.g., NOLOCK) when going from TSQL/Fabric to a non-TSQL/Fabric target
7585 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7586 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7587 {
7588 if let Expression::Table(ref tr) = e {
7589 if !tr.hints.is_empty() {
7590 let mut new_tr = tr.clone();
7591 new_tr.hints.clear();
7592 return Ok(Expression::Table(new_tr));
7593 }
7594 }
7595 }
7596
// Snowflake: simplify IS [NOT] TRUE / IS [NOT] FALSE on boolean literals by folding them
// into the resulting literal, e.g. TRUE IS TRUE -> TRUE, FALSE IS FALSE -> TRUE, TRUE IS NOT TRUE -> FALSE
7599 if matches!(target, DialectType::Snowflake) {
7600 if let Expression::IsTrue(ref itf) = e {
7601 if let Expression::Boolean(ref b) = itf.this {
7602 if !itf.not {
7603 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7604 value: b.value,
7605 }));
7606 } else {
7607 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7608 value: !b.value,
7609 }));
7610 }
7611 }
7612 }
7613 if let Expression::IsFalse(ref itf) = e {
7614 if let Expression::Boolean(ref b) = itf.this {
7615 if !itf.not {
7616 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7617 value: !b.value,
7618 }));
7619 } else {
7620 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7621 value: b.value,
7622 }));
7623 }
7624 }
7625 }
7626 }
7627
7628 // BigQuery: split dotted backtick identifiers in table names
7629 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
7630 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
7631 if let Expression::CreateTable(ref ct) = e {
7632 let mut changed = false;
7633 let mut new_ct = ct.clone();
7634 // Split the table name
7635 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
7636 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
7637 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
7638 let was_quoted = ct.name.name.quoted;
7639 let mk_id = |s: &str| {
7640 if was_quoted {
7641 Identifier::quoted(s)
7642 } else {
7643 Identifier::new(s)
7644 }
7645 };
7646 if parts.len() == 3 {
7647 new_ct.name.catalog = Some(mk_id(parts[0]));
7648 new_ct.name.schema = Some(mk_id(parts[1]));
7649 new_ct.name.name = mk_id(parts[2]);
7650 changed = true;
7651 } else if parts.len() == 2 {
7652 new_ct.name.schema = Some(mk_id(parts[0]));
7653 new_ct.name.name = mk_id(parts[1]);
7654 changed = true;
7655 }
7656 }
7657 // Split the clone source name
7658 if let Some(ref clone_src) = ct.clone_source {
7659 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
7660 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
7661 let was_quoted = clone_src.name.quoted;
7662 let mk_id = |s: &str| {
7663 if was_quoted {
7664 Identifier::quoted(s)
7665 } else {
7666 Identifier::new(s)
7667 }
7668 };
7669 let mut new_src = clone_src.clone();
7670 if parts.len() == 3 {
7671 new_src.catalog = Some(mk_id(parts[0]));
7672 new_src.schema = Some(mk_id(parts[1]));
7673 new_src.name = mk_id(parts[2]);
7674 new_ct.clone_source = Some(new_src);
7675 changed = true;
7676 } else if parts.len() == 2 {
7677 new_src.schema = Some(mk_id(parts[0]));
7678 new_src.name = mk_id(parts[1]);
7679 new_ct.clone_source = Some(new_src);
7680 changed = true;
7681 }
7682 }
7683 }
7684 if changed {
7685 return Ok(Expression::CreateTable(new_ct));
7686 }
7687 }
7688 }
7689
7690 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
7691 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
7692 if matches!(source, DialectType::BigQuery)
7693 && matches!(
7694 target,
7695 DialectType::DuckDB
7696 | DialectType::Presto
7697 | DialectType::Trino
7698 | DialectType::Athena
7699 )
7700 {
7701 if let Expression::Subscript(ref sub) = e {
7702 let (new_index, is_safe) = match &sub.index {
7703 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
7704 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
7705 let Literal::Number(n) = lit.as_ref() else {
7706 unreachable!()
7707 };
7708 if let Ok(val) = n.parse::<i64>() {
7709 (
7710 Some(Expression::Literal(Box::new(Literal::Number(
7711 (val + 1).to_string(),
7712 )))),
7713 false,
7714 )
7715 } else {
7716 (None, false)
7717 }
7718 }
7719 // OFFSET(n) -> n+1 (0-based)
7720 Expression::Function(ref f)
7721 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
7722 {
7723 if let Expression::Literal(lit) = &f.args[0] {
7724 if let Literal::Number(n) = lit.as_ref() {
7725 if let Ok(val) = n.parse::<i64>() {
7726 (
7727 Some(Expression::Literal(Box::new(Literal::Number(
7728 (val + 1).to_string(),
7729 )))),
7730 false,
7731 )
7732 } else {
7733 (
7734 Some(Expression::Add(Box::new(
7735 crate::expressions::BinaryOp::new(
7736 f.args[0].clone(),
7737 Expression::number(1),
7738 ),
7739 ))),
7740 false,
7741 )
7742 }
7743 } else {
7744 (None, false)
7745 }
7746 } else {
7747 (
7748 Some(Expression::Add(Box::new(
7749 crate::expressions::BinaryOp::new(
7750 f.args[0].clone(),
7751 Expression::number(1),
7752 ),
7753 ))),
7754 false,
7755 )
7756 }
7757 }
7758 // ORDINAL(n) -> n (already 1-based)
7759 Expression::Function(ref f)
7760 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
7761 {
7762 (Some(f.args[0].clone()), false)
7763 }
7764 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
7765 Expression::Function(ref f)
7766 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
7767 {
7768 if let Expression::Literal(lit) = &f.args[0] {
7769 if let Literal::Number(n) = lit.as_ref() {
7770 if let Ok(val) = n.parse::<i64>() {
7771 (
7772 Some(Expression::Literal(Box::new(Literal::Number(
7773 (val + 1).to_string(),
7774 )))),
7775 true,
7776 )
7777 } else {
7778 (
7779 Some(Expression::Add(Box::new(
7780 crate::expressions::BinaryOp::new(
7781 f.args[0].clone(),
7782 Expression::number(1),
7783 ),
7784 ))),
7785 true,
7786 )
7787 }
7788 } else {
7789 (None, false)
7790 }
7791 } else {
7792 (
7793 Some(Expression::Add(Box::new(
7794 crate::expressions::BinaryOp::new(
7795 f.args[0].clone(),
7796 Expression::number(1),
7797 ),
7798 ))),
7799 true,
7800 )
7801 }
7802 }
7803 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
7804 Expression::Function(ref f)
7805 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
7806 {
7807 (Some(f.args[0].clone()), true)
7808 }
7809 _ => (None, false),
7810 };
7811 if let Some(idx) = new_index {
7812 if is_safe
7813 && matches!(
7814 target,
7815 DialectType::Presto | DialectType::Trino | DialectType::Athena
7816 )
7817 {
7818 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
7819 return Ok(Expression::Function(Box::new(Function::new(
7820 "ELEMENT_AT".to_string(),
7821 vec![sub.this.clone(), idx],
7822 ))));
7823 } else {
7824 // DuckDB or non-safe: just use subscript with converted index
7825 return Ok(Expression::Subscript(Box::new(
7826 crate::expressions::Subscript {
7827 this: sub.this.clone(),
7828 index: idx,
7829 },
7830 )));
7831 }
7832 }
7833 }
7834 }
7835
7836 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
7837 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7838 if let Expression::Length(ref uf) = e {
7839 let arg = uf.this.clone();
7840 let typeof_func = Expression::Function(Box::new(Function::new(
7841 "TYPEOF".to_string(),
7842 vec![arg.clone()],
7843 )));
7844 let blob_cast = Expression::Cast(Box::new(Cast {
7845 this: arg.clone(),
7846 to: DataType::VarBinary { length: None },
7847 trailing_comments: vec![],
7848 double_colon_syntax: false,
7849 format: None,
7850 default: None,
7851 inferred_type: None,
7852 }));
7853 let octet_length = Expression::Function(Box::new(Function::new(
7854 "OCTET_LENGTH".to_string(),
7855 vec![blob_cast],
7856 )));
7857 let text_cast = Expression::Cast(Box::new(Cast {
7858 this: arg,
7859 to: DataType::Text,
7860 trailing_comments: vec![],
7861 double_colon_syntax: false,
7862 format: None,
7863 default: None,
7864 inferred_type: None,
7865 }));
7866 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
7867 this: text_cast,
7868 original_name: None,
7869 inferred_type: None,
7870 }));
7871 return Ok(Expression::Case(Box::new(Case {
7872 operand: Some(typeof_func),
7873 whens: vec![(
7874 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
7875 octet_length,
7876 )],
7877 else_: Some(length_text),
7878 comments: Vec::new(),
7879 inferred_type: None,
7880 })));
7881 }
7882 }
7883
7884 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
7885 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
7886 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
7887 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
7888 if let Expression::Alias(ref a) = e {
7889 if matches!(&a.this, Expression::Unnest(_)) {
7890 if a.column_aliases.is_empty() {
7891 // Drop the entire alias, return just the UNNEST expression
7892 return Ok(a.this.clone());
7893 } else {
7894 // Use first column alias as the main alias
7895 let mut new_alias = a.as_ref().clone();
7896 new_alias.alias = a.column_aliases[0].clone();
7897 new_alias.column_aliases.clear();
7898 return Ok(Expression::Alias(Box::new(new_alias)));
7899 }
7900 }
7901 }
7902 }
7903
7904 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
7905 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
7906 if let Expression::In(ref in_expr) = e {
7907 if let Some(ref unnest_inner) = in_expr.unnest {
7908 // Build the function call for the target dialect
7909 let func_expr = if matches!(
7910 target,
7911 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7912 ) {
7913 // Use EXPLODE for Hive/Spark
7914 Expression::Function(Box::new(Function::new(
7915 "EXPLODE".to_string(),
7916 vec![*unnest_inner.clone()],
7917 )))
7918 } else {
7919 // Use UNNEST for Presto/Trino/DuckDB/etc.
7920 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
7921 this: *unnest_inner.clone(),
7922 expressions: Vec::new(),
7923 with_ordinality: false,
7924 alias: None,
7925 offset_alias: None,
7926 }))
7927 };
7928
7929 // Wrap in SELECT
7930 let mut inner_select = crate::expressions::Select::new();
7931 inner_select.expressions = vec![func_expr];
7932
7933 let subquery_expr = Expression::Select(Box::new(inner_select));
7934
7935 return Ok(Expression::In(Box::new(crate::expressions::In {
7936 this: in_expr.this.clone(),
7937 expressions: Vec::new(),
7938 query: Some(subquery_expr),
7939 not: in_expr.not,
7940 global: in_expr.global,
7941 unnest: None,
7942 is_field: false,
7943 })));
7944 }
7945 }
7946 }
7947
7948 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
7949 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
7950 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
7951 if let Expression::Alias(ref a) = e {
7952 if let Expression::Function(ref f) = a.this {
7953 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
7954 && !a.column_aliases.is_empty()
7955 {
7956 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
7957 let col_alias = a.column_aliases[0].clone();
7958 let mut inner_select = crate::expressions::Select::new();
7959 inner_select.expressions =
7960 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
7961 Expression::Identifier(Identifier::new("value".to_string())),
7962 col_alias,
7963 )))];
7964 inner_select.from = Some(crate::expressions::From {
7965 expressions: vec![a.this.clone()],
7966 });
7967 let subquery =
7968 Expression::Subquery(Box::new(crate::expressions::Subquery {
7969 this: Expression::Select(Box::new(inner_select)),
7970 alias: Some(a.alias.clone()),
7971 column_aliases: Vec::new(),
7972 alias_explicit_as: false,
7973 alias_keyword: None,
7974 order_by: None,
7975 limit: None,
7976 offset: None,
7977 lateral: false,
7978 modifiers_inside: false,
7979 trailing_comments: Vec::new(),
7980 distribute_by: None,
7981 sort_by: None,
7982 cluster_by: None,
7983 inferred_type: None,
7984 }));
7985 return Ok(subquery);
7986 }
7987 }
7988 }
7989 }
7990
7991 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
7992 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
7993 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
7994 if matches!(source, DialectType::BigQuery) {
7995 if let Expression::Select(ref s) = e {
7996 if let Some(ref from) = s.from {
7997 if from.expressions.len() >= 2 {
7998 // Collect table names from first expression
7999 let first_tables: Vec<String> = from
8000 .expressions
8001 .iter()
8002 .take(1)
8003 .filter_map(|expr| {
8004 if let Expression::Table(t) = expr {
8005 Some(t.name.name.to_ascii_lowercase())
8006 } else {
8007 None
8008 }
8009 })
8010 .collect();
8011
8012 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
8013 // or have a dotted name matching a table
8014 let mut needs_rewrite = false;
8015 for expr in from.expressions.iter().skip(1) {
8016 if let Expression::Table(t) = expr {
8017 if let Some(ref schema) = t.schema {
8018 if first_tables.contains(&schema.name.to_ascii_lowercase())
8019 {
8020 needs_rewrite = true;
8021 break;
8022 }
8023 }
8024 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
8025 if t.schema.is_none() && t.name.name.contains('.') {
8026 let parts: Vec<&str> = t.name.name.split('.').collect();
8027 if parts.len() >= 2
8028 && first_tables.contains(&parts[0].to_ascii_lowercase())
8029 {
8030 needs_rewrite = true;
8031 break;
8032 }
8033 }
8034 }
8035 }
8036
8037 if needs_rewrite {
8038 let mut new_select = s.clone();
8039 let mut new_from_exprs = vec![from.expressions[0].clone()];
8040 let mut new_joins = s.joins.clone();
8041
8042 for expr in from.expressions.iter().skip(1) {
8043 if let Expression::Table(ref t) = expr {
8044 if let Some(ref schema) = t.schema {
8045 if first_tables
8046 .contains(&schema.name.to_ascii_lowercase())
8047 {
8048 // This is an array path reference, convert to CROSS JOIN UNNEST
8049 let col_expr = Expression::Column(Box::new(
8050 crate::expressions::Column {
8051 name: t.name.clone(),
8052 table: Some(schema.clone()),
8053 join_mark: false,
8054 trailing_comments: vec![],
8055 span: None,
8056 inferred_type: None,
8057 },
8058 ));
8059 let unnest_expr = Expression::Unnest(Box::new(
8060 crate::expressions::UnnestFunc {
8061 this: col_expr,
8062 expressions: Vec::new(),
8063 with_ordinality: false,
8064 alias: None,
8065 offset_alias: None,
8066 },
8067 ));
8068 let join_this = if let Some(ref alias) = t.alias {
8069 if matches!(
8070 target,
8071 DialectType::Presto
8072 | DialectType::Trino
8073 | DialectType::Athena
8074 ) {
8075 // Presto: UNNEST(x) AS _t0(results)
8076 Expression::Alias(Box::new(
8077 crate::expressions::Alias {
8078 this: unnest_expr,
8079 alias: Identifier::new("_t0"),
8080 column_aliases: vec![alias.clone()],
8081 alias_explicit_as: false,
8082 alias_keyword: None,
8083 pre_alias_comments: vec![],
8084 trailing_comments: vec![],
8085 inferred_type: None,
8086 },
8087 ))
8088 } else {
// Other targets (e.g., BigQuery): UNNEST(x) AS results
8090 Expression::Alias(Box::new(
8091 crate::expressions::Alias {
8092 this: unnest_expr,
8093 alias: alias.clone(),
8094 column_aliases: vec![],
8095 alias_explicit_as: false,
8096 alias_keyword: None,
8097 pre_alias_comments: vec![],
8098 trailing_comments: vec![],
8099 inferred_type: None,
8100 },
8101 ))
8102 }
8103 } else {
8104 unnest_expr
8105 };
8106 new_joins.push(crate::expressions::Join {
8107 kind: crate::expressions::JoinKind::Cross,
8108 this: join_this,
8109 on: None,
8110 using: Vec::new(),
8111 use_inner_keyword: false,
8112 use_outer_keyword: false,
8113 deferred_condition: false,
8114 join_hint: None,
8115 match_condition: None,
8116 pivots: Vec::new(),
8117 comments: Vec::new(),
8118 nesting_group: 0,
8119 directed: false,
8120 });
8121 } else {
8122 new_from_exprs.push(expr.clone());
8123 }
8124 } else if t.schema.is_none() && t.name.name.contains('.') {
8125 // Dotted name in quoted identifier: `Coordinates.position`
8126 let parts: Vec<&str> = t.name.name.split('.').collect();
8127 if parts.len() >= 2
8128 && first_tables
8129 .contains(&parts[0].to_ascii_lowercase())
8130 {
8131 let join_this =
8132 if matches!(target, DialectType::BigQuery) {
8133 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
8134 Expression::Table(t.clone())
8135 } else {
8136 // Other targets: split into "schema"."name"
8137 let mut new_t = t.clone();
8138 new_t.schema =
8139 Some(Identifier::quoted(parts[0]));
8140 new_t.name = Identifier::quoted(parts[1]);
8141 Expression::Table(new_t)
8142 };
8143 new_joins.push(crate::expressions::Join {
8144 kind: crate::expressions::JoinKind::Cross,
8145 this: join_this,
8146 on: None,
8147 using: Vec::new(),
8148 use_inner_keyword: false,
8149 use_outer_keyword: false,
8150 deferred_condition: false,
8151 join_hint: None,
8152 match_condition: None,
8153 pivots: Vec::new(),
8154 comments: Vec::new(),
8155 nesting_group: 0,
8156 directed: false,
8157 });
8158 } else {
8159 new_from_exprs.push(expr.clone());
8160 }
8161 } else {
8162 new_from_exprs.push(expr.clone());
8163 }
8164 } else {
8165 new_from_exprs.push(expr.clone());
8166 }
8167 }
8168
8169 new_select.from = Some(crate::expressions::From {
8170 expressions: new_from_exprs,
8171 ..from.clone()
8172 });
8173 new_select.joins = new_joins;
8174 return Ok(Expression::Select(new_select));
8175 }
8176 }
8177 }
8178 }
8179 }
8180
8181 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
8182 if matches!(
8183 target,
8184 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8185 ) {
8186 if let Expression::Select(ref s) = e {
8187 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
8188 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
8189 matches!(expr, Expression::Unnest(_))
8190 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
8191 };
8192 let has_unnest_join = s.joins.iter().any(|j| {
8193 j.kind == crate::expressions::JoinKind::Cross && (
8194 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
8195 || is_unnest_or_explode_expr(&j.this)
8196 )
8197 });
8198 if has_unnest_join {
8199 let mut select = s.clone();
8200 let mut new_joins = Vec::new();
8201 for join in select.joins.drain(..) {
8202 if join.kind == crate::expressions::JoinKind::Cross {
8203 // Extract the UNNEST/EXPLODE from the join
8204 let (func_expr, table_alias, col_aliases) = match &join.this {
8205 Expression::Alias(a) => {
8206 let ta = if a.alias.is_empty() {
8207 None
8208 } else {
8209 Some(a.alias.clone())
8210 };
8211 let cas = a.column_aliases.clone();
8212 match &a.this {
8213 Expression::Unnest(u) => {
8214 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
8215 if !u.expressions.is_empty() {
8216 let mut all_args = vec![u.this.clone()];
8217 all_args.extend(u.expressions.clone());
8218 let arrays_zip =
8219 Expression::Function(Box::new(
8220 crate::expressions::Function::new(
8221 "ARRAYS_ZIP".to_string(),
8222 all_args,
8223 ),
8224 ));
8225 let inline = Expression::Function(Box::new(
8226 crate::expressions::Function::new(
8227 "INLINE".to_string(),
8228 vec![arrays_zip],
8229 ),
8230 ));
8231 (Some(inline), ta, a.column_aliases.clone())
8232 } else {
8233 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
8234 let func_name = if u.with_ordinality {
8235 "POSEXPLODE"
8236 } else {
8237 "EXPLODE"
8238 };
8239 let explode = Expression::Function(Box::new(
8240 crate::expressions::Function::new(
8241 func_name.to_string(),
8242 vec![u.this.clone()],
8243 ),
8244 ));
8245 // For POSEXPLODE, add 'pos' to column aliases
8246 let cas = if u.with_ordinality {
8247 let mut pos_aliases =
8248 vec![Identifier::new(
8249 "pos".to_string(),
8250 )];
8251 pos_aliases
8252 .extend(a.column_aliases.clone());
8253 pos_aliases
8254 } else {
8255 a.column_aliases.clone()
8256 };
8257 (Some(explode), ta, cas)
8258 }
8259 }
8260 Expression::Function(f)
8261 if f.name.eq_ignore_ascii_case("EXPLODE") =>
8262 {
8263 (Some(Expression::Function(f.clone())), ta, cas)
8264 }
8265 _ => (None, None, Vec::new()),
8266 }
8267 }
8268 Expression::Unnest(u) => {
8269 let func_name = if u.with_ordinality {
8270 "POSEXPLODE"
8271 } else {
8272 "EXPLODE"
8273 };
8274 let explode = Expression::Function(Box::new(
8275 crate::expressions::Function::new(
8276 func_name.to_string(),
8277 vec![u.this.clone()],
8278 ),
8279 ));
8280 let ta = u.alias.clone();
8281 let col_aliases = if u.with_ordinality {
8282 vec![Identifier::new("pos".to_string())]
8283 } else {
8284 Vec::new()
8285 };
8286 (Some(explode), ta, col_aliases)
8287 }
8288 _ => (None, None, Vec::new()),
8289 };
8290 if let Some(func) = func_expr {
8291 select.lateral_views.push(crate::expressions::LateralView {
8292 this: func,
8293 table_alias,
8294 column_aliases: col_aliases,
8295 outer: false,
8296 });
8297 } else {
8298 new_joins.push(join);
8299 }
8300 } else {
8301 new_joins.push(join);
8302 }
8303 }
8304 select.joins = new_joins;
8305 return Ok(Expression::Select(select));
8306 }
8307 }
8308 }
8309
8310 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
8311 // for BigQuery, Presto/Trino, Snowflake
8312 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
8313 && matches!(
8314 target,
8315 DialectType::BigQuery
8316 | DialectType::Presto
8317 | DialectType::Trino
8318 | DialectType::Snowflake
8319 )
8320 {
8321 if let Expression::Select(ref s) = e {
8322 // Check if any SELECT expressions contain UNNEST
8323 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
8324 let has_unnest_in_select = s.expressions.iter().any(|expr| {
8325 fn contains_unnest(e: &Expression) -> bool {
8326 match e {
8327 Expression::Unnest(_) => true,
8328 Expression::Function(f)
8329 if f.name.eq_ignore_ascii_case("UNNEST") =>
8330 {
8331 true
8332 }
8333 Expression::Alias(a) => contains_unnest(&a.this),
8334 Expression::Add(op)
8335 | Expression::Sub(op)
8336 | Expression::Mul(op)
8337 | Expression::Div(op) => {
8338 contains_unnest(&op.left) || contains_unnest(&op.right)
8339 }
8340 _ => false,
8341 }
8342 }
8343 contains_unnest(expr)
8344 });
8345
8346 if has_unnest_in_select {
8347 let rewritten = Self::rewrite_unnest_expansion(s, target);
8348 if let Some(new_select) = rewritten {
8349 return Ok(Expression::Select(Box::new(new_select)));
8350 }
8351 }
8352 }
8353 }
8354
8355 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
8356 // BigQuery '\n' -> PostgreSQL literal newline in string
8357 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
8358 {
8359 if let Expression::Literal(ref lit) = e {
8360 if let Literal::String(ref s) = lit.as_ref() {
8361 if s.contains("\\n")
8362 || s.contains("\\t")
8363 || s.contains("\\r")
8364 || s.contains("\\\\")
8365 {
8366 let converted = s
8367 .replace("\\n", "\n")
8368 .replace("\\t", "\t")
8369 .replace("\\r", "\r")
8370 .replace("\\\\", "\\");
8371 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
8372 }
8373 }
8374 }
8375 }
8376
8377 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
8378 // when source != target (identity tests keep the Literal::Timestamp for native handling)
8379 if source != target {
8380 if let Expression::Literal(ref lit) = e {
8381 if let Literal::Timestamp(ref s) = lit.as_ref() {
8382 let s = s.clone();
8383 // MySQL: TIMESTAMP handling depends on source dialect
8384 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
8385 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
8386 if matches!(target, DialectType::MySQL) {
8387 if matches!(source, DialectType::BigQuery) {
8388 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
8389 return Ok(Expression::Function(Box::new(Function::new(
8390 "TIMESTAMP".to_string(),
8391 vec![Expression::Literal(Box::new(Literal::String(s)))],
8392 ))));
8393 } else {
8394 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
8395 return Ok(Expression::Cast(Box::new(Cast {
8396 this: Expression::Literal(Box::new(Literal::String(s))),
8397 to: DataType::Custom {
8398 name: "DATETIME".to_string(),
8399 },
8400 trailing_comments: Vec::new(),
8401 double_colon_syntax: false,
8402 format: None,
8403 default: None,
8404 inferred_type: None,
8405 })));
8406 }
8407 }
8408 let dt = match target {
8409 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
8410 name: "DATETIME".to_string(),
8411 },
8412 DialectType::Snowflake => {
8413 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
8414 if matches!(source, DialectType::BigQuery) {
8415 DataType::Custom {
8416 name: "TIMESTAMPTZ".to_string(),
8417 }
8418 } else if matches!(
8419 source,
8420 DialectType::PostgreSQL
8421 | DialectType::Redshift
8422 | DialectType::Snowflake
8423 ) {
8424 DataType::Timestamp {
8425 precision: None,
8426 timezone: false,
8427 }
8428 } else {
8429 DataType::Custom {
8430 name: "TIMESTAMPNTZ".to_string(),
8431 }
8432 }
8433 }
8434 DialectType::Spark | DialectType::Databricks => {
8435 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
8436 if matches!(source, DialectType::BigQuery) {
8437 DataType::Timestamp {
8438 precision: None,
8439 timezone: false,
8440 }
8441 } else {
8442 DataType::Custom {
8443 name: "TIMESTAMP_NTZ".to_string(),
8444 }
8445 }
8446 }
8447 DialectType::ClickHouse => DataType::Nullable {
8448 inner: Box::new(DataType::Custom {
8449 name: "DateTime".to_string(),
8450 }),
8451 },
8452 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
8453 name: "DATETIME2".to_string(),
8454 },
8455 DialectType::DuckDB => {
8456 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
8457 // or when the timestamp string explicitly has timezone info
8458 if matches!(source, DialectType::BigQuery)
8459 || Self::timestamp_string_has_timezone(&s)
8460 {
8461 DataType::Custom {
8462 name: "TIMESTAMPTZ".to_string(),
8463 }
8464 } else {
8465 DataType::Timestamp {
8466 precision: None,
8467 timezone: false,
8468 }
8469 }
8470 }
8471 _ => DataType::Timestamp {
8472 precision: None,
8473 timezone: false,
8474 },
8475 };
8476 return Ok(Expression::Cast(Box::new(Cast {
8477 this: Expression::Literal(Box::new(Literal::String(s))),
8478 to: dt,
8479 trailing_comments: vec![],
8480 double_colon_syntax: false,
8481 format: None,
8482 default: None,
8483 inferred_type: None,
8484 })));
8485 }
8486 }
8487 }
8488
8489 // PostgreSQL DELETE requires explicit AS for table aliases
8490 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
8491 if let Expression::Delete(ref del) = e {
8492 if del.alias.is_some() && !del.alias_explicit_as {
8493 let mut new_del = del.clone();
8494 new_del.alias_explicit_as = true;
8495 return Ok(Expression::Delete(new_del));
8496 }
8497 }
8498 }
8499
8500 // UNION/INTERSECT/EXCEPT DISTINCT handling:
8501 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
8502 // while others don't support it (Presto, Spark, DuckDB, etc.)
8503 {
8504 let needs_distinct =
8505 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
8506 let drop_distinct = matches!(
8507 target,
8508 DialectType::Presto
8509 | DialectType::Trino
8510 | DialectType::Athena
8511 | DialectType::Spark
8512 | DialectType::Databricks
8513 | DialectType::DuckDB
8514 | DialectType::Hive
8515 | DialectType::MySQL
8516 | DialectType::PostgreSQL
8517 | DialectType::SQLite
8518 | DialectType::TSQL
8519 | DialectType::Redshift
8520 | DialectType::Snowflake
8521 | DialectType::Oracle
8522 | DialectType::Teradata
8523 | DialectType::Drill
8524 | DialectType::Doris
8525 | DialectType::StarRocks
8526 );
8527 match &e {
8528 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
8529 let mut new_u = (**u).clone();
8530 new_u.distinct = true;
8531 return Ok(Expression::Union(Box::new(new_u)));
8532 }
8533 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
8534 let mut new_i = (**i).clone();
8535 new_i.distinct = true;
8536 return Ok(Expression::Intersect(Box::new(new_i)));
8537 }
8538 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
8539 let mut new_ex = (**ex).clone();
8540 new_ex.distinct = true;
8541 return Ok(Expression::Except(Box::new(new_ex)));
8542 }
8543 Expression::Union(u) if u.distinct && drop_distinct => {
8544 let mut new_u = (**u).clone();
8545 new_u.distinct = false;
8546 return Ok(Expression::Union(Box::new(new_u)));
8547 }
8548 Expression::Intersect(i) if i.distinct && drop_distinct => {
8549 let mut new_i = (**i).clone();
8550 new_i.distinct = false;
8551 return Ok(Expression::Intersect(Box::new(new_i)));
8552 }
8553 Expression::Except(ex) if ex.distinct && drop_distinct => {
8554 let mut new_ex = (**ex).clone();
8555 new_ex.distinct = false;
8556 return Ok(Expression::Except(Box::new(new_ex)));
8557 }
8558 _ => {}
8559 }
8560 }
8561
8562 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
8563 if matches!(target, DialectType::ClickHouse) {
8564 if let Expression::Function(ref f) = e {
8565 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
8566 let mut new_f = f.as_ref().clone();
8567 new_f.name = "map".to_string();
8568 return Ok(Expression::Function(Box::new(new_f)));
8569 }
8570 }
8571 }
8572
8573 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
8574 if matches!(target, DialectType::ClickHouse) {
8575 if let Expression::Intersect(ref i) = e {
8576 if i.all {
8577 let mut new_i = (**i).clone();
8578 new_i.all = false;
8579 return Ok(Expression::Intersect(Box::new(new_i)));
8580 }
8581 }
8582 }
8583
8584 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
8585 // Only from Generic source, to prevent double-wrapping
8586 if matches!(source, DialectType::Generic) {
8587 if let Expression::Div(ref op) = e {
8588 let cast_type = match target {
8589 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
8590 precision: None,
8591 scale: None,
8592 real_spelling: false,
8593 }),
8594 DialectType::Drill
8595 | DialectType::Trino
8596 | DialectType::Athena
8597 | DialectType::Presto => Some(DataType::Double {
8598 precision: None,
8599 scale: None,
8600 }),
8601 DialectType::PostgreSQL
8602 | DialectType::Redshift
8603 | DialectType::Materialize
8604 | DialectType::Teradata
8605 | DialectType::RisingWave => Some(DataType::Double {
8606 precision: None,
8607 scale: None,
8608 }),
8609 _ => None,
8610 };
8611 if let Some(dt) = cast_type {
8612 let cast_left = Expression::Cast(Box::new(Cast {
8613 this: op.left.clone(),
8614 to: dt,
8615 double_colon_syntax: false,
8616 trailing_comments: Vec::new(),
8617 format: None,
8618 default: None,
8619 inferred_type: None,
8620 }));
8621 let new_op = crate::expressions::BinaryOp {
8622 left: cast_left,
8623 right: op.right.clone(),
8624 left_comments: op.left_comments.clone(),
8625 operator_comments: op.operator_comments.clone(),
8626 trailing_comments: op.trailing_comments.clone(),
8627 inferred_type: None,
8628 };
8629 return Ok(Expression::Div(Box::new(new_op)));
8630 }
8631 }
8632 }
8633
8634 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
8635 if matches!(target, DialectType::DuckDB) {
8636 if let Expression::CreateDatabase(db) = e {
8637 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
8638 schema.if_not_exists = db.if_not_exists;
8639 return Ok(Expression::CreateSchema(Box::new(schema)));
8640 }
8641 if let Expression::DropDatabase(db) = e {
8642 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
8643 schema.if_exists = db.if_exists;
8644 return Ok(Expression::DropSchema(Box::new(schema)));
8645 }
8646 }
8647
8648 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
8649 if matches!(source, DialectType::ClickHouse)
8650 && !matches!(target, DialectType::ClickHouse)
8651 {
8652 if let Expression::Cast(ref c) = e {
8653 if let DataType::Custom { ref name } = c.to {
8654 if name.len() >= 9
8655 && name[..9].eq_ignore_ascii_case("NULLABLE(")
8656 && name.ends_with(")")
8657 {
8658 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
8659 let inner_upper = inner.to_ascii_uppercase();
8660 let new_dt = match inner_upper.as_str() {
8661 "DATETIME" | "DATETIME64" => DataType::Timestamp {
8662 precision: None,
8663 timezone: false,
8664 },
8665 "DATE" => DataType::Date,
8666 "INT64" | "BIGINT" => DataType::BigInt { length: None },
8667 "INT32" | "INT" | "INTEGER" => DataType::Int {
8668 length: None,
8669 integer_spelling: false,
8670 },
8671 "FLOAT64" | "DOUBLE" => DataType::Double {
8672 precision: None,
8673 scale: None,
8674 },
8675 "STRING" => DataType::Text,
8676 _ => DataType::Custom {
8677 name: inner.to_string(),
8678 },
8679 };
8680 let mut new_cast = c.clone();
8681 new_cast.to = new_dt;
8682 return Ok(Expression::Cast(new_cast));
8683 }
8684 }
8685 }
8686 }
8687
8688 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
8689 if matches!(target, DialectType::Snowflake) {
8690 if let Expression::ArrayConcatAgg(ref agg) = e {
8691 let mut agg_clone = agg.as_ref().clone();
8692 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
8693 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
8694 let flatten = Expression::Function(Box::new(Function::new(
8695 "ARRAY_FLATTEN".to_string(),
8696 vec![array_agg],
8697 )));
8698 return Ok(flatten);
8699 }
8700 }
8701
8702 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
8703 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
8704 if let Expression::ArrayConcatAgg(agg) = e {
8705 let arg = agg.this;
8706 return Ok(Expression::Function(Box::new(Function::new(
8707 "ARRAY_CONCAT_AGG".to_string(),
8708 vec![arg],
8709 ))));
8710 }
8711 }
8712
8713 // Determine what action to take by inspecting e immutably
8714 let action = {
8715 let source_propagates_nulls =
8716 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
8717 let target_ignores_nulls =
8718 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
8719
8720 match &e {
8721 Expression::Function(f) => {
8722 let name = f.name.to_ascii_uppercase();
8723 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
8724 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
8725 if name == "JSON"
8726 && f.args.len() == 1
8727 && matches!(source, DialectType::DuckDB)
8728 && matches!(
8729 target,
8730 DialectType::Presto | DialectType::Trino | DialectType::Athena
8731 )
8732 {
8733 Action::DuckDBJsonFuncToJsonParse
8734 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
8735 // SQL:2016 `x IS JSON` predicate which has matching semantics.
8736 } else if name == "JSON_VALID"
8737 && f.args.len() == 1
8738 && matches!(source, DialectType::DuckDB)
8739 && matches!(
8740 target,
8741 DialectType::Presto | DialectType::Trino | DialectType::Athena
8742 )
8743 {
8744 Action::DuckDBJsonValidToIsJson
8745 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
8746 } else if (name == "DATE_PART" || name == "DATEPART")
8747 && f.args.len() == 2
8748 && matches!(target, DialectType::Snowflake)
8749 && !matches!(source, DialectType::Snowflake)
8750 && matches!(
8751 &f.args[0],
8752 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8753 )
8754 {
8755 Action::DatePartUnquote
8756 } else if source_propagates_nulls
8757 && target_ignores_nulls
8758 && (name == "GREATEST" || name == "LEAST")
8759 && f.args.len() >= 2
8760 {
8761 Action::GreatestLeastNull
8762 } else if matches!(source, DialectType::Snowflake)
8763 && name == "ARRAY_GENERATE_RANGE"
8764 && f.args.len() >= 2
8765 {
8766 Action::ArrayGenerateRange
8767 } else if matches!(source, DialectType::Snowflake)
8768 && matches!(target, DialectType::DuckDB)
8769 && name == "DATE_TRUNC"
8770 && f.args.len() == 2
8771 {
8772 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
8773 // Logic based on Python sqlglot's input_type_preserved flag:
8774 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
8775 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
8776 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
8777 let unit_str = match &f.args[0] {
8778 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
8779 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
8780 Some(s.to_ascii_uppercase())
8781 }
8782 _ => None,
8783 };
8784 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
8785 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
8786 });
8787 match &f.args[1] {
8788 Expression::Cast(c) => match &c.to {
8789 DataType::Time { .. } => Action::DateTruncWrapCast,
8790 DataType::Custom { name }
8791 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
8792 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
8793 {
8794 Action::DateTruncWrapCast
8795 }
8796 DataType::Timestamp { timezone: true, .. } => {
8797 Action::DateTruncWrapCast
8798 }
8799 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
8800 DataType::Timestamp {
8801 timezone: false, ..
8802 } if is_date_unit => Action::DateTruncWrapCast,
8803 _ => Action::None,
8804 },
8805 _ => Action::None,
8806 }
8807 } else if matches!(source, DialectType::Snowflake)
8808 && matches!(target, DialectType::DuckDB)
8809 && name == "TO_DATE"
8810 && f.args.len() == 1
8811 && !matches!(
8812 &f.args[0],
8813 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8814 )
8815 {
8816 Action::ToDateToCast
8817 } else if !matches!(source, DialectType::Redshift)
8818 && matches!(target, DialectType::Redshift)
8819 && name == "CONVERT_TIMEZONE"
8820 && (f.args.len() == 2 || f.args.len() == 3)
8821 {
8822 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
8823 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
8824 // The Redshift parser adds 'UTC' as default source_tz, but when
8825 // transpiling from other dialects, we should preserve the original form.
8826 Action::ConvertTimezoneToExpr
8827 } else if matches!(source, DialectType::Snowflake)
8828 && matches!(target, DialectType::DuckDB)
8829 && name == "REGEXP_REPLACE"
8830 && f.args.len() == 4
8831 && !matches!(
8832 &f.args[3],
8833 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8834 )
8835 {
8836 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
8837 Action::RegexpReplaceSnowflakeToDuckDB
8838 } else if matches!(source, DialectType::Snowflake)
8839 && matches!(target, DialectType::DuckDB)
8840 && name == "REGEXP_REPLACE"
8841 && f.args.len() == 5
8842 {
8843 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
8844 Action::RegexpReplacePositionSnowflakeToDuckDB
8845 } else if matches!(source, DialectType::Snowflake)
8846 && matches!(target, DialectType::DuckDB)
8847 && name == "REGEXP_SUBSTR"
8848 {
8849 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
8850 Action::RegexpSubstrSnowflakeToDuckDB
8851 } else if matches!(source, DialectType::Snowflake)
8852 && matches!(target, DialectType::Snowflake)
8853 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
8854 && f.args.len() == 6
8855 {
8856 // Snowflake identity: strip trailing group=0
8857 Action::RegexpSubstrSnowflakeIdentity
8858 } else if matches!(source, DialectType::Snowflake)
8859 && matches!(target, DialectType::DuckDB)
8860 && name == "REGEXP_SUBSTR_ALL"
8861 {
8862 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
8863 Action::RegexpSubstrAllSnowflakeToDuckDB
8864 } else if matches!(source, DialectType::Snowflake)
8865 && matches!(target, DialectType::DuckDB)
8866 && name == "REGEXP_COUNT"
8867 {
8868 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
8869 Action::RegexpCountSnowflakeToDuckDB
8870 } else if matches!(source, DialectType::Snowflake)
8871 && matches!(target, DialectType::DuckDB)
8872 && name == "REGEXP_INSTR"
8873 {
8874 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
8875 Action::RegexpInstrSnowflakeToDuckDB
8876 } else if matches!(source, DialectType::BigQuery)
8877 && matches!(target, DialectType::Snowflake)
8878 && name == "REGEXP_EXTRACT_ALL"
8879 {
8880 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
8881 Action::RegexpExtractAllToSnowflake
8882 } else if name == "_BQ_TO_HEX" {
8883 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
8884 Action::BigQueryToHexBare
8885 } else if matches!(source, DialectType::BigQuery)
8886 && !matches!(target, DialectType::BigQuery)
8887 {
8888 // BigQuery-specific functions that need to be converted to standard forms
8889 match name.as_str() {
8890 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
8891 | "DATE_DIFF"
8892 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
8893 | "DATETIME_ADD" | "DATETIME_SUB"
8894 | "TIME_ADD" | "TIME_SUB"
8895 | "DATE_ADD" | "DATE_SUB"
8896 | "SAFE_DIVIDE"
8897 | "GENERATE_UUID"
8898 | "COUNTIF"
8899 | "EDIT_DISTANCE"
8900 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
8901 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
8902 | "TO_HEX"
8903 | "TO_JSON_STRING"
8904 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
8905 | "DIV"
8906 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
8907 | "LAST_DAY"
8908 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
8909 | "REGEXP_CONTAINS"
8910 | "CONTAINS_SUBSTR"
8911 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
8912 | "SAFE_CAST"
8913 | "GENERATE_DATE_ARRAY"
8914 | "PARSE_DATE" | "PARSE_TIMESTAMP"
8915 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
8916 | "ARRAY_CONCAT"
8917 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
8918 | "INSTR"
8919 | "MD5" | "SHA1" | "SHA256" | "SHA512"
8920 | "GENERATE_UUID()" // just in case
8921 | "REGEXP_EXTRACT_ALL"
8922 | "REGEXP_EXTRACT"
8923 | "INT64"
8924 | "ARRAY_CONCAT_AGG"
8925 | "DATE_DIFF(" // just in case
8926 | "TO_HEX_MD5" // internal
8927 | "MOD"
8928 | "CONCAT"
8929 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
8930 | "STRUCT"
8931 | "ROUND"
8932 | "MAKE_INTERVAL"
8933 | "ARRAY_TO_STRING"
8934 | "PERCENTILE_CONT"
8935 => Action::BigQueryFunctionNormalize,
8936 "ARRAY" if matches!(target, DialectType::Snowflake)
8937 && f.args.len() == 1
8938 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
8939 => Action::BigQueryArraySelectAsStructToSnowflake,
8940 _ => Action::None,
8941 }
8942 } else if matches!(source, DialectType::BigQuery)
8943 && matches!(target, DialectType::BigQuery)
8944 {
8945 // BigQuery -> BigQuery normalizations
8946 match name.as_str() {
8947 "TIMESTAMP_DIFF"
8948 | "DATETIME_DIFF"
8949 | "TIME_DIFF"
8950 | "DATE_DIFF"
8951 | "DATE_ADD"
8952 | "TO_HEX"
8953 | "CURRENT_TIMESTAMP"
8954 | "CURRENT_DATE"
8955 | "CURRENT_TIME"
8956 | "CURRENT_DATETIME"
8957 | "GENERATE_DATE_ARRAY"
8958 | "INSTR"
8959 | "FORMAT_DATETIME"
8960 | "DATETIME"
8961 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
8962 _ => Action::None,
8963 }
8964 } else {
8965 // Generic function normalization for non-BigQuery sources
8966 match name.as_str() {
8967 "ARBITRARY" | "AGGREGATE"
8968 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
8969 | "STRUCT_EXTRACT"
8970 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
8971 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
8972 | "SUBSTRINGINDEX"
8973 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
8974 | "UNICODE"
8975 | "XOR"
8976 | "ARRAY_REVERSE_SORT"
8977 | "ENCODE" | "DECODE"
8978 | "QUANTILE"
8979 | "EPOCH" | "EPOCH_MS"
8980 | "HASHBYTES"
8981 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
8982 | "APPROX_DISTINCT"
8983 | "DATE_PARSE" | "FORMAT_DATETIME"
8984 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
8985 | "RLIKE"
8986 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
8987 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
8988 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
8989 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
8990 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
8991 | "MAP" | "MAP_FROM_ENTRIES"
8992 | "COLLECT_LIST" | "COLLECT_SET"
8993 | "ISNAN" | "IS_NAN"
8994 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
8995 | "FORMAT_NUMBER"
8996 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
8997 | "ELEMENT_AT"
8998 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
8999 | "SPLIT_PART"
9000 // GENERATE_SERIES: handled separately below
9001 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
9002 | "JSON_QUERY" | "JSON_VALUE"
9003 | "JSON_SEARCH"
9004 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
9005 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
9006 | "CURDATE" | "CURTIME"
9007 | "ARRAY_TO_STRING"
9008 | "ARRAY_SORT" | "SORT_ARRAY"
9009 | "LEFT" | "RIGHT"
9010 | "MAP_FROM_ARRAYS"
9011 | "LIKE" | "ILIKE"
9012 | "ARRAY_CONCAT" | "LIST_CONCAT"
9013 | "QUANTILE_CONT" | "QUANTILE_DISC"
9014 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
9015 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
9016 | "LOCATE" | "STRPOS" | "INSTR"
9017 | "CHAR"
9018 // CONCAT: handled separately for COALESCE wrapping
9019 | "ARRAY_JOIN"
9020 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
9021 | "ISNULL"
9022 | "MONTHNAME"
9023 | "TO_TIMESTAMP"
9024 | "TO_DATE"
9025 | "TO_JSON"
9026 | "REGEXP_SPLIT"
9027 | "SPLIT"
9028 | "FORMATDATETIME"
9029 | "ARRAYJOIN"
9030 | "SPLITBYSTRING" | "SPLITBYREGEXP"
9031 | "NVL"
9032 | "TO_CHAR"
9033 | "DBMS_RANDOM.VALUE"
9034 | "REGEXP_LIKE"
9035 | "REPLICATE"
9036 | "LEN"
9037 | "COUNT_BIG"
9038 | "DATEFROMPARTS"
9039 | "DATETIMEFROMPARTS"
9040 | "CONVERT" | "TRY_CONVERT"
9041 | "STRFTIME" | "STRPTIME"
9042 | "DATE_FORMAT" | "FORMAT_DATE"
9043 | "PARSE_TIMESTAMP" | "PARSE_DATE"
9044 | "FROM_BASE64" | "TO_BASE64"
9045 | "GETDATE"
9046 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
9047 | "TO_UTF8" | "FROM_UTF8"
9048 | "STARTS_WITH" | "STARTSWITH"
9049 | "APPROX_COUNT_DISTINCT"
9050 | "JSON_FORMAT"
9051 | "SYSDATE"
9052 | "LOGICAL_OR" | "LOGICAL_AND"
9053 | "MONTHS_ADD"
9054 | "SCHEMA_NAME"
9055 | "STRTOL"
9056 | "EDITDIST3"
9057 | "FORMAT"
9058 | "LIST_CONTAINS" | "LIST_HAS"
9059 | "VARIANCE" | "STDDEV"
9060 | "ISINF"
9061 | "TO_UNIXTIME"
9062 | "FROM_UNIXTIME"
9063 | "DATEPART" | "DATE_PART"
9064 | "DATENAME"
9065 | "STRING_AGG"
9066 | "JSON_ARRAYAGG"
9067 | "APPROX_QUANTILE"
9068 | "MAKE_DATE"
9069 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
9070 | "RANGE"
9071 | "TRY_ELEMENT_AT"
9072 | "STR_TO_MAP"
9073 | "STRING"
9074 | "STR_TO_TIME"
9075 | "CURRENT_SCHEMA"
9076 | "LTRIM" | "RTRIM"
9077 | "UUID"
9078 | "FARM_FINGERPRINT"
9079 | "JSON_KEYS"
9080 | "WEEKOFYEAR"
9081 | "CONCAT_WS"
9082 | "TRY_DIVIDE"
9083 | "ARRAY_SLICE"
9084 | "ARRAY_PREPEND"
9085 | "ARRAY_REMOVE"
9086 | "GENERATE_DATE_ARRAY"
9087 | "PARSE_JSON"
9088 | "JSON_REMOVE"
9089 | "JSON_SET"
9090 | "LEVENSHTEIN"
9091 | "CURRENT_VERSION"
9092 | "ARRAY_MAX"
9093 | "ARRAY_MIN"
9094 | "JAROWINKLER_SIMILARITY"
9095 | "CURRENT_SCHEMAS"
9096 | "TO_VARIANT"
9097 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
9098 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
9099 => Action::GenericFunctionNormalize,
9100 // Canonical date functions -> dialect-specific
9101 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
9102 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
9103 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
9104 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
9105 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
9106 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
9107 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
9108 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
9109 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
9110 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
9111 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
9112 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
9113 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
9114 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
9115 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
9116 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
9117 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
9118 // STR_TO_DATE(x, fmt) -> dialect-specific
9119 "STR_TO_DATE" if f.args.len() == 2
9120 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
9121 "STR_TO_DATE" => Action::GenericFunctionNormalize,
9122 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
9123 "TS_OR_DS_ADD" if f.args.len() == 3
9124 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
9125 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
9126 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
9127 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
9128 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
9129 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
9130 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
9131 // IS_ASCII(x) -> dialect-specific
9132 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
9133 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
9134 "STR_POSITION" => Action::StrPositionConvert,
9135 // ARRAY_SUM -> dialect-specific
9136 "ARRAY_SUM" => Action::ArraySumConvert,
9137 // ARRAY_SIZE -> dialect-specific (Drill only)
9138 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
9139 // ARRAY_ANY -> dialect-specific
9140 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
9141 // Functions needing specific cross-dialect transforms
9142 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
9143 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
9144 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
9145 "ARRAY" if matches!(source, DialectType::BigQuery)
9146 && matches!(target, DialectType::Snowflake)
9147 && f.args.len() == 1
9148 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
9149 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
9150 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
9151 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
9152 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
9153 "DATE_TRUNC" if f.args.len() == 2
9154 && matches!(source, DialectType::Generic)
9155 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
9156 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
9157 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
9158 "TIMESTAMP_TRUNC" if f.args.len() >= 2
9159 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
9160 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
9161 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
9162 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9163 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
9164 // GENERATE_SERIES with interval normalization for PG target
9165 "GENERATE_SERIES" if f.args.len() >= 3
9166 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9167 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
9168 "GENERATE_SERIES" => Action::None, // passthrough for other cases
9169 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
9170 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9171 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
9172 "CONCAT" => Action::GenericFunctionNormalize,
9173 // DIV(a, b) -> target-specific integer division
9174 "DIV" if f.args.len() == 2
9175 && matches!(source, DialectType::PostgreSQL)
9176 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
9177 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
9178 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
9179 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
9180 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
9181 "JSONB_EXISTS" if f.args.len() == 2
9182 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
9183 // DATE_BIN -> TIME_BUCKET for DuckDB
9184 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
9185 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
9186 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
9187 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
9188 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
9189 // ClickHouse any -> ANY_VALUE for other dialects
9190 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
9191 _ => Action::None,
9192 }
9193 }
9194 }
9195 Expression::AggregateFunction(af) => {
9196 let name = af.name.to_ascii_uppercase();
9197 match name.as_str() {
9198 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
9199 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
9200 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
9201 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
9202 if matches!(target, DialectType::DuckDB) =>
9203 {
9204 Action::JsonObjectAggConvert
9205 }
9206 "ARRAY_AGG"
9207 if matches!(
9208 target,
9209 DialectType::Hive
9210 | DialectType::Spark
9211 | DialectType::Databricks
9212 ) =>
9213 {
9214 Action::ArrayAggToCollectList
9215 }
9216 "MAX_BY" | "MIN_BY"
9217 if matches!(
9218 target,
9219 DialectType::ClickHouse
9220 | DialectType::Spark
9221 | DialectType::Databricks
9222 | DialectType::DuckDB
9223 ) =>
9224 {
9225 Action::MaxByMinByConvert
9226 }
9227 "COLLECT_LIST"
9228 if matches!(
9229 target,
9230 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
9231 ) =>
9232 {
9233 Action::CollectListToArrayAgg
9234 }
9235 "COLLECT_SET"
9236 if matches!(
9237 target,
9238 DialectType::Presto
9239 | DialectType::Trino
9240 | DialectType::Snowflake
9241 | DialectType::DuckDB
9242 ) =>
9243 {
9244 Action::CollectSetConvert
9245 }
9246 "PERCENTILE"
9247 if matches!(
9248 target,
9249 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9250 ) =>
9251 {
9252 Action::PercentileConvert
9253 }
9254 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
9255 "CORR"
9256 if matches!(target, DialectType::DuckDB)
9257 && matches!(source, DialectType::Snowflake) =>
9258 {
9259 Action::CorrIsnanWrap
9260 }
9261 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
9262 "APPROX_QUANTILES"
9263 if matches!(source, DialectType::BigQuery)
9264 && matches!(target, DialectType::DuckDB) =>
9265 {
9266 Action::BigQueryApproxQuantiles
9267 }
9268 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
9269 "PERCENTILE_CONT"
9270 if matches!(source, DialectType::BigQuery)
9271 && matches!(target, DialectType::DuckDB)
9272 && af.args.len() >= 2 =>
9273 {
9274 Action::BigQueryPercentileContToDuckDB
9275 }
9276 _ => Action::None,
9277 }
9278 }
9279 Expression::JSONArrayAgg(_) => match target {
9280 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
9281 _ => Action::None,
9282 },
9283 Expression::ToNumber(tn) => {
9284 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
9285 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
9286 match target {
9287 DialectType::Oracle
9288 | DialectType::Snowflake
9289 | DialectType::Teradata => Action::None,
9290 _ => Action::GenericFunctionNormalize,
9291 }
9292 } else {
9293 Action::None
9294 }
9295 }
9296 Expression::Nvl2(_) => {
9297 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
9298 // Keep as NVL2 for dialects that support it natively
9299 match target {
9300 DialectType::Oracle
9301 | DialectType::Snowflake
9302 | DialectType::Teradata
9303 | DialectType::Spark
9304 | DialectType::Databricks
9305 | DialectType::Redshift => Action::None,
9306 _ => Action::Nvl2Expand,
9307 }
9308 }
9309 Expression::Decode(_) | Expression::DecodeCase(_) => {
9310 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
9311 // Keep as DECODE for Oracle/Snowflake
9312 match target {
9313 DialectType::Oracle | DialectType::Snowflake => Action::None,
9314 _ => Action::DecodeSimplify,
9315 }
9316 }
9317 Expression::Coalesce(ref cf) => {
9318 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
9319 // BigQuery keeps IFNULL natively when source is also BigQuery
9320 if cf.original_name.as_deref() == Some("IFNULL")
9321 && !(matches!(source, DialectType::BigQuery)
9322 && matches!(target, DialectType::BigQuery))
9323 {
9324 Action::IfnullToCoalesce
9325 } else {
9326 Action::None
9327 }
9328 }
9329 Expression::IfFunc(if_func) => {
9330 if matches!(source, DialectType::Snowflake)
9331 && matches!(
9332 target,
9333 DialectType::Presto | DialectType::Trino | DialectType::SQLite
9334 )
9335 && matches!(if_func.false_value, Some(Expression::Div(_)))
9336 {
9337 Action::Div0TypedDivision
9338 } else {
9339 Action::None
9340 }
9341 }
9342 Expression::ToJson(_) => match target {
9343 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
9344 DialectType::BigQuery => Action::ToJsonConvert,
9345 DialectType::DuckDB => Action::ToJsonConvert,
9346 _ => Action::None,
9347 },
9348 Expression::ArrayAgg(ref agg) => {
9349 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
9350 Action::ArrayAggToGroupConcat
9351 } else if matches!(
9352 target,
9353 DialectType::Hive | DialectType::Spark | DialectType::Databricks
9354 ) {
9355 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
9356 Action::ArrayAggToCollectList
9357 } else if matches!(
9358 source,
9359 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9360 ) && matches!(target, DialectType::DuckDB)
9361 && agg.filter.is_some()
9362 {
9363 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
9364 // Need to add NOT x IS NULL to existing filter
9365 Action::ArrayAggNullFilter
9366 } else if matches!(target, DialectType::DuckDB)
9367 && agg.ignore_nulls == Some(true)
9368 && !agg.order_by.is_empty()
9369 {
9370 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
9371 Action::ArrayAggIgnoreNullsDuckDB
9372 } else if !matches!(source, DialectType::Snowflake) {
9373 Action::None
9374 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
9375 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
9376 || agg.name.is_none();
9377 if is_array_agg {
9378 Action::ArrayAggCollectList
9379 } else {
9380 Action::None
9381 }
9382 } else if matches!(
9383 target,
9384 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9385 ) && agg.filter.is_none()
9386 {
9387 Action::ArrayAggFilter
9388 } else {
9389 Action::None
9390 }
9391 }
9392 Expression::WithinGroup(wg) => {
9393 if matches!(source, DialectType::Snowflake)
9394 && matches!(
9395 target,
9396 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9397 )
9398 && matches!(wg.this, Expression::ArrayAgg(_))
9399 {
9400 Action::ArrayAggWithinGroupFilter
9401 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
9402 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
9403 || matches!(&wg.this, Expression::StringAgg(_))
9404 {
9405 Action::StringAggConvert
9406 } else if matches!(
9407 target,
9408 DialectType::Presto
9409 | DialectType::Trino
9410 | DialectType::Athena
9411 | DialectType::Spark
9412 | DialectType::Databricks
9413 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
9414 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
9415 || matches!(&wg.this, Expression::PercentileCont(_)))
9416 {
9417 Action::PercentileContConvert
9418 } else {
9419 Action::None
9420 }
9421 }
9422 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
9423 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
9424 // DATETIME is the timezone-unaware type
9425 Expression::Cast(ref c) => {
9426 if c.format.is_some()
9427 && (matches!(source, DialectType::BigQuery)
9428 || matches!(source, DialectType::Teradata))
9429 {
9430 Action::BigQueryCastFormat
9431 } else if matches!(target, DialectType::BigQuery)
9432 && !matches!(source, DialectType::BigQuery)
9433 && matches!(
9434 c.to,
9435 DataType::Timestamp {
9436 timezone: false,
9437 ..
9438 }
9439 )
9440 {
9441 Action::CastTimestampToDatetime
9442 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
9443 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
9444 && matches!(
9445 c.to,
9446 DataType::Timestamp {
9447 timezone: false,
9448 ..
9449 }
9450 )
9451 {
9452 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
9453 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
9454 Action::CastTimestampToDatetime
9455 } else if matches!(
9456 source,
9457 DialectType::Hive | DialectType::Spark | DialectType::Databricks
9458 ) && matches!(
9459 target,
9460 DialectType::Presto
9461 | DialectType::Trino
9462 | DialectType::Athena
9463 | DialectType::DuckDB
9464 | DialectType::Snowflake
9465 | DialectType::BigQuery
9466 | DialectType::Databricks
9467 | DialectType::TSQL
9468 ) {
9469 Action::HiveCastToTryCast
9470 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
9471 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
9472 {
9473 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
9474 Action::CastTimestamptzToFunc
9475 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
9476 && matches!(
9477 target,
9478 DialectType::Hive
9479 | DialectType::Spark
9480 | DialectType::Databricks
9481 | DialectType::BigQuery
9482 )
9483 {
9484 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
9485 Action::CastTimestampStripTz
9486 } else if matches!(&c.to, DataType::Json)
9487 && matches!(source, DialectType::DuckDB)
9488 && matches!(target, DialectType::Snowflake)
9489 {
9490 Action::DuckDBCastJsonToVariant
9491 } else if matches!(&c.to, DataType::Json)
9492 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
9493 && matches!(
9494 target,
9495 DialectType::Presto
9496 | DialectType::Trino
9497 | DialectType::Athena
9498 | DialectType::Snowflake
9499 )
9500 {
9501 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
9502 // Only when the input is a string literal (JSON 'value' syntax)
9503 Action::JsonLiteralToJsonParse
9504 } else if matches!(&c.to, DataType::Json)
9505 && matches!(source, DialectType::DuckDB)
9506 && matches!(
9507 target,
9508 DialectType::Presto | DialectType::Trino | DialectType::Athena
9509 )
9510 {
9511 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
9512 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
9513 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
9514 // in the target to preserve DuckDB's parse semantics.
9515 Action::JsonLiteralToJsonParse
9516 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
9517 && matches!(target, DialectType::Spark | DialectType::Databricks)
9518 {
9519 // CAST(x AS JSON) -> TO_JSON(x) for Spark
9520 Action::CastToJsonForSpark
9521 } else if (matches!(
9522 &c.to,
9523 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
9524 )) && matches!(
9525 target,
9526 DialectType::Spark | DialectType::Databricks
9527 ) && (matches!(&c.this, Expression::ParseJson(_))
9528 || matches!(
9529 &c.this,
9530 Expression::Function(f)
9531 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
9532 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
9533 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
9534 ))
9535 {
9536 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
9537 // -> FROM_JSON(..., type_string) for Spark
9538 Action::CastJsonToFromJson
9539 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
9540 && matches!(
9541 c.to,
9542 DataType::Timestamp {
9543 timezone: false,
9544 ..
9545 }
9546 )
9547 && matches!(source, DialectType::DuckDB)
9548 {
9549 Action::StrftimeCastTimestamp
9550 } else if matches!(source, DialectType::DuckDB)
9551 && matches!(
9552 c.to,
9553 DataType::Decimal {
9554 precision: None,
9555 ..
9556 }
9557 )
9558 {
9559 Action::DecimalDefaultPrecision
9560 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
9561 && matches!(c.to, DataType::Char { length: None })
9562 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
9563 {
9564 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
9565 Action::MysqlCastCharToText
9566 } else if matches!(
9567 source,
9568 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9569 ) && matches!(
9570 target,
9571 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9572 ) && Self::has_varchar_char_type(&c.to)
9573 {
9574 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
9575 Action::SparkCastVarcharToString
9576 } else {
9577 Action::None
9578 }
9579 }
9580 Expression::SafeCast(ref c) => {
9581 if c.format.is_some()
9582 && matches!(source, DialectType::BigQuery)
9583 && !matches!(target, DialectType::BigQuery)
9584 {
9585 Action::BigQueryCastFormat
9586 } else {
9587 Action::None
9588 }
9589 }
9590 Expression::TryCast(ref c) => {
9591 if matches!(&c.to, DataType::Json)
9592 && matches!(source, DialectType::DuckDB)
9593 && matches!(
9594 target,
9595 DialectType::Presto | DialectType::Trino | DialectType::Athena
9596 )
9597 {
9598 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
9599 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
9600 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
9601 // to preserve DuckDB's parse-or-null semantics.
9602 Action::DuckDBTryCastJsonToTryJsonParse
9603 } else {
9604 Action::None
9605 }
9606 }
9607 Expression::JSONArray(ref ja)
9608 if matches!(target, DialectType::Snowflake)
9609 && ja.null_handling.is_none()
9610 && ja.return_type.is_none()
9611 && ja.strict.is_none() =>
9612 {
9613 Action::GenericFunctionNormalize
9614 }
9615 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
9616 Action::GenericFunctionNormalize
9617 }
9618 // For DuckDB: DATE_TRUNC should preserve the input type
9619 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
9620 if matches!(source, DialectType::Snowflake)
9621 && matches!(target, DialectType::DuckDB)
9622 {
9623 Action::DateTruncWrapCast
9624 } else {
9625 Action::None
9626 }
9627 }
9628 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9629 Expression::SetStatement(s) => {
9630 if matches!(target, DialectType::DuckDB)
9631 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
9632 && s.items.iter().any(|item| item.kind.is_none())
9633 {
9634 Action::SetToVariable
9635 } else {
9636 Action::None
9637 }
9638 }
9639 // Cross-dialect NULL ordering normalization.
9640 // When nulls_first is not specified, fill in the source dialect's implied
9641 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
9642 Expression::Ordered(o) => {
9643 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
9644 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
9645 Action::MysqlNullsOrdering
9646 } else {
9647 // Skip targets that don't support NULLS FIRST/LAST syntax unless
9648 // the generator can preserve semantics with a CASE sort key.
9649 let target_rewrites_nulls =
9650 matches!(target, DialectType::TSQL | DialectType::Fabric);
9651 let target_supports_nulls = !matches!(
9652 target,
9653 DialectType::MySQL
9654 | DialectType::TSQL
9655 | DialectType::Fabric
9656 | DialectType::StarRocks
9657 | DialectType::Doris
9658 );
9659 if o.nulls_first.is_none()
9660 && source != target
9661 && (target_supports_nulls || target_rewrites_nulls)
9662 {
9663 Action::NullsOrdering
9664 } else {
9665 Action::None
9666 }
9667 }
9668 }
9669 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
9670 Expression::DataType(dt) => {
9671 if matches!(source, DialectType::BigQuery)
9672 && !matches!(target, DialectType::BigQuery)
9673 {
9674 match dt {
9675 DataType::Custom { ref name }
9676 if name.eq_ignore_ascii_case("INT64")
9677 || name.eq_ignore_ascii_case("FLOAT64")
9678 || name.eq_ignore_ascii_case("BOOL")
9679 || name.eq_ignore_ascii_case("BYTES")
9680 || name.eq_ignore_ascii_case("NUMERIC")
9681 || name.eq_ignore_ascii_case("STRING")
9682 || name.eq_ignore_ascii_case("DATETIME") =>
9683 {
9684 Action::BigQueryCastType
9685 }
9686 _ => Action::None,
9687 }
9688 } else if matches!(source, DialectType::TSQL) {
9689 // For TSQL source -> any target (including TSQL itself for REAL)
9690 match dt {
9691 // REAL -> FLOAT even for TSQL->TSQL
9692 DataType::Custom { ref name }
9693 if name.eq_ignore_ascii_case("REAL") =>
9694 {
9695 Action::TSQLTypeNormalize
9696 }
9697 DataType::Float {
9698 real_spelling: true,
9699 ..
9700 } => Action::TSQLTypeNormalize,
9701 // Other TSQL type normalizations only for non-TSQL targets
9702 DataType::Custom { ref name }
9703 if !matches!(target, DialectType::TSQL)
9704 && (name.eq_ignore_ascii_case("MONEY")
9705 || name.eq_ignore_ascii_case("SMALLMONEY")
9706 || name.eq_ignore_ascii_case("DATETIME2")
9707 || name.eq_ignore_ascii_case("IMAGE")
9708 || name.eq_ignore_ascii_case("BIT")
9709 || name.eq_ignore_ascii_case("ROWVERSION")
9710 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
9711 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
9712 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
9713 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
9714 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
9715 {
9716 Action::TSQLTypeNormalize
9717 }
9718 DataType::Float {
9719 precision: Some(_), ..
9720 } if !matches!(target, DialectType::TSQL) => {
9721 Action::TSQLTypeNormalize
9722 }
9723 DataType::TinyInt { .. }
9724 if !matches!(target, DialectType::TSQL) =>
9725 {
9726 Action::TSQLTypeNormalize
9727 }
9728 // INTEGER -> INT for Databricks/Spark targets
9729 DataType::Int {
9730 integer_spelling: true,
9731 ..
9732 } if matches!(
9733 target,
9734 DialectType::Databricks | DialectType::Spark
9735 ) =>
9736 {
9737 Action::TSQLTypeNormalize
9738 }
9739 _ => Action::None,
9740 }
9741 } else if (matches!(source, DialectType::Oracle)
9742 || matches!(source, DialectType::Generic))
9743 && !matches!(target, DialectType::Oracle)
9744 {
9745 match dt {
9746 DataType::Custom { ref name }
9747 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
9748 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
9749 || name.eq_ignore_ascii_case("VARCHAR2")
9750 || name.eq_ignore_ascii_case("NVARCHAR2") =>
9751 {
9752 Action::OracleVarchar2ToVarchar
9753 }
9754 _ => Action::None,
9755 }
9756 } else if matches!(target, DialectType::Snowflake)
9757 && !matches!(source, DialectType::Snowflake)
9758 {
9759 // When target is Snowflake but source is NOT Snowflake,
9760 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
9761 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
9762 // should keep their FLOAT spelling.
9763 match dt {
9764 DataType::Float { .. } => Action::SnowflakeFloatProtect,
9765 _ => Action::None,
9766 }
9767 } else {
9768 Action::None
9769 }
9770 }
9771 // LOWER patterns from BigQuery TO_HEX conversions:
9772 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
9773 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
9774 Expression::Lower(uf) => {
9775 if matches!(source, DialectType::BigQuery) {
9776 match &uf.this {
9777 Expression::Lower(_) => Action::BigQueryToHexLower,
9778 Expression::Function(f)
9779 if f.name == "TO_HEX"
9780 && matches!(target, DialectType::BigQuery) =>
9781 {
9782 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
9783 Action::BigQueryToHexLower
9784 }
9785 _ => Action::None,
9786 }
9787 } else {
9788 Action::None
9789 }
9790 }
9791 // UPPER patterns from BigQuery TO_HEX conversions:
9792 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
9793 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
9794 Expression::Upper(uf) => {
9795 if matches!(source, DialectType::BigQuery) {
9796 match &uf.this {
9797 Expression::Lower(_) => Action::BigQueryToHexUpper,
9798 _ => Action::None,
9799 }
9800 } else {
9801 Action::None
9802 }
9803 }
9804 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
9805 // Snowflake supports LAST_DAY with unit, so keep it there
9806 Expression::LastDay(ld) => {
9807 if matches!(source, DialectType::BigQuery)
9808 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
9809 && ld.unit.is_some()
9810 {
9811 Action::BigQueryLastDayStripUnit
9812 } else {
9813 Action::None
9814 }
9815 }
9816 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
9817 Expression::SafeDivide(_) => {
9818 if matches!(source, DialectType::BigQuery)
9819 && !matches!(target, DialectType::BigQuery)
9820 {
9821 Action::BigQuerySafeDivide
9822 } else {
9823 Action::None
9824 }
9825 }
9826 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
9827 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
9828 Expression::AnyValue(ref agg) => {
9829 if matches!(source, DialectType::BigQuery)
9830 && matches!(target, DialectType::DuckDB)
9831 && agg.having_max.is_some()
9832 {
9833 Action::BigQueryAnyValueHaving
9834 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
9835 && !matches!(source, DialectType::Spark | DialectType::Databricks)
9836 && agg.ignore_nulls.is_none()
9837 {
9838 Action::AnyValueIgnoreNulls
9839 } else {
9840 Action::None
9841 }
9842 }
9843 Expression::Any(ref q) => {
9844 if matches!(source, DialectType::PostgreSQL)
9845 && matches!(
9846 target,
9847 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9848 )
9849 && q.op.is_some()
9850 && !matches!(
9851 q.subquery,
9852 Expression::Select(_) | Expression::Subquery(_)
9853 )
9854 {
9855 Action::AnyToExists
9856 } else {
9857 Action::None
9858 }
9859 }
9860 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
9861 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
9862 Expression::RegexpLike(_)
9863 if matches!(source, DialectType::Snowflake)
9864 && matches!(target, DialectType::DuckDB) =>
9865 {
9866 Action::RlikeSnowflakeToDuckDB
9867 }
9868 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
9869 Expression::RegexpLike(_)
9870 if !matches!(source, DialectType::DuckDB)
9871 && matches!(target, DialectType::DuckDB) =>
9872 {
9873 Action::RegexpLikeToDuckDB
9874 }
9875 // RegexpLike -> Exasol: anchor pattern with .*...*
9876 Expression::RegexpLike(_)
9877 if matches!(target, DialectType::Exasol) =>
9878 {
9879 Action::RegexpLikeExasolAnchor
9880 }
9881 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
9882 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
9883 Expression::Div(ref op)
9884 if matches!(
9885 source,
9886 DialectType::MySQL
9887 | DialectType::DuckDB
9888 | DialectType::SingleStore
9889 | DialectType::TiDB
9890 | DialectType::ClickHouse
9891 | DialectType::Doris
9892 ) && matches!(
9893 target,
9894 DialectType::PostgreSQL
9895 | DialectType::Redshift
9896 | DialectType::Drill
9897 | DialectType::Trino
9898 | DialectType::Presto
9899 | DialectType::Athena
9900 | DialectType::TSQL
9901 | DialectType::Teradata
9902 | DialectType::SQLite
9903 | DialectType::BigQuery
9904 | DialectType::Snowflake
9905 | DialectType::Databricks
9906 | DialectType::Oracle
9907 | DialectType::Materialize
9908 | DialectType::RisingWave
9909 ) =>
9910 {
9911 // Only wrap if RHS is not already NULLIF
9912 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
9913 {
9914 Action::MySQLSafeDivide
9915 } else {
9916 Action::None
9917 }
9918 }
9919 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
9920 // For TSQL/Fabric, convert to sp_rename instead
9921 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
9922 if let Some(crate::expressions::AlterTableAction::RenameTable(
9923 ref new_tbl,
9924 )) = at.actions.first()
9925 {
9926 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9927 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
9928 Action::AlterTableToSpRename
9929 } else if new_tbl.schema.is_some()
9930 && matches!(
9931 target,
9932 DialectType::BigQuery
9933 | DialectType::Doris
9934 | DialectType::StarRocks
9935 | DialectType::DuckDB
9936 | DialectType::PostgreSQL
9937 | DialectType::Redshift
9938 )
9939 {
9940 Action::AlterTableRenameStripSchema
9941 } else {
9942 Action::None
9943 }
9944 } else {
9945 Action::None
9946 }
9947 }
9948 // EPOCH(x) expression -> target-specific epoch conversion
9949 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
9950 Action::EpochConvert
9951 }
9952 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
9953 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
9954 Action::EpochMsConvert
9955 }
9956 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
9957 Expression::StringAgg(_) => {
9958 if matches!(
9959 target,
9960 DialectType::MySQL
9961 | DialectType::SingleStore
9962 | DialectType::Doris
9963 | DialectType::StarRocks
9964 | DialectType::SQLite
9965 ) {
9966 Action::StringAggConvert
9967 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
9968 Action::StringAggConvert
9969 } else {
9970 Action::None
9971 }
9972 }
9973 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
9974 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
9975 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
9976 Expression::GroupConcat(_) => Action::GroupConcatConvert,
9977 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
9978 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
9979 Expression::Cardinality(_)
9980 if matches!(source, DialectType::DuckDB)
9981 && matches!(target, DialectType::DuckDB) =>
9982 {
9983 Action::None
9984 }
9985 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
9986 Action::ArrayLengthConvert
9987 }
9988 Expression::ArraySize(_) => {
9989 if matches!(target, DialectType::Drill) {
9990 Action::ArraySizeDrill
9991 } else {
9992 Action::ArrayLengthConvert
9993 }
9994 }
9995 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
9996 Expression::ArrayRemove(_) => match target {
9997 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
9998 Action::ArrayRemoveConvert
9999 }
10000 _ => Action::None,
10001 },
10002 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
10003 Expression::ArrayReverse(_) => match target {
10004 DialectType::ClickHouse => Action::ArrayReverseConvert,
10005 _ => Action::None,
10006 },
10007 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
10008 Expression::JsonKeys(_) => match target {
10009 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
10010 Action::JsonKeysConvert
10011 }
10012 _ => Action::None,
10013 },
10014 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
10015 Expression::ParseJson(_) => match target {
10016 DialectType::SQLite
10017 | DialectType::Doris
10018 | DialectType::MySQL
10019 | DialectType::StarRocks => Action::ParseJsonStrip,
10020 _ => Action::None,
10021 },
10022 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
10023 Expression::WeekOfYear(_)
10024 if matches!(target, DialectType::Snowflake)
10025 && !matches!(source, DialectType::Snowflake) =>
10026 {
10027 Action::WeekOfYearToWeekIso
10028 }
10029 // NVL: clear original_name so generator uses dialect-specific function names
10030 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
10031 // XOR: expand for dialects that don't support the XOR keyword
10032 Expression::Xor(_) => {
10033 let target_supports_xor = matches!(
10034 target,
10035 DialectType::MySQL
10036 | DialectType::SingleStore
10037 | DialectType::Doris
10038 | DialectType::StarRocks
10039 );
10040 if !target_supports_xor {
10041 Action::XorExpand
10042 } else {
10043 Action::None
10044 }
10045 }
10046 // TSQL #table -> temp table normalization (CREATE TABLE)
10047 Expression::CreateTable(ct)
10048 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10049 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10050 && ct.name.name.name.starts_with('#') =>
10051 {
10052 Action::TempTableHash
10053 }
10054 // TSQL #table -> strip # from table references in SELECT/etc.
10055 Expression::Table(tr)
10056 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10057 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10058 && tr.name.name.starts_with('#') =>
10059 {
10060 Action::TempTableHash
10061 }
10062 // TSQL #table -> strip # from DROP TABLE names
10063 Expression::DropTable(ref dt)
10064 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10065 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10066 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
10067 {
10068 Action::TempTableHash
10069 }
10070 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
10071 Expression::JsonExtract(_)
10072 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10073 {
10074 Action::JsonExtractToTsql
10075 }
10076 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
10077 Expression::JsonExtractScalar(_)
10078 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10079 {
10080 Action::JsonExtractToTsql
10081 }
10082 // JSON_EXTRACT -> JSONExtractString for ClickHouse
10083 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
10084 Action::JsonExtractToClickHouse
10085 }
10086 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
10087 Expression::JsonExtractScalar(_)
10088 if matches!(target, DialectType::ClickHouse) =>
10089 {
10090 Action::JsonExtractToClickHouse
10091 }
10092 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
10093 Expression::JsonExtract(ref f)
10094 if !f.arrow_syntax
10095 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
10096 {
10097 Action::JsonExtractToArrow
10098 }
10099 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
10100 Expression::JsonExtract(ref f)
10101 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
10102 && !matches!(
10103 source,
10104 DialectType::PostgreSQL
10105 | DialectType::Redshift
10106 | DialectType::Materialize
10107 )
10108 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
10109 {
10110 Action::JsonExtractToGetJsonObject
10111 }
10112 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
10113 Expression::JsonExtract(_)
10114 if matches!(
10115 target,
10116 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10117 ) =>
10118 {
10119 Action::JsonExtractToGetJsonObject
10120 }
10121 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
10122 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
10123 Expression::JsonExtractScalar(ref f)
10124 if !f.arrow_syntax
10125 && !f.hash_arrow_syntax
10126 && matches!(
10127 target,
10128 DialectType::PostgreSQL
10129 | DialectType::Redshift
10130 | DialectType::Snowflake
10131 | DialectType::SQLite
10132 | DialectType::DuckDB
10133 ) =>
10134 {
10135 Action::JsonExtractScalarConvert
10136 }
10137 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
10138 Expression::JsonExtractScalar(_)
10139 if matches!(
10140 target,
10141 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10142 ) =>
10143 {
10144 Action::JsonExtractScalarToGetJsonObject
10145 }
10146 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
10147 Expression::JsonExtract(ref f)
10148 if !f.arrow_syntax
10149 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
10150 {
10151 Action::JsonPathNormalize
10152 }
10153 // JsonQuery (parsed JSON_QUERY) -> target-specific
10154 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
10155 // JsonValue (parsed JSON_VALUE) -> target-specific
10156 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
10157 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
10158 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
10159 Expression::AtTimeZone(_)
10160 if matches!(
10161 target,
10162 DialectType::Presto
10163 | DialectType::Trino
10164 | DialectType::Athena
10165 | DialectType::Spark
10166 | DialectType::Databricks
10167 | DialectType::BigQuery
10168 | DialectType::Snowflake
10169 ) =>
10170 {
10171 Action::AtTimeZoneConvert
10172 }
10173 // DAY_OF_WEEK -> dialect-specific
10174 Expression::DayOfWeek(_)
10175 if matches!(
10176 target,
10177 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
10178 ) =>
10179 {
10180 Action::DayOfWeekConvert
10181 }
10182 // CURRENT_USER -> CURRENT_USER() for Snowflake
10183 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
10184 Action::CurrentUserParens
10185 }
10186 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
10187 Expression::ElementAt(_)
10188 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
10189 {
10190 Action::ElementAtConvert
10191 }
10192 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
10193 Expression::ArrayFunc(ref arr)
10194 if !arr.bracket_notation
10195 && matches!(
10196 target,
10197 DialectType::Spark
10198 | DialectType::Databricks
10199 | DialectType::Hive
10200 | DialectType::BigQuery
10201 | DialectType::DuckDB
10202 | DialectType::Snowflake
10203 | DialectType::Presto
10204 | DialectType::Trino
10205 | DialectType::Athena
10206 | DialectType::ClickHouse
10207 | DialectType::StarRocks
10208 ) =>
10209 {
10210 Action::ArraySyntaxConvert
10211 }
10212 // VARIANCE expression -> varSamp for ClickHouse
10213 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
10214 Action::VarianceToClickHouse
10215 }
10216 // STDDEV expression -> stddevSamp for ClickHouse
10217 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
10218 Action::StddevToClickHouse
10219 }
10220 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
10221 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
10222 Action::ApproxQuantileConvert
10223 }
10224 // MonthsBetween -> target-specific
10225 Expression::MonthsBetween(_)
10226 if !matches!(
10227 target,
10228 DialectType::Spark | DialectType::Databricks | DialectType::Hive
10229 ) =>
10230 {
10231 Action::MonthsBetweenConvert
10232 }
10233 // AddMonths -> target-specific DATEADD/DATE_ADD
10234 Expression::AddMonths(_) => Action::AddMonthsConvert,
10235 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
10236 Expression::MapFromArrays(_)
10237 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
10238 {
10239 Action::MapFromArraysConvert
10240 }
10241 // CURRENT_USER -> CURRENT_USER() for Spark
10242 Expression::CurrentUser(_)
10243 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
10244 {
10245 Action::CurrentUserSparkParens
10246 }
10247 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
10248 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
10249 if matches!(
10250 source,
10251 DialectType::Spark | DialectType::Databricks | DialectType::Hive
10252 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
10253 && matches!(
10254 target,
10255 DialectType::DuckDB
10256 | DialectType::Presto
10257 | DialectType::Trino
10258 | DialectType::Athena
10259 | DialectType::PostgreSQL
10260 | DialectType::Redshift
10261 ) =>
10262 {
10263 Action::SparkDateFuncCast
10264 }
10265 // $parameter -> @parameter for BigQuery
10266 Expression::Parameter(ref p)
10267 if matches!(target, DialectType::BigQuery)
10268 && matches!(source, DialectType::DuckDB)
10269 && (p.style == crate::expressions::ParameterStyle::Dollar
10270 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
10271 {
10272 Action::DollarParamConvert
10273 }
10274 // EscapeString literal: normalize literal newlines to \n
10275 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
10276 =>
10277 {
10278 Action::EscapeStringNormalize
10279 }
10280 // straight_join: keep lowercase for DuckDB, quote for MySQL
10281 Expression::Column(ref col)
10282 if col.name.name == "STRAIGHT_JOIN"
10283 && col.table.is_none()
10284 && matches!(source, DialectType::DuckDB)
10285 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
10286 {
10287 Action::StraightJoinCase
10288 }
10289 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
10290 // INTERVAL format for Snowflake/PostgreSQL/Redshift targets: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
10291 Expression::Interval(ref iv)
10292 if matches!(
10293 target,
10294 DialectType::Snowflake
10295 | DialectType::PostgreSQL
10296 | DialectType::Redshift
10297 ) && iv.unit.is_some()
10298 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
10299 {
10300 Action::SnowflakeIntervalFormat
10301 }
10302 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
10303 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
10304 if let Some(ref sample) = ts.sample {
10305 if !sample.explicit_method {
10306 Action::TablesampleReservoir
10307 } else {
10308 Action::None
10309 }
10310 } else {
10311 Action::None
10312 }
10313 }
10314 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
10315 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
10316 Expression::TableSample(ref ts)
10317 if matches!(target, DialectType::Snowflake)
10318 && !matches!(source, DialectType::Snowflake)
10319 && ts.sample.is_some() =>
10320 {
10321 if let Some(ref sample) = ts.sample {
10322 if !sample.explicit_method {
10323 Action::TablesampleSnowflakeStrip
10324 } else {
10325 Action::None
10326 }
10327 } else {
10328 Action::None
10329 }
10330 }
10331 Expression::Table(ref t)
10332 if matches!(target, DialectType::Snowflake)
10333 && !matches!(source, DialectType::Snowflake)
10334 && t.table_sample.is_some() =>
10335 {
10336 if let Some(ref sample) = t.table_sample {
10337 if !sample.explicit_method {
10338 Action::TablesampleSnowflakeStrip
10339 } else {
10340 Action::None
10341 }
10342 } else {
10343 Action::None
10344 }
10345 }
10346 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
10347 Expression::AlterTable(ref at)
10348 if matches!(target, DialectType::TSQL | DialectType::Fabric)
10349 && !at.actions.is_empty()
10350 && matches!(
10351 at.actions.first(),
10352 Some(crate::expressions::AlterTableAction::RenameTable(_))
10353 ) =>
10354 {
10355 Action::AlterTableToSpRename
10356 }
10357 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
10358 Expression::Subscript(ref sub)
10359 if matches!(
10360 target,
10361 DialectType::BigQuery
10362 | DialectType::Hive
10363 | DialectType::Spark
10364 | DialectType::Databricks
10365 ) && matches!(
10366 source,
10367 DialectType::DuckDB
10368 | DialectType::PostgreSQL
10369 | DialectType::Presto
10370 | DialectType::Trino
10371 | DialectType::Redshift
10372 | DialectType::ClickHouse
10373 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
10374 {
10375 Action::ArrayIndexConvert
10376 }
10377 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
10378 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
10379 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
10380 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
10381 Expression::WindowFunction(ref wf) => {
10382 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
10383 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
10384 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
10385 if matches!(target, DialectType::BigQuery)
10386 && !is_row_number
10387 && !wf.over.order_by.is_empty()
10388 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
10389 {
10390 Action::BigQueryNullsOrdering
10391 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
10392 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
10393 } else {
10394 let source_nulls_last = matches!(source, DialectType::DuckDB);
10395 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
10396 matches!(
10397 f.kind,
10398 crate::expressions::WindowFrameKind::Range
10399 | crate::expressions::WindowFrameKind::Groups
10400 )
10401 });
10402 if source_nulls_last
10403 && matches!(target, DialectType::MySQL)
10404 && !wf.over.order_by.is_empty()
10405 && wf.over.order_by.iter().any(|o| !o.desc)
10406 && !has_range_frame
10407 {
10408 Action::MysqlNullsLastRewrite
10409 } else {
10410 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
10411 let is_ranking_window_func = matches!(
10412 &wf.this,
10413 Expression::FirstValue(_)
10414 | Expression::LastValue(_)
10415 | Expression::NthValue(_)
10416 );
10417 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
10418 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
10419 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
10420 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
10421 && f.exclude.is_none()
10422 });
10423 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
10424 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
10425 // Strip the default frame for Snowflake target
10426 Action::SnowflakeWindowFrameStrip
10427 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
10428 // Add default frame for non-Snowflake target
10429 Action::SnowflakeWindowFrameAdd
10430 } else {
10431 match &wf.this {
10432 Expression::FirstValue(ref vf)
10433 | Expression::LastValue(ref vf)
10434 if vf.ignore_nulls == Some(false) =>
10435 {
10436 match target {
10437 DialectType::SQLite => Action::RespectNullsConvert,
10438 _ => Action::None,
10439 }
10440 }
10441 _ => Action::None,
10442 }
10443 }
10444 } else {
10445 match &wf.this {
10446 Expression::FirstValue(ref vf)
10447 | Expression::LastValue(ref vf)
10448 if vf.ignore_nulls == Some(false) =>
10449 {
10450 // RESPECT NULLS
10451 match target {
10452 DialectType::SQLite | DialectType::PostgreSQL => {
10453 Action::RespectNullsConvert
10454 }
10455 _ => Action::None,
10456 }
10457 }
10458 _ => Action::None,
10459 }
10460 }
10461 }
10462 }
10463 }
10464 // CREATE TABLE a LIKE b -> dialect-specific transformations
10465 Expression::CreateTable(ref ct)
10466 if ct.columns.is_empty()
10467 && ct.constraints.iter().any(|c| {
10468 matches!(c, crate::expressions::TableConstraint::Like { .. })
10469 })
10470 && matches!(
10471 target,
10472 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
10473 ) =>
10474 {
10475 Action::CreateTableLikeToCtas
10476 }
10477 Expression::CreateTable(ref ct)
10478 if ct.columns.is_empty()
10479 && ct.constraints.iter().any(|c| {
10480 matches!(c, crate::expressions::TableConstraint::Like { .. })
10481 })
10482 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10483 {
10484 Action::CreateTableLikeToSelectInto
10485 }
10486 Expression::CreateTable(ref ct)
10487 if ct.columns.is_empty()
10488 && ct.constraints.iter().any(|c| {
10489 matches!(c, crate::expressions::TableConstraint::Like { .. })
10490 })
10491 && matches!(target, DialectType::ClickHouse) =>
10492 {
10493 Action::CreateTableLikeToAs
10494 }
10495 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
10496 Expression::CreateTable(ref ct)
10497 if matches!(target, DialectType::DuckDB)
10498 && matches!(
10499 source,
10500 DialectType::DuckDB
10501 | DialectType::Spark
10502 | DialectType::Databricks
10503 | DialectType::Hive
10504 ) =>
10505 {
10506 let has_comment = ct.columns.iter().any(|c| {
10507 c.comment.is_some()
10508 || c.constraints.iter().any(|con| {
10509 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
10510 })
10511 });
10512 let has_props = !ct.properties.is_empty();
10513 if has_comment || has_props {
10514 Action::CreateTableStripComment
10515 } else {
10516 Action::None
10517 }
10518 }
10519 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
10520 Expression::Array(_)
10521 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
10522 {
10523 Action::ArrayConcatBracketConvert
10524 }
10525 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
10526 Expression::ArrayFunc(ref arr)
10527 if arr.bracket_notation
10528 && matches!(source, DialectType::BigQuery)
10529 && matches!(target, DialectType::Redshift) =>
10530 {
10531 Action::ArrayConcatBracketConvert
10532 }
10533 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
10534 Expression::BitwiseOrAgg(ref f)
10535 | Expression::BitwiseAndAgg(ref f)
10536 | Expression::BitwiseXorAgg(ref f) => {
10537 if matches!(target, DialectType::DuckDB) {
10538 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
10539 if let Expression::Cast(ref c) = f.this {
10540 match &c.to {
10541 DataType::Float { .. }
10542 | DataType::Double { .. }
10543 | DataType::Decimal { .. } => Action::BitAggFloatCast,
10544 DataType::Custom { ref name }
10545 if name.eq_ignore_ascii_case("REAL") =>
10546 {
10547 Action::BitAggFloatCast
10548 }
10549 _ => Action::None,
10550 }
10551 } else {
10552 Action::None
10553 }
10554 } else if matches!(target, DialectType::Snowflake) {
10555 Action::BitAggSnowflakeRename
10556 } else {
10557 Action::None
10558 }
10559 }
10560 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
10561 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
10562 Action::FilterToIff
10563 }
10564 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
10565 Expression::Avg(ref f)
10566 | Expression::Sum(ref f)
10567 | Expression::Min(ref f)
10568 | Expression::Max(ref f)
10569 | Expression::CountIf(ref f)
10570 | Expression::Stddev(ref f)
10571 | Expression::StddevPop(ref f)
10572 | Expression::StddevSamp(ref f)
10573 | Expression::Variance(ref f)
10574 | Expression::VarPop(ref f)
10575 | Expression::VarSamp(ref f)
10576 | Expression::Median(ref f)
10577 | Expression::Mode(ref f)
10578 | Expression::First(ref f)
10579 | Expression::Last(ref f)
10580 | Expression::ApproxDistinct(ref f)
10581 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
10582 {
10583 Action::AggFilterToIff
10584 }
10585 Expression::Count(ref c)
10586 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
10587 {
10588 Action::AggFilterToIff
10589 }
10590 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
10591 Expression::Count(ref c)
10592 if c.distinct
10593 && matches!(&c.this, Some(Expression::Tuple(_)))
10594 && matches!(
10595 target,
10596 DialectType::Presto
10597 | DialectType::Trino
10598 | DialectType::DuckDB
10599 | DialectType::PostgreSQL
10600 ) =>
10601 {
10602 Action::CountDistinctMultiArg
10603 }
10604 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
10605 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
10606 Action::JsonToGetPath
10607 }
10608 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
10609 Expression::Struct(_)
10610 if matches!(
10611 target,
10612 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10613 ) && matches!(source, DialectType::DuckDB) =>
10614 {
10615 Action::StructToRow
10616 }
10617 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
10618 Expression::MapFunc(ref m)
10619 if m.curly_brace_syntax
10620 && matches!(
10621 target,
10622 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10623 )
10624 && matches!(source, DialectType::DuckDB) =>
10625 {
10626 Action::StructToRow
10627 }
10628 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
10629 Expression::ApproxCountDistinct(_)
10630 if matches!(
10631 target,
10632 DialectType::Presto | DialectType::Trino | DialectType::Athena
10633 ) =>
10634 {
10635 Action::ApproxCountDistinctToApproxDistinct
10636 }
10637 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
10638 Expression::ArrayContains(_)
10639 if matches!(
10640 target,
10641 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
10642 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
10643 {
10644 Action::ArrayContainsConvert
10645 }
10646 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
10647 Expression::ArrayContains(_)
10648 if matches!(target, DialectType::DuckDB)
10649 && matches!(source, DialectType::Snowflake) =>
10650 {
10651 Action::ArrayContainsDuckDBConvert
10652 }
10653 // ARRAY_EXCEPT -> target-specific conversion
10654 Expression::ArrayExcept(_)
10655 if matches!(
10656 target,
10657 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
10658 ) =>
10659 {
10660 Action::ArrayExceptConvert
10661 }
10662 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
10663 Expression::ArrayPosition(_)
10664 if matches!(target, DialectType::Snowflake)
10665 && !matches!(source, DialectType::Snowflake) =>
10666 {
10667 Action::ArrayPositionSnowflakeSwap
10668 }
10669 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
10670 Expression::ArrayPosition(_)
10671 if matches!(target, DialectType::DuckDB)
10672 && matches!(source, DialectType::Snowflake) =>
10673 {
10674 Action::SnowflakeArrayPositionToDuckDB
10675 }
10676 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
10677 Expression::ArrayDistinct(_)
10678 if matches!(target, DialectType::ClickHouse) =>
10679 {
10680 Action::ArrayDistinctClickHouse
10681 }
10682 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
10683 Expression::ArrayDistinct(_)
10684 if matches!(target, DialectType::DuckDB)
10685 && matches!(source, DialectType::Snowflake) =>
10686 {
10687 Action::ArrayDistinctConvert
10688 }
10689 // StrPosition with position -> complex expansion for Presto/DuckDB
10690 // STRPOS doesn't support a position arg in these dialects
10691 Expression::StrPosition(ref sp)
10692 if sp.position.is_some()
10693 && matches!(
10694 target,
10695 DialectType::Presto
10696 | DialectType::Trino
10697 | DialectType::Athena
10698 | DialectType::DuckDB
10699 ) =>
10700 {
10701 Action::StrPositionExpand
10702 }
10703 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
10704 Expression::First(ref f)
10705 if f.ignore_nulls == Some(true)
10706 && matches!(target, DialectType::DuckDB) =>
10707 {
10708 Action::FirstToAnyValue
10709 }
10710 // BEGIN -> START TRANSACTION for Presto/Trino
10711 Expression::Command(ref cmd)
10712 if cmd.this.eq_ignore_ascii_case("BEGIN")
10713 && matches!(
10714 target,
10715 DialectType::Presto | DialectType::Trino | DialectType::Athena
10716 ) =>
10717 {
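10718 // Intended to be handled inline below. NOTE(review): the inline BEGIN rewrite in the Action::None arm matches Expression::Transaction, not Expression::Command - confirm this case is actually covered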
10719 Action::None // We'll handle it directly
10720 }
10721 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
10722 // PostgreSQL # is parsed as BitwiseXor (which is correct).
10723 // a || b (Concat operator) -> CONCAT function for Presto/Trino
10724 Expression::Concat(ref _op)
10725 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
10726 && matches!(target, DialectType::Presto | DialectType::Trino) =>
10727 {
10728 Action::PipeConcatToConcat
10729 }
10730 _ => Action::None,
10731 }
10732 };
10733
10734 match action {
10735 Action::None => {
10736 // Handle inline transforms that don't need a dedicated action
10737 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
10738 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
10739 return Ok(rewritten);
10740 }
10741 }
10742
10743 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
10744 if let Expression::Between(ref b) = e {
10745 if let Some(sym) = b.symmetric {
10746 let keeps_symmetric =
10747 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
10748 if !keeps_symmetric {
10749 if sym {
10750 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
10751 let b = if let Expression::Between(b) = e {
10752 *b
10753 } else {
10754 unreachable!()
10755 };
10756 let between1 = Expression::Between(Box::new(
10757 crate::expressions::Between {
10758 this: b.this.clone(),
10759 low: b.low.clone(),
10760 high: b.high.clone(),
10761 not: b.not,
10762 symmetric: None,
10763 },
10764 ));
10765 let between2 = Expression::Between(Box::new(
10766 crate::expressions::Between {
10767 this: b.this,
10768 low: b.high,
10769 high: b.low,
10770 not: b.not,
10771 symmetric: None,
10772 },
10773 ));
10774 return Ok(Expression::Paren(Box::new(
10775 crate::expressions::Paren {
10776 this: Expression::Or(Box::new(
10777 crate::expressions::BinaryOp::new(
10778 between1, between2,
10779 ),
10780 )),
10781 trailing_comments: vec![],
10782 },
10783 )));
10784 } else {
10785 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
10786 let b = if let Expression::Between(b) = e {
10787 *b
10788 } else {
10789 unreachable!()
10790 };
10791 return Ok(Expression::Between(Box::new(
10792 crate::expressions::Between {
10793 this: b.this,
10794 low: b.low,
10795 high: b.high,
10796 not: b.not,
10797 symmetric: None,
10798 },
10799 )));
10800 }
10801 }
10802 }
10803 }
10804
10805 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
10806 if let Expression::ILike(ref _like) = e {
10807 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
10808 let like = if let Expression::ILike(l) = e {
10809 *l
10810 } else {
10811 unreachable!()
10812 };
10813 let lower_left = Expression::Function(Box::new(Function::new(
10814 "LOWER".to_string(),
10815 vec![like.left],
10816 )));
10817 let lower_right = Expression::Function(Box::new(Function::new(
10818 "LOWER".to_string(),
10819 vec![like.right],
10820 )));
10821 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10822 left: lower_left,
10823 right: lower_right,
10824 escape: like.escape,
10825 quantifier: like.quantifier,
10826 inferred_type: None,
10827 })));
10828 }
10829 }
10830
10831 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
10832 if let Expression::MethodCall(ref mc) = e {
10833 if matches!(source, DialectType::Oracle)
10834 && mc.method.name.eq_ignore_ascii_case("VALUE")
10835 && mc.args.is_empty()
10836 {
10837 let is_dbms_random = match &mc.this {
10838 Expression::Identifier(id) => {
10839 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
10840 }
10841 Expression::Column(col) => {
10842 col.table.is_none()
10843 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
10844 }
10845 _ => false,
10846 };
10847 if is_dbms_random {
10848 let func_name = match target {
10849 DialectType::PostgreSQL
10850 | DialectType::Redshift
10851 | DialectType::DuckDB
10852 | DialectType::SQLite => "RANDOM",
10853 DialectType::Oracle => "DBMS_RANDOM.VALUE",
10854 _ => "RAND",
10855 };
10856 return Ok(Expression::Function(Box::new(Function::new(
10857 func_name.to_string(),
10858 vec![],
10859 ))));
10860 }
10861 }
10862 }
10863 // TRIM without explicit position -> add BOTH for ClickHouse
10864 if let Expression::Trim(ref trim) = e {
10865 if matches!(target, DialectType::ClickHouse)
10866 && trim.sql_standard_syntax
10867 && trim.characters.is_some()
10868 && !trim.position_explicit
10869 {
10870 let mut new_trim = (**trim).clone();
10871 new_trim.position_explicit = true;
10872 return Ok(Expression::Trim(Box::new(new_trim)));
10873 }
10874 }
10875 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
10876 if let Expression::Transaction(ref txn) = e {
10877 if matches!(
10878 target,
10879 DialectType::Presto | DialectType::Trino | DialectType::Athena
10880 ) {
10881 // Convert BEGIN to START TRANSACTION by setting mark to "START"
10882 let mut txn = txn.clone();
10883 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
10884 "START".to_string(),
10885 ))));
10886 return Ok(Expression::Transaction(Box::new(*txn)));
10887 }
10888 }
10889 // IS TRUE/FALSE -> simplified forms for Presto/Trino
10890 if matches!(
10891 target,
10892 DialectType::Presto | DialectType::Trino | DialectType::Athena
10893 ) {
10894 match &e {
10895 Expression::IsTrue(itf) if !itf.not => {
10896 // x IS TRUE -> x
10897 return Ok(itf.this.clone());
10898 }
10899 Expression::IsTrue(itf) if itf.not => {
10900 // x IS NOT TRUE -> NOT x
10901 return Ok(Expression::Not(Box::new(
10902 crate::expressions::UnaryOp {
10903 this: itf.this.clone(),
10904 inferred_type: None,
10905 },
10906 )));
10907 }
10908 Expression::IsFalse(itf) if !itf.not => {
10909 // x IS FALSE -> NOT x
10910 return Ok(Expression::Not(Box::new(
10911 crate::expressions::UnaryOp {
10912 this: itf.this.clone(),
10913 inferred_type: None,
10914 },
10915 )));
10916 }
10917 Expression::IsFalse(itf) if itf.not => {
10918 // x IS NOT FALSE -> NOT NOT x
10919 let not_x =
10920 Expression::Not(Box::new(crate::expressions::UnaryOp {
10921 this: itf.this.clone(),
10922 inferred_type: None,
10923 }));
10924 return Ok(Expression::Not(Box::new(
10925 crate::expressions::UnaryOp {
10926 this: not_x,
10927 inferred_type: None,
10928 },
10929 )));
10930 }
10931 _ => {}
10932 }
10933 }
10934 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
10935 if matches!(target, DialectType::Redshift) {
10936 if let Expression::IsFalse(ref itf) = e {
10937 if itf.not {
10938 return Ok(Expression::Not(Box::new(
10939 crate::expressions::UnaryOp {
10940 this: Expression::IsFalse(Box::new(
10941 crate::expressions::IsTrueFalse {
10942 this: itf.this.clone(),
10943 not: false,
10944 },
10945 )),
10946 inferred_type: None,
10947 },
10948 )));
10949 }
10950 }
10951 }
10952 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
10953 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
10954 if let Expression::Function(ref f) = e {
10955 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
10956 && matches!(source, DialectType::Snowflake)
10957 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
10958 {
10959 if f.args.len() == 3 {
10960 let mut args = f.args.clone();
10961 args.push(Expression::string("g"));
10962 return Ok(Expression::Function(Box::new(Function::new(
10963 "REGEXP_REPLACE".to_string(),
10964 args,
10965 ))));
10966 } else if f.args.len() == 4 {
10967 // 4th arg might be position, add 'g' as 5th
10968 let mut args = f.args.clone();
10969 args.push(Expression::string("g"));
10970 return Ok(Expression::Function(Box::new(Function::new(
10971 "REGEXP_REPLACE".to_string(),
10972 args,
10973 ))));
10974 }
10975 }
10976 }
10977 Ok(e)
10978 }
10979
10980 Action::GreatestLeastNull => {
10981 let f = if let Expression::Function(f) = e {
10982 *f
10983 } else {
10984 unreachable!("action only triggered for Function expressions")
10985 };
10986 let mut null_checks: Vec<Expression> = f
10987 .args
10988 .iter()
10989 .map(|a| {
10990 Expression::IsNull(Box::new(IsNull {
10991 this: a.clone(),
10992 not: false,
10993 postfix_form: false,
10994 }))
10995 })
10996 .collect();
10997 let condition = if null_checks.len() == 1 {
10998 null_checks.remove(0)
10999 } else {
11000 let first = null_checks.remove(0);
11001 null_checks.into_iter().fold(first, |acc, check| {
11002 Expression::Or(Box::new(BinaryOp::new(acc, check)))
11003 })
11004 };
11005 Ok(Expression::Case(Box::new(Case {
11006 operand: None,
11007 whens: vec![(condition, Expression::Null(Null))],
11008 else_: Some(Expression::Function(Box::new(Function::new(
11009 f.name, f.args,
11010 )))),
11011 comments: Vec::new(),
11012 inferred_type: None,
11013 })))
11014 }
11015
11016 Action::ArrayGenerateRange => {
11017 let f = if let Expression::Function(f) = e {
11018 *f
11019 } else {
11020 unreachable!("action only triggered for Function expressions")
11021 };
11022 let start = f.args[0].clone();
11023 let end = f.args[1].clone();
11024 let step = f.args.get(2).cloned();
11025
11026 // Helper: compute end - 1 for converting exclusive→inclusive end.
11027 // When end is a literal number, simplify to a computed literal.
11028 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
11029 // Try to simplify literal numbers
11030 match end {
11031 Expression::Literal(lit)
11032 if matches!(lit.as_ref(), Literal::Number(_)) =>
11033 {
11034 let Literal::Number(n) = lit.as_ref() else {
11035 unreachable!()
11036 };
11037 if let Ok(val) = n.parse::<i64>() {
11038 return Expression::number(val - 1);
11039 }
11040 }
11041 Expression::Neg(u) => {
11042 if let Expression::Literal(lit) = &u.this {
11043 if let Literal::Number(n) = lit.as_ref() {
11044 if let Ok(val) = n.parse::<i64>() {
11045 return Expression::number(-val - 1);
11046 }
11047 }
11048 }
11049 }
11050 _ => {}
11051 }
11052 // Non-literal: produce end - 1 expression
11053 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
11054 }
11055
11056 match target {
11057 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
11058 // so no adjustment needed — just rename the function.
11059 DialectType::Snowflake => {
11060 let mut args = vec![start, end];
11061 if let Some(s) = step {
11062 args.push(s);
11063 }
11064 Ok(Expression::Function(Box::new(Function::new(
11065 "ARRAY_GENERATE_RANGE".to_string(),
11066 args,
11067 ))))
11068 }
11069 DialectType::DuckDB => {
11070 let mut args = vec![start, end];
11071 if let Some(s) = step {
11072 args.push(s);
11073 }
11074 Ok(Expression::Function(Box::new(Function::new(
11075 "RANGE".to_string(),
11076 args,
11077 ))))
11078 }
11079 // These dialects use inclusive end, so convert exclusive→inclusive.
11080 // Presto/Trino: simplify literal numbers (3 → 2).
11081 DialectType::Presto | DialectType::Trino => {
11082 let end_inclusive = exclusive_to_inclusive_end(&end);
11083 let mut args = vec![start, end_inclusive];
11084 if let Some(s) = step {
11085 args.push(s);
11086 }
11087 Ok(Expression::Function(Box::new(Function::new(
11088 "SEQUENCE".to_string(),
11089 args,
11090 ))))
11091 }
11092 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
11093 DialectType::PostgreSQL | DialectType::Redshift => {
11094 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
11095 end.clone(),
11096 Expression::number(1),
11097 )));
11098 let mut args = vec![start, end_minus_1];
11099 if let Some(s) = step {
11100 args.push(s);
11101 }
11102 Ok(Expression::Function(Box::new(Function::new(
11103 "GENERATE_SERIES".to_string(),
11104 args,
11105 ))))
11106 }
11107 DialectType::BigQuery => {
11108 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
11109 end.clone(),
11110 Expression::number(1),
11111 )));
11112 let mut args = vec![start, end_minus_1];
11113 if let Some(s) = step {
11114 args.push(s);
11115 }
11116 Ok(Expression::Function(Box::new(Function::new(
11117 "GENERATE_ARRAY".to_string(),
11118 args,
11119 ))))
11120 }
11121 _ => Ok(Expression::Function(Box::new(Function::new(
11122 f.name, f.args,
11123 )))),
11124 }
11125 }
11126
11127 Action::Div0TypedDivision => {
11128 let if_func = if let Expression::IfFunc(f) = e {
11129 *f
11130 } else {
11131 unreachable!("action only triggered for IfFunc expressions")
11132 };
11133 if let Some(Expression::Div(div)) = if_func.false_value {
11134 let cast_type = if matches!(target, DialectType::SQLite) {
11135 DataType::Float {
11136 precision: None,
11137 scale: None,
11138 real_spelling: true,
11139 }
11140 } else {
11141 DataType::Double {
11142 precision: None,
11143 scale: None,
11144 }
11145 };
11146 let casted_left = Expression::Cast(Box::new(Cast {
11147 this: div.left,
11148 to: cast_type,
11149 trailing_comments: vec![],
11150 double_colon_syntax: false,
11151 format: None,
11152 default: None,
11153 inferred_type: None,
11154 }));
11155 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11156 condition: if_func.condition,
11157 true_value: if_func.true_value,
11158 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
11159 casted_left,
11160 div.right,
11161 )))),
11162 original_name: if_func.original_name,
11163 inferred_type: None,
11164 })))
11165 } else {
11166 // Not actually a Div, reconstruct
11167 Ok(Expression::IfFunc(Box::new(if_func)))
11168 }
11169 }
11170
11171 Action::ArrayAggCollectList => {
11172 let agg = if let Expression::ArrayAgg(a) = e {
11173 *a
11174 } else {
11175 unreachable!("action only triggered for ArrayAgg expressions")
11176 };
11177 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11178 name: Some("COLLECT_LIST".to_string()),
11179 ..agg
11180 })))
11181 }
11182
11183 Action::ArrayAggToGroupConcat => {
11184 let agg = if let Expression::ArrayAgg(a) = e {
11185 *a
11186 } else {
11187 unreachable!("action only triggered for ArrayAgg expressions")
11188 };
11189 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11190 name: Some("GROUP_CONCAT".to_string()),
11191 ..agg
11192 })))
11193 }
11194
11195 Action::ArrayAggWithinGroupFilter => {
11196 let wg = if let Expression::WithinGroup(w) = e {
11197 *w
11198 } else {
11199 unreachable!("action only triggered for WithinGroup expressions")
11200 };
11201 if let Expression::ArrayAgg(inner_agg) = wg.this {
11202 let col = inner_agg.this.clone();
11203 let filter = Expression::IsNull(Box::new(IsNull {
11204 this: col,
11205 not: true,
11206 postfix_form: false,
11207 }));
11208 // For DuckDB, add explicit NULLS FIRST for DESC ordering
11209 let order_by = if matches!(target, DialectType::DuckDB) {
11210 wg.order_by
11211 .into_iter()
11212 .map(|mut o| {
11213 if o.desc && o.nulls_first.is_none() {
11214 o.nulls_first = Some(true);
11215 }
11216 o
11217 })
11218 .collect()
11219 } else {
11220 wg.order_by
11221 };
11222 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11223 this: inner_agg.this,
11224 distinct: inner_agg.distinct,
11225 filter: Some(filter),
11226 order_by,
11227 name: inner_agg.name,
11228 ignore_nulls: inner_agg.ignore_nulls,
11229 having_max: inner_agg.having_max,
11230 limit: inner_agg.limit,
11231 inferred_type: None,
11232 })))
11233 } else {
11234 Ok(Expression::WithinGroup(Box::new(wg)))
11235 }
11236 }
11237
11238 Action::ArrayAggFilter => {
11239 let agg = if let Expression::ArrayAgg(a) = e {
11240 *a
11241 } else {
11242 unreachable!("action only triggered for ArrayAgg expressions")
11243 };
11244 let col = agg.this.clone();
11245 let filter = Expression::IsNull(Box::new(IsNull {
11246 this: col,
11247 not: true,
11248 postfix_form: false,
11249 }));
11250 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11251 filter: Some(filter),
11252 ..agg
11253 })))
11254 }
11255
11256 Action::ArrayAggNullFilter => {
11257 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
11258 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
11259 let agg = if let Expression::ArrayAgg(a) = e {
11260 *a
11261 } else {
11262 unreachable!("action only triggered for ArrayAgg expressions")
11263 };
11264 let col = agg.this.clone();
11265 let not_null = Expression::IsNull(Box::new(IsNull {
11266 this: col,
11267 not: true,
11268 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
11269 }));
11270 let new_filter = if let Some(existing_filter) = agg.filter {
11271 // AND the NOT IS NULL with existing filter
11272 Expression::And(Box::new(crate::expressions::BinaryOp::new(
11273 existing_filter,
11274 not_null,
11275 )))
11276 } else {
11277 not_null
11278 };
11279 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11280 filter: Some(new_filter),
11281 ..agg
11282 })))
11283 }
11284
11285 Action::BigQueryArraySelectAsStructToSnowflake => {
11286 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
11287 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
11288 if let Expression::Function(mut f) = e {
11289 let is_match = f.args.len() == 1
11290 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
11291 if is_match {
11292 let inner_select = match f.args.remove(0) {
11293 Expression::Select(s) => *s,
11294 _ => unreachable!(
11295 "argument already verified to be a Select expression"
11296 ),
11297 };
11298 // Build OBJECT_CONSTRUCT args from SELECT expressions
11299 let mut oc_args = Vec::new();
11300 for expr in &inner_select.expressions {
11301 match expr {
11302 Expression::Alias(a) => {
11303 let key = Expression::Literal(Box::new(Literal::String(
11304 a.alias.name.clone(),
11305 )));
11306 let value = a.this.clone();
11307 oc_args.push(key);
11308 oc_args.push(value);
11309 }
11310 Expression::Column(c) => {
11311 let key = Expression::Literal(Box::new(Literal::String(
11312 c.name.name.clone(),
11313 )));
11314 oc_args.push(key);
11315 oc_args.push(expr.clone());
11316 }
11317 _ => {
11318 oc_args.push(expr.clone());
11319 }
11320 }
11321 }
11322 let object_construct = Expression::Function(Box::new(Function::new(
11323 "OBJECT_CONSTRUCT".to_string(),
11324 oc_args,
11325 )));
11326 let array_agg = Expression::Function(Box::new(Function::new(
11327 "ARRAY_AGG".to_string(),
11328 vec![object_construct],
11329 )));
11330 let mut new_select = crate::expressions::Select::new();
11331 new_select.expressions = vec![array_agg];
11332 new_select.from = inner_select.from.clone();
11333 new_select.where_clause = inner_select.where_clause.clone();
11334 new_select.group_by = inner_select.group_by.clone();
11335 new_select.having = inner_select.having.clone();
11336 new_select.joins = inner_select.joins.clone();
11337 Ok(Expression::Subquery(Box::new(
11338 crate::expressions::Subquery {
11339 this: Expression::Select(Box::new(new_select)),
11340 alias: None,
11341 column_aliases: Vec::new(),
11342 alias_explicit_as: false,
11343 alias_keyword: None,
11344 order_by: None,
11345 limit: None,
11346 offset: None,
11347 distribute_by: None,
11348 sort_by: None,
11349 cluster_by: None,
11350 lateral: false,
11351 modifiers_inside: false,
11352 trailing_comments: Vec::new(),
11353 inferred_type: None,
11354 },
11355 )))
11356 } else {
11357 Ok(Expression::Function(f))
11358 }
11359 } else {
11360 Ok(e)
11361 }
11362 }
11363
11364 Action::BigQueryPercentileContToDuckDB => {
11365 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
11366 if let Expression::AggregateFunction(mut af) = e {
11367 af.name = "QUANTILE_CONT".to_string();
11368 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
11369 // Keep only first 2 args
11370 if af.args.len() > 2 {
11371 af.args.truncate(2);
11372 }
11373 Ok(Expression::AggregateFunction(af))
11374 } else {
11375 Ok(e)
11376 }
11377 }
11378
11379 Action::ArrayAggIgnoreNullsDuckDB => {
11380 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
11381 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
11382 let mut agg = if let Expression::ArrayAgg(a) = e {
11383 *a
11384 } else {
11385 unreachable!("action only triggered for ArrayAgg expressions")
11386 };
11387 agg.ignore_nulls = None; // Strip IGNORE NULLS
11388 if !agg.order_by.is_empty() {
11389 agg.order_by[0].nulls_first = Some(true);
11390 }
11391 Ok(Expression::ArrayAgg(Box::new(agg)))
11392 }
11393
11394 Action::CountDistinctMultiArg => {
11395 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
11396 if let Expression::Count(c) = e {
11397 if let Some(Expression::Tuple(t)) = c.this {
11398 let args = t.expressions;
11399 // Build CASE expression:
11400 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
11401 let mut whens = Vec::new();
11402 for arg in &args {
11403 whens.push((
11404 Expression::IsNull(Box::new(IsNull {
11405 this: arg.clone(),
11406 not: false,
11407 postfix_form: false,
11408 })),
11409 Expression::Null(crate::expressions::Null),
11410 ));
11411 }
11412 // Build the tuple for ELSE
11413 let tuple_expr =
11414 Expression::Tuple(Box::new(crate::expressions::Tuple {
11415 expressions: args,
11416 }));
11417 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
11418 operand: None,
11419 whens,
11420 else_: Some(tuple_expr),
11421 comments: Vec::new(),
11422 inferred_type: None,
11423 }));
11424 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
11425 this: Some(case_expr),
11426 star: false,
11427 distinct: true,
11428 filter: c.filter,
11429 ignore_nulls: c.ignore_nulls,
11430 original_name: c.original_name,
11431 inferred_type: None,
11432 })))
11433 } else {
11434 Ok(Expression::Count(c))
11435 }
11436 } else {
11437 Ok(e)
11438 }
11439 }
11440
11441 Action::CastTimestampToDatetime => {
11442 let c = if let Expression::Cast(c) = e {
11443 *c
11444 } else {
11445 unreachable!("action only triggered for Cast expressions")
11446 };
11447 Ok(Expression::Cast(Box::new(Cast {
11448 to: DataType::Custom {
11449 name: "DATETIME".to_string(),
11450 },
11451 ..c
11452 })))
11453 }
11454
11455 Action::CastTimestampStripTz => {
11456 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
11457 let c = if let Expression::Cast(c) = e {
11458 *c
11459 } else {
11460 unreachable!("action only triggered for Cast expressions")
11461 };
11462 Ok(Expression::Cast(Box::new(Cast {
11463 to: DataType::Timestamp {
11464 precision: None,
11465 timezone: false,
11466 },
11467 ..c
11468 })))
11469 }
11470
11471 Action::CastTimestamptzToFunc => {
11472 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
11473 let c = if let Expression::Cast(c) = e {
11474 *c
11475 } else {
11476 unreachable!("action only triggered for Cast expressions")
11477 };
11478 Ok(Expression::Function(Box::new(Function::new(
11479 "TIMESTAMP".to_string(),
11480 vec![c.this],
11481 ))))
11482 }
11483
11484 Action::ToDateToCast => {
11485 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
11486 if let Expression::Function(f) = e {
11487 let arg = f.args.into_iter().next().unwrap();
11488 Ok(Expression::Cast(Box::new(Cast {
11489 this: arg,
11490 to: DataType::Date,
11491 double_colon_syntax: false,
11492 trailing_comments: vec![],
11493 format: None,
11494 default: None,
11495 inferred_type: None,
11496 })))
11497 } else {
11498 Ok(e)
11499 }
11500 }
11501 Action::DateTruncWrapCast => {
11502 // Handle both Expression::DateTrunc/TimestampTrunc and
11503 // Expression::Function("DATE_TRUNC", [unit, expr])
11504 match e {
11505 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
11506 let input_type = match &d.this {
11507 Expression::Cast(c) => Some(c.to.clone()),
11508 _ => None,
11509 };
11510 if let Some(cast_type) = input_type {
11511 let is_time = matches!(cast_type, DataType::Time { .. });
11512 if is_time {
11513 let date_expr = Expression::Cast(Box::new(Cast {
11514 this: Expression::Literal(Box::new(
11515 crate::expressions::Literal::String(
11516 "1970-01-01".to_string(),
11517 ),
11518 )),
11519 to: DataType::Date,
11520 double_colon_syntax: false,
11521 trailing_comments: vec![],
11522 format: None,
11523 default: None,
11524 inferred_type: None,
11525 }));
11526 let add_expr =
11527 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
11528 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
11529 this: add_expr,
11530 unit: d.unit,
11531 }));
11532 Ok(Expression::Cast(Box::new(Cast {
11533 this: inner,
11534 to: cast_type,
11535 double_colon_syntax: false,
11536 trailing_comments: vec![],
11537 format: None,
11538 default: None,
11539 inferred_type: None,
11540 })))
11541 } else {
11542 let inner = Expression::DateTrunc(Box::new(*d));
11543 Ok(Expression::Cast(Box::new(Cast {
11544 this: inner,
11545 to: cast_type,
11546 double_colon_syntax: false,
11547 trailing_comments: vec![],
11548 format: None,
11549 default: None,
11550 inferred_type: None,
11551 })))
11552 }
11553 } else {
11554 Ok(Expression::DateTrunc(d))
11555 }
11556 }
11557 Expression::Function(f) if f.args.len() == 2 => {
11558 // Function-based DATE_TRUNC(unit, expr)
11559 let input_type = match &f.args[1] {
11560 Expression::Cast(c) => Some(c.to.clone()),
11561 _ => None,
11562 };
11563 if let Some(cast_type) = input_type {
11564 let is_time = matches!(cast_type, DataType::Time { .. });
11565 if is_time {
11566 let date_expr = Expression::Cast(Box::new(Cast {
11567 this: Expression::Literal(Box::new(
11568 crate::expressions::Literal::String(
11569 "1970-01-01".to_string(),
11570 ),
11571 )),
11572 to: DataType::Date,
11573 double_colon_syntax: false,
11574 trailing_comments: vec![],
11575 format: None,
11576 default: None,
11577 inferred_type: None,
11578 }));
11579 let mut args = f.args;
11580 let unit_arg = args.remove(0);
11581 let time_expr = args.remove(0);
11582 let add_expr = Expression::Add(Box::new(BinaryOp::new(
11583 date_expr, time_expr,
11584 )));
11585 let inner = Expression::Function(Box::new(Function::new(
11586 "DATE_TRUNC".to_string(),
11587 vec![unit_arg, add_expr],
11588 )));
11589 Ok(Expression::Cast(Box::new(Cast {
11590 this: inner,
11591 to: cast_type,
11592 double_colon_syntax: false,
11593 trailing_comments: vec![],
11594 format: None,
11595 default: None,
11596 inferred_type: None,
11597 })))
11598 } else {
11599 // Wrap the function in CAST
11600 Ok(Expression::Cast(Box::new(Cast {
11601 this: Expression::Function(f),
11602 to: cast_type,
11603 double_colon_syntax: false,
11604 trailing_comments: vec![],
11605 format: None,
11606 default: None,
11607 inferred_type: None,
11608 })))
11609 }
11610 } else {
11611 Ok(Expression::Function(f))
11612 }
11613 }
11614 other => Ok(other),
11615 }
11616 }
11617
11618 Action::RegexpReplaceSnowflakeToDuckDB => {
11619 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
11620 if let Expression::Function(f) = e {
11621 let mut args = f.args;
11622 let subject = args.remove(0);
11623 let pattern = args.remove(0);
11624 let replacement = args.remove(0);
11625 Ok(Expression::Function(Box::new(Function::new(
11626 "REGEXP_REPLACE".to_string(),
11627 vec![
11628 subject,
11629 pattern,
11630 replacement,
11631 Expression::Literal(Box::new(crate::expressions::Literal::String(
11632 "g".to_string(),
11633 ))),
11634 ],
11635 ))))
11636 } else {
11637 Ok(e)
11638 }
11639 }
11640
11641 Action::RegexpReplacePositionSnowflakeToDuckDB => {
11642 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
11643 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
11644 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
11645 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
11646 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
11647 if let Expression::Function(f) = e {
11648 let mut args = f.args;
11649 let subject = args.remove(0);
11650 let pattern = args.remove(0);
11651 let replacement = args.remove(0);
11652 let position = args.remove(0);
11653 let occurrence = args.remove(0);
11654
11655 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11656 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11657 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11658
11659 if is_pos_1 && is_occ_1 {
11660 // REGEXP_REPLACE(s, p, r) - single replace, no flags
11661 Ok(Expression::Function(Box::new(Function::new(
11662 "REGEXP_REPLACE".to_string(),
11663 vec![subject, pattern, replacement],
11664 ))))
11665 } else if is_pos_1 && is_occ_0 {
11666 // REGEXP_REPLACE(s, p, r, 'g') - global replace
11667 Ok(Expression::Function(Box::new(Function::new(
11668 "REGEXP_REPLACE".to_string(),
11669 vec![
11670 subject,
11671 pattern,
11672 replacement,
11673 Expression::Literal(Box::new(Literal::String("g".to_string()))),
11674 ],
11675 ))))
11676 } else {
11677 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
11678 // Pre-compute pos-1 when position is a numeric literal
11679 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
11680 if let Literal::Number(ref n) = lit.as_ref() {
11681 if let Ok(val) = n.parse::<i64>() {
11682 Expression::number(val - 1)
11683 } else {
11684 Expression::Sub(Box::new(BinaryOp::new(
11685 position.clone(),
11686 Expression::number(1),
11687 )))
11688 }
11689 } else {
11690 position.clone()
11691 }
11692 } else {
11693 Expression::Sub(Box::new(BinaryOp::new(
11694 position.clone(),
11695 Expression::number(1),
11696 )))
11697 };
11698 let prefix = Expression::Function(Box::new(Function::new(
11699 "SUBSTRING".to_string(),
11700 vec![subject.clone(), Expression::number(1), pos_minus_1],
11701 )));
11702 let suffix_subject = Expression::Function(Box::new(Function::new(
11703 "SUBSTRING".to_string(),
11704 vec![subject, position],
11705 )));
11706 let mut replace_args = vec![suffix_subject, pattern, replacement];
11707 if is_occ_0 {
11708 replace_args.push(Expression::Literal(Box::new(Literal::String(
11709 "g".to_string(),
11710 ))));
11711 }
11712 let replace_expr = Expression::Function(Box::new(Function::new(
11713 "REGEXP_REPLACE".to_string(),
11714 replace_args,
11715 )));
11716 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
11717 this: Box::new(prefix),
11718 expression: Box::new(replace_expr),
11719 safe: None,
11720 })))
11721 }
11722 } else {
11723 Ok(e)
11724 }
11725 }
11726
11727 Action::RegexpSubstrSnowflakeToDuckDB => {
11728 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
11729 if let Expression::Function(f) = e {
11730 let mut args = f.args;
11731 let arg_count = args.len();
11732 match arg_count {
11733 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
11734 0..=2 => Ok(Expression::Function(Box::new(Function::new(
11735 "REGEXP_EXTRACT".to_string(),
11736 args,
11737 )))),
11738 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
11739 3 => {
11740 let subject = args.remove(0);
11741 let pattern = args.remove(0);
11742 let position = args.remove(0);
11743 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11744 if is_pos_1 {
11745 Ok(Expression::Function(Box::new(Function::new(
11746 "REGEXP_EXTRACT".to_string(),
11747 vec![subject, pattern],
11748 ))))
11749 } else {
11750 let substring_expr =
11751 Expression::Function(Box::new(Function::new(
11752 "SUBSTRING".to_string(),
11753 vec![subject, position],
11754 )));
11755 let nullif_expr =
11756 Expression::Function(Box::new(Function::new(
11757 "NULLIF".to_string(),
11758 vec![
11759 substring_expr,
11760 Expression::Literal(Box::new(Literal::String(
11761 String::new(),
11762 ))),
11763 ],
11764 )));
11765 Ok(Expression::Function(Box::new(Function::new(
11766 "REGEXP_EXTRACT".to_string(),
11767 vec![nullif_expr, pattern],
11768 ))))
11769 }
11770 }
11771 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
11772 4 => {
11773 let subject = args.remove(0);
11774 let pattern = args.remove(0);
11775 let position = args.remove(0);
11776 let occurrence = args.remove(0);
11777 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11778 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11779
11780 let effective_subject = if is_pos_1 {
11781 subject
11782 } else {
11783 let substring_expr =
11784 Expression::Function(Box::new(Function::new(
11785 "SUBSTRING".to_string(),
11786 vec![subject, position],
11787 )));
11788 Expression::Function(Box::new(Function::new(
11789 "NULLIF".to_string(),
11790 vec![
11791 substring_expr,
11792 Expression::Literal(Box::new(Literal::String(
11793 String::new(),
11794 ))),
11795 ],
11796 )))
11797 };
11798
11799 if is_occ_1 {
11800 Ok(Expression::Function(Box::new(Function::new(
11801 "REGEXP_EXTRACT".to_string(),
11802 vec![effective_subject, pattern],
11803 ))))
11804 } else {
11805 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
11806 let extract_all =
11807 Expression::Function(Box::new(Function::new(
11808 "REGEXP_EXTRACT_ALL".to_string(),
11809 vec![effective_subject, pattern],
11810 )));
11811 Ok(Expression::Function(Box::new(Function::new(
11812 "ARRAY_EXTRACT".to_string(),
11813 vec![extract_all, occurrence],
11814 ))))
11815 }
11816 }
11817 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
11818 5 => {
11819 let subject = args.remove(0);
11820 let pattern = args.remove(0);
11821 let _position = args.remove(0);
11822 let _occurrence = args.remove(0);
11823 let _flags = args.remove(0);
11824 // Strip 'e' flag, convert to REGEXP_EXTRACT
11825 Ok(Expression::Function(Box::new(Function::new(
11826 "REGEXP_EXTRACT".to_string(),
11827 vec![subject, pattern],
11828 ))))
11829 }
11830 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
11831 _ => {
11832 let subject = args.remove(0);
11833 let pattern = args.remove(0);
11834 let _position = args.remove(0);
11835 let _occurrence = args.remove(0);
11836 let _flags = args.remove(0);
11837 let group = args.remove(0);
11838 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11839 if is_group_0 {
11840 // Strip group=0 (default)
11841 Ok(Expression::Function(Box::new(Function::new(
11842 "REGEXP_EXTRACT".to_string(),
11843 vec![subject, pattern],
11844 ))))
11845 } else {
11846 Ok(Expression::Function(Box::new(Function::new(
11847 "REGEXP_EXTRACT".to_string(),
11848 vec![subject, pattern, group],
11849 ))))
11850 }
11851 }
11852 }
11853 } else {
11854 Ok(e)
11855 }
11856 }
11857
11858 Action::RegexpSubstrSnowflakeIdentity => {
11859 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
11860 // Strip trailing group=0
11861 if let Expression::Function(f) = e {
11862 let func_name = f.name.clone();
11863 let mut args = f.args;
11864 if args.len() == 6 {
11865 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11866 if is_group_0 {
11867 args.truncate(5);
11868 }
11869 }
11870 Ok(Expression::Function(Box::new(Function::new(
11871 func_name, args,
11872 ))))
11873 } else {
11874 Ok(e)
11875 }
11876 }
11877
11878 Action::RegexpSubstrAllSnowflakeToDuckDB => {
11879 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
11880 if let Expression::Function(f) = e {
11881 let mut args = f.args;
11882 let arg_count = args.len();
11883 match arg_count {
11884 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
11885 0..=2 => Ok(Expression::Function(Box::new(Function::new(
11886 "REGEXP_EXTRACT_ALL".to_string(),
11887 args,
11888 )))),
11889 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
11890 3 => {
11891 let subject = args.remove(0);
11892 let pattern = args.remove(0);
11893 let position = args.remove(0);
11894 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11895 if is_pos_1 {
11896 Ok(Expression::Function(Box::new(Function::new(
11897 "REGEXP_EXTRACT_ALL".to_string(),
11898 vec![subject, pattern],
11899 ))))
11900 } else {
11901 let substring_expr =
11902 Expression::Function(Box::new(Function::new(
11903 "SUBSTRING".to_string(),
11904 vec![subject, position],
11905 )));
11906 Ok(Expression::Function(Box::new(Function::new(
11907 "REGEXP_EXTRACT_ALL".to_string(),
11908 vec![substring_expr, pattern],
11909 ))))
11910 }
11911 }
11912 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
11913 4 => {
11914 let subject = args.remove(0);
11915 let pattern = args.remove(0);
11916 let position = args.remove(0);
11917 let occurrence = args.remove(0);
11918 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11919 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11920
11921 let effective_subject = if is_pos_1 {
11922 subject
11923 } else {
11924 Expression::Function(Box::new(Function::new(
11925 "SUBSTRING".to_string(),
11926 vec![subject, position],
11927 )))
11928 };
11929
11930 if is_occ_1 {
11931 Ok(Expression::Function(Box::new(Function::new(
11932 "REGEXP_EXTRACT_ALL".to_string(),
11933 vec![effective_subject, pattern],
11934 ))))
11935 } else {
11936 // REGEXP_EXTRACT_ALL(s, p)[occ:]
11937 let extract_all =
11938 Expression::Function(Box::new(Function::new(
11939 "REGEXP_EXTRACT_ALL".to_string(),
11940 vec![effective_subject, pattern],
11941 )));
11942 Ok(Expression::ArraySlice(Box::new(
11943 crate::expressions::ArraySlice {
11944 this: extract_all,
11945 start: Some(occurrence),
11946 end: None,
11947 },
11948 )))
11949 }
11950 }
11951 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
11952 5 => {
11953 let subject = args.remove(0);
11954 let pattern = args.remove(0);
11955 let _position = args.remove(0);
11956 let _occurrence = args.remove(0);
11957 let _flags = args.remove(0);
11958 Ok(Expression::Function(Box::new(Function::new(
11959 "REGEXP_EXTRACT_ALL".to_string(),
11960 vec![subject, pattern],
11961 ))))
11962 }
11963 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
11964 _ => {
11965 let subject = args.remove(0);
11966 let pattern = args.remove(0);
11967 let _position = args.remove(0);
11968 let _occurrence = args.remove(0);
11969 let _flags = args.remove(0);
11970 let group = args.remove(0);
11971 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11972 if is_group_0 {
11973 Ok(Expression::Function(Box::new(Function::new(
11974 "REGEXP_EXTRACT_ALL".to_string(),
11975 vec![subject, pattern],
11976 ))))
11977 } else {
11978 Ok(Expression::Function(Box::new(Function::new(
11979 "REGEXP_EXTRACT_ALL".to_string(),
11980 vec![subject, pattern, group],
11981 ))))
11982 }
11983 }
11984 }
11985 } else {
11986 Ok(e)
11987 }
11988 }
11989
11990 Action::RegexpCountSnowflakeToDuckDB => {
11991 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
11992 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
11993 if let Expression::Function(f) = e {
11994 let mut args = f.args;
11995 let arg_count = args.len();
11996 let subject = args.remove(0);
11997 let pattern = args.remove(0);
11998
11999 // Handle position arg
12000 let effective_subject = if arg_count >= 3 {
12001 let position = args.remove(0);
12002 Expression::Function(Box::new(Function::new(
12003 "SUBSTRING".to_string(),
12004 vec![subject, position],
12005 )))
12006 } else {
12007 subject
12008 };
12009
12010 // Handle flags arg -> embed as (?flags) prefix in pattern
12011 let effective_pattern = if arg_count >= 4 {
12012 let flags = args.remove(0);
12013 match &flags {
12014 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
12015 {
12016 let Literal::String(f_str) = lit.as_ref() else {
12017 unreachable!()
12018 };
12019 // Always use concatenation: '(?flags)' || pattern
12020 let prefix = Expression::Literal(Box::new(Literal::String(
12021 format!("(?{})", f_str),
12022 )));
12023 Expression::DPipe(Box::new(crate::expressions::DPipe {
12024 this: Box::new(prefix),
12025 expression: Box::new(pattern.clone()),
12026 safe: None,
12027 }))
12028 }
12029 _ => pattern.clone(),
12030 }
12031 } else {
12032 pattern.clone()
12033 };
12034
12035 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
12036 let extract_all = Expression::Function(Box::new(Function::new(
12037 "REGEXP_EXTRACT_ALL".to_string(),
12038 vec![effective_subject, effective_pattern.clone()],
12039 )));
12040 let length_expr =
12041 Expression::Length(Box::new(crate::expressions::UnaryFunc {
12042 this: extract_all,
12043 original_name: None,
12044 inferred_type: None,
12045 }));
12046 let condition = Expression::Eq(Box::new(BinaryOp::new(
12047 effective_pattern,
12048 Expression::Literal(Box::new(Literal::String(String::new()))),
12049 )));
12050 Ok(Expression::Case(Box::new(Case {
12051 operand: None,
12052 whens: vec![(condition, Expression::number(0))],
12053 else_: Some(length_expr),
12054 comments: vec![],
12055 inferred_type: None,
12056 })))
12057 } else {
12058 Ok(e)
12059 }
12060 }
12061
12062 Action::RegexpInstrSnowflakeToDuckDB => {
12063 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
12064 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
12065 // WHEN p = '' THEN 0
12066 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
12067 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
12068 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
12069 // + pos_offset
12070 // END
12071 if let Expression::Function(f) = e {
12072 let mut args = f.args;
12073 let subject = args.remove(0);
12074 let pattern = if !args.is_empty() {
12075 args.remove(0)
12076 } else {
12077 Expression::Literal(Box::new(Literal::String(String::new())))
12078 };
12079
12080 // Collect all original args for NULL checks
12081 let position = if !args.is_empty() {
12082 Some(args.remove(0))
12083 } else {
12084 None
12085 };
12086 let occurrence = if !args.is_empty() {
12087 Some(args.remove(0))
12088 } else {
12089 None
12090 };
12091 let option = if !args.is_empty() {
12092 Some(args.remove(0))
12093 } else {
12094 None
12095 };
12096 let flags = if !args.is_empty() {
12097 Some(args.remove(0))
12098 } else {
12099 None
12100 };
12101 let _group = if !args.is_empty() {
12102 Some(args.remove(0))
12103 } else {
12104 None
12105 };
12106
12107 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
12108 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
12109
12110 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
12111 let mut null_checks: Vec<Expression> = vec![
12112 Expression::Is(Box::new(BinaryOp::new(
12113 subject.clone(),
12114 Expression::Null(Null),
12115 ))),
12116 Expression::Is(Box::new(BinaryOp::new(
12117 pattern.clone(),
12118 Expression::Null(Null),
12119 ))),
12120 ];
12121 // Add NULL checks for all provided optional args
12122 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
12123 if let Some(arg) = opt_arg {
12124 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
12125 (*arg).clone(),
12126 Expression::Null(Null),
12127 ))));
12128 }
12129 }
12130 // Chain with OR
12131 let null_condition = null_checks
12132 .into_iter()
12133 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
12134 .unwrap();
12135
12136 // Effective subject (apply position offset)
12137 let effective_subject = if is_pos_1 {
12138 subject.clone()
12139 } else {
12140 let pos = position.clone().unwrap_or(Expression::number(1));
12141 Expression::Function(Box::new(Function::new(
12142 "SUBSTRING".to_string(),
12143 vec![subject.clone(), pos],
12144 )))
12145 };
12146
12147 // Effective pattern (apply flags if present)
12148 let effective_pattern = if let Some(ref fl) = flags {
12149 if let Expression::Literal(lit) = fl {
12150 if let Literal::String(f_str) = lit.as_ref() {
12151 if !f_str.is_empty() {
12152 let prefix = Expression::Literal(Box::new(
12153 Literal::String(format!("(?{})", f_str)),
12154 ));
12155 Expression::DPipe(Box::new(crate::expressions::DPipe {
12156 this: Box::new(prefix),
12157 expression: Box::new(pattern.clone()),
12158 safe: None,
12159 }))
12160 } else {
12161 pattern.clone()
12162 }
12163 } else {
12164 fl.clone()
12165 }
12166 } else {
12167 pattern.clone()
12168 }
12169 } else {
12170 pattern.clone()
12171 };
12172
12173 // WHEN pattern = '' THEN 0
12174 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
12175 effective_pattern.clone(),
12176 Expression::Literal(Box::new(Literal::String(String::new()))),
12177 )));
12178
12179 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
12180 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
12181 Expression::Length(Box::new(crate::expressions::UnaryFunc {
12182 this: Expression::Function(Box::new(Function::new(
12183 "REGEXP_EXTRACT_ALL".to_string(),
12184 vec![effective_subject.clone(), effective_pattern.clone()],
12185 ))),
12186 original_name: None,
12187 inferred_type: None,
12188 })),
12189 occurrence_expr.clone(),
12190 )));
12191
12192 // Helper: build LENGTH lambda for LIST_TRANSFORM
12193 let make_len_lambda = || {
12194 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
12195 parameters: vec![crate::expressions::Identifier::new("x")],
12196 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
12197 this: Expression::Identifier(
12198 crate::expressions::Identifier::new("x"),
12199 ),
12200 original_name: None,
12201 inferred_type: None,
12202 })),
12203 colon: false,
12204 parameter_types: vec![],
12205 }))
12206 };
12207
12208 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
12209 let split_sliced =
12210 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
12211 this: Expression::Function(Box::new(Function::new(
12212 "STRING_SPLIT_REGEX".to_string(),
12213 vec![effective_subject.clone(), effective_pattern.clone()],
12214 ))),
12215 start: Some(Expression::number(1)),
12216 end: Some(occurrence_expr.clone()),
12217 }));
12218 let split_sum = Expression::Function(Box::new(Function::new(
12219 "COALESCE".to_string(),
12220 vec![
12221 Expression::Function(Box::new(Function::new(
12222 "LIST_SUM".to_string(),
12223 vec![Expression::Function(Box::new(Function::new(
12224 "LIST_TRANSFORM".to_string(),
12225 vec![split_sliced, make_len_lambda()],
12226 )))],
12227 ))),
12228 Expression::number(0),
12229 ],
12230 )));
12231
12232 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
12233 let extract_sliced =
12234 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
12235 this: Expression::Function(Box::new(Function::new(
12236 "REGEXP_EXTRACT_ALL".to_string(),
12237 vec![effective_subject.clone(), effective_pattern.clone()],
12238 ))),
12239 start: Some(Expression::number(1)),
12240 end: Some(Expression::Sub(Box::new(BinaryOp::new(
12241 occurrence_expr.clone(),
12242 Expression::number(1),
12243 )))),
12244 }));
12245 let extract_sum = Expression::Function(Box::new(Function::new(
12246 "COALESCE".to_string(),
12247 vec![
12248 Expression::Function(Box::new(Function::new(
12249 "LIST_SUM".to_string(),
12250 vec![Expression::Function(Box::new(Function::new(
12251 "LIST_TRANSFORM".to_string(),
12252 vec![extract_sliced, make_len_lambda()],
12253 )))],
12254 ))),
12255 Expression::number(0),
12256 ],
12257 )));
12258
12259 // Position offset: pos - 1 when pos > 1, else 0
12260 let pos_offset: Expression = if !is_pos_1 {
12261 let pos = position.clone().unwrap_or(Expression::number(1));
12262 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
12263 } else {
12264 Expression::number(0)
12265 };
12266
12267 // ELSE: 1 + split_sum + extract_sum + pos_offset
12268 let else_expr = Expression::Add(Box::new(BinaryOp::new(
12269 Expression::Add(Box::new(BinaryOp::new(
12270 Expression::Add(Box::new(BinaryOp::new(
12271 Expression::number(1),
12272 split_sum,
12273 ))),
12274 extract_sum,
12275 ))),
12276 pos_offset,
12277 )));
12278
12279 Ok(Expression::Case(Box::new(Case {
12280 operand: None,
12281 whens: vec![
12282 (null_condition, Expression::Null(Null)),
12283 (empty_pattern_check, Expression::number(0)),
12284 (match_count_check, Expression::number(0)),
12285 ],
12286 else_: Some(else_expr),
12287 comments: vec![],
12288 inferred_type: None,
12289 })))
12290 } else {
12291 Ok(e)
12292 }
12293 }
12294
12295 Action::RlikeSnowflakeToDuckDB => {
12296 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
12297 // Both do full-string matching, so no anchoring needed
12298 let (subject, pattern, flags) = match e {
12299 Expression::RegexpLike(ref rl) => {
12300 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
12301 }
12302 Expression::Function(ref f) if f.args.len() >= 2 => {
12303 let s = f.args[0].clone();
12304 let p = f.args[1].clone();
12305 let fl = f.args.get(2).cloned();
12306 (s, p, fl)
12307 }
12308 _ => return Ok(e),
12309 };
12310
12311 let mut result_args = vec![subject, pattern];
12312 if let Some(fl) = flags {
12313 result_args.push(fl);
12314 }
12315 Ok(Expression::Function(Box::new(Function::new(
12316 "REGEXP_FULL_MATCH".to_string(),
12317 result_args,
12318 ))))
12319 }
12320
12321 Action::RegexpExtractAllToSnowflake => {
12322 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
12323 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
12324 if let Expression::Function(f) = e {
12325 let mut args = f.args;
12326 if args.len() >= 2 {
12327 let str_expr = args.remove(0);
12328 let pattern = args.remove(0);
12329
12330 let has_groups = match &pattern {
12331 Expression::Literal(lit)
12332 if matches!(lit.as_ref(), Literal::String(_)) =>
12333 {
12334 let Literal::String(s) = lit.as_ref() else {
12335 unreachable!()
12336 };
12337 s.contains('(') && s.contains(')')
12338 }
12339 _ => false,
12340 };
12341
12342 if has_groups {
12343 Ok(Expression::Function(Box::new(Function::new(
12344 "REGEXP_SUBSTR_ALL".to_string(),
12345 vec![
12346 str_expr,
12347 pattern,
12348 Expression::number(1),
12349 Expression::number(1),
12350 Expression::Literal(Box::new(Literal::String(
12351 "c".to_string(),
12352 ))),
12353 Expression::number(1),
12354 ],
12355 ))))
12356 } else {
12357 Ok(Expression::Function(Box::new(Function::new(
12358 "REGEXP_SUBSTR_ALL".to_string(),
12359 vec![str_expr, pattern],
12360 ))))
12361 }
12362 } else {
12363 Ok(Expression::Function(Box::new(Function::new(
12364 "REGEXP_SUBSTR_ALL".to_string(),
12365 args,
12366 ))))
12367 }
12368 } else {
12369 Ok(e)
12370 }
12371 }
12372
12373 Action::SetToVariable => {
12374 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
12375 if let Expression::SetStatement(mut s) = e {
12376 for item in &mut s.items {
12377 if item.kind.is_none() {
12378 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
12379 let already_variable = match &item.name {
12380 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
12381 _ => false,
12382 };
12383 if already_variable {
12384 // Extract the actual name and set kind
12385 if let Expression::Identifier(ref mut id) = item.name {
12386 let actual_name = id.name["VARIABLE ".len()..].to_string();
12387 id.name = actual_name;
12388 }
12389 }
12390 item.kind = Some("VARIABLE".to_string());
12391 }
12392 }
12393 Ok(Expression::SetStatement(s))
12394 } else {
12395 Ok(e)
12396 }
12397 }
12398
12399 Action::ConvertTimezoneToExpr => {
12400 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
12401 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
12402 if let Expression::Function(f) = e {
12403 if f.args.len() == 2 {
12404 let mut args = f.args;
12405 let target_tz = args.remove(0);
12406 let timestamp = args.remove(0);
12407 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
12408 source_tz: None,
12409 target_tz: Some(Box::new(target_tz)),
12410 timestamp: Some(Box::new(timestamp)),
12411 options: vec![],
12412 })))
12413 } else if f.args.len() == 3 {
12414 let mut args = f.args;
12415 let source_tz = args.remove(0);
12416 let target_tz = args.remove(0);
12417 let timestamp = args.remove(0);
12418 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
12419 source_tz: Some(Box::new(source_tz)),
12420 target_tz: Some(Box::new(target_tz)),
12421 timestamp: Some(Box::new(timestamp)),
12422 options: vec![],
12423 })))
12424 } else {
12425 Ok(Expression::Function(f))
12426 }
12427 } else {
12428 Ok(e)
12429 }
12430 }
12431
12432 Action::BigQueryCastType => {
12433 // Convert BigQuery types to standard SQL types
12434 if let Expression::DataType(dt) = e {
12435 match dt {
12436 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
12437 Ok(Expression::DataType(DataType::BigInt { length: None }))
12438 }
12439 DataType::Custom { ref name }
12440 if name.eq_ignore_ascii_case("FLOAT64") =>
12441 {
12442 Ok(Expression::DataType(DataType::Double {
12443 precision: None,
12444 scale: None,
12445 }))
12446 }
12447 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
12448 Ok(Expression::DataType(DataType::Boolean))
12449 }
12450 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
12451 Ok(Expression::DataType(DataType::VarBinary { length: None }))
12452 }
12453 DataType::Custom { ref name }
12454 if name.eq_ignore_ascii_case("NUMERIC") =>
12455 {
12456 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
12457 // default precision (18, 3) being added to bare DECIMAL
12458 if matches!(target, DialectType::DuckDB) {
12459 Ok(Expression::DataType(DataType::Custom {
12460 name: "DECIMAL".to_string(),
12461 }))
12462 } else {
12463 Ok(Expression::DataType(DataType::Decimal {
12464 precision: None,
12465 scale: None,
12466 }))
12467 }
12468 }
12469 DataType::Custom { ref name }
12470 if name.eq_ignore_ascii_case("STRING") =>
12471 {
12472 Ok(Expression::DataType(DataType::String { length: None }))
12473 }
12474 DataType::Custom { ref name }
12475 if name.eq_ignore_ascii_case("DATETIME") =>
12476 {
12477 Ok(Expression::DataType(DataType::Timestamp {
12478 precision: None,
12479 timezone: false,
12480 }))
12481 }
12482 _ => Ok(Expression::DataType(dt)),
12483 }
12484 } else {
12485 Ok(e)
12486 }
12487 }
12488
12489 Action::BigQuerySafeDivide => {
12490 // Convert SafeDivide expression to IF/CASE form for most targets
12491 if let Expression::SafeDivide(sd) = e {
12492 let x = *sd.this;
12493 let y = *sd.expression;
12494 // Wrap x and y in parens if they're complex expressions
12495 let y_ref = match &y {
12496 Expression::Column(_)
12497 | Expression::Literal(_)
12498 | Expression::Identifier(_) => y.clone(),
12499 _ => Expression::Paren(Box::new(Paren {
12500 this: y.clone(),
12501 trailing_comments: vec![],
12502 })),
12503 };
12504 let x_ref = match &x {
12505 Expression::Column(_)
12506 | Expression::Literal(_)
12507 | Expression::Identifier(_) => x.clone(),
12508 _ => Expression::Paren(Box::new(Paren {
12509 this: x.clone(),
12510 trailing_comments: vec![],
12511 })),
12512 };
12513 let condition = Expression::Neq(Box::new(BinaryOp::new(
12514 y_ref.clone(),
12515 Expression::number(0),
12516 )));
12517 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
12518
12519 if matches!(target, DialectType::Spark | DialectType::Databricks) {
12520 Ok(Expression::Function(Box::new(Function::new(
12521 "TRY_DIVIDE".to_string(),
12522 vec![x, y],
12523 ))))
12524 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
12525 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
12526 let cast_x = Expression::Cast(Box::new(Cast {
12527 this: match &x {
12528 Expression::Column(_)
12529 | Expression::Literal(_)
12530 | Expression::Identifier(_) => x,
12531 _ => Expression::Paren(Box::new(Paren {
12532 this: x,
12533 trailing_comments: vec![],
12534 })),
12535 },
12536 to: DataType::Double {
12537 precision: None,
12538 scale: None,
12539 },
12540 trailing_comments: vec![],
12541 double_colon_syntax: false,
12542 format: None,
12543 default: None,
12544 inferred_type: None,
12545 }));
12546 let cast_div = Expression::Div(Box::new(BinaryOp::new(
12547 cast_x,
12548 match &y {
12549 Expression::Column(_)
12550 | Expression::Literal(_)
12551 | Expression::Identifier(_) => y,
12552 _ => Expression::Paren(Box::new(Paren {
12553 this: y,
12554 trailing_comments: vec![],
12555 })),
12556 },
12557 )));
12558 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12559 condition,
12560 true_value: cast_div,
12561 false_value: Some(Expression::Null(Null)),
12562 original_name: None,
12563 inferred_type: None,
12564 })))
12565 } else if matches!(target, DialectType::PostgreSQL) {
12566 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
12567 let cast_x = Expression::Cast(Box::new(Cast {
12568 this: match &x {
12569 Expression::Column(_)
12570 | Expression::Literal(_)
12571 | Expression::Identifier(_) => x,
12572 _ => Expression::Paren(Box::new(Paren {
12573 this: x,
12574 trailing_comments: vec![],
12575 })),
12576 },
12577 to: DataType::Custom {
12578 name: "DOUBLE PRECISION".to_string(),
12579 },
12580 trailing_comments: vec![],
12581 double_colon_syntax: false,
12582 format: None,
12583 default: None,
12584 inferred_type: None,
12585 }));
12586 let y_paren = match &y {
12587 Expression::Column(_)
12588 | Expression::Literal(_)
12589 | Expression::Identifier(_) => y,
12590 _ => Expression::Paren(Box::new(Paren {
12591 this: y,
12592 trailing_comments: vec![],
12593 })),
12594 };
12595 let cast_div =
12596 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
12597 Ok(Expression::Case(Box::new(Case {
12598 operand: None,
12599 whens: vec![(condition, cast_div)],
12600 else_: Some(Expression::Null(Null)),
12601 comments: Vec::new(),
12602 inferred_type: None,
12603 })))
12604 } else if matches!(target, DialectType::DuckDB) {
12605 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
12606 Ok(Expression::Case(Box::new(Case {
12607 operand: None,
12608 whens: vec![(condition, div_expr)],
12609 else_: Some(Expression::Null(Null)),
12610 comments: Vec::new(),
12611 inferred_type: None,
12612 })))
12613 } else if matches!(target, DialectType::Snowflake) {
12614 // Snowflake: IFF(y <> 0, x / y, NULL)
12615 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12616 condition,
12617 true_value: div_expr,
12618 false_value: Some(Expression::Null(Null)),
12619 original_name: Some("IFF".to_string()),
12620 inferred_type: None,
12621 })))
12622 } else {
12623 // All others: IF(y <> 0, x / y, NULL)
12624 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12625 condition,
12626 true_value: div_expr,
12627 false_value: Some(Expression::Null(Null)),
12628 original_name: None,
12629 inferred_type: None,
12630 })))
12631 }
12632 } else {
12633 Ok(e)
12634 }
12635 }
12636
12637 Action::BigQueryLastDayStripUnit => {
12638 if let Expression::LastDay(mut ld) = e {
12639 ld.unit = None; // Strip the unit (MONTH is default)
12640 match target {
12641 DialectType::PostgreSQL => {
12642 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12643 let date_trunc = Expression::Function(Box::new(Function::new(
12644 "DATE_TRUNC".to_string(),
12645 vec![
12646 Expression::Literal(Box::new(
12647 crate::expressions::Literal::String(
12648 "MONTH".to_string(),
12649 ),
12650 )),
12651 ld.this.clone(),
12652 ],
12653 )));
12654 let plus_month =
12655 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
12656 date_trunc,
12657 Expression::Interval(Box::new(
12658 crate::expressions::Interval {
12659 this: Some(Expression::Literal(Box::new(
12660 crate::expressions::Literal::String(
12661 "1 MONTH".to_string(),
12662 ),
12663 ))),
12664 unit: None,
12665 },
12666 )),
12667 )));
12668 let minus_day =
12669 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
12670 plus_month,
12671 Expression::Interval(Box::new(
12672 crate::expressions::Interval {
12673 this: Some(Expression::Literal(Box::new(
12674 crate::expressions::Literal::String(
12675 "1 DAY".to_string(),
12676 ),
12677 ))),
12678 unit: None,
12679 },
12680 )),
12681 )));
12682 Ok(Expression::Cast(Box::new(Cast {
12683 this: minus_day,
12684 to: DataType::Date,
12685 trailing_comments: vec![],
12686 double_colon_syntax: false,
12687 format: None,
12688 default: None,
12689 inferred_type: None,
12690 })))
12691 }
12692 DialectType::Presto => {
12693 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
12694 Ok(Expression::Function(Box::new(Function::new(
12695 "LAST_DAY_OF_MONTH".to_string(),
12696 vec![ld.this],
12697 ))))
12698 }
12699 DialectType::ClickHouse => {
12700 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
12701 // Need to wrap the DATE type in Nullable
12702 let nullable_date = match ld.this {
12703 Expression::Cast(mut c) => {
12704 c.to = DataType::Nullable {
12705 inner: Box::new(DataType::Date),
12706 };
12707 Expression::Cast(c)
12708 }
12709 other => other,
12710 };
12711 ld.this = nullable_date;
12712 Ok(Expression::LastDay(ld))
12713 }
12714 _ => Ok(Expression::LastDay(ld)),
12715 }
12716 } else {
12717 Ok(e)
12718 }
12719 }
12720
12721 Action::BigQueryCastFormat => {
12722 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
12723 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
12724 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
12725 let (this, to, format_expr, is_safe) = match e {
12726 Expression::Cast(ref c) if c.format.is_some() => (
12727 c.this.clone(),
12728 c.to.clone(),
12729 c.format.as_ref().unwrap().as_ref().clone(),
12730 false,
12731 ),
12732 Expression::SafeCast(ref c) if c.format.is_some() => (
12733 c.this.clone(),
12734 c.to.clone(),
12735 c.format.as_ref().unwrap().as_ref().clone(),
12736 true,
12737 ),
12738 _ => return Ok(e),
12739 };
12740 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
12741 if matches!(target, DialectType::BigQuery) {
12742 match &to {
12743 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
12744 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
12745 return Ok(e);
12746 }
12747 _ => {}
12748 }
12749 }
12750 // Extract timezone from format if AT TIME ZONE is present
12751 let (actual_format_expr, timezone) = match &format_expr {
12752 Expression::AtTimeZone(ref atz) => {
12753 (atz.this.clone(), Some(atz.zone.clone()))
12754 }
12755 _ => (format_expr.clone(), None),
12756 };
12757 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
12758 match target {
12759 DialectType::BigQuery => {
12760 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
12761 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
12762 let func_name = match &to {
12763 DataType::Date => "PARSE_DATE",
12764 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
12765 DataType::Time { .. } => "PARSE_TIMESTAMP",
12766 _ => "PARSE_TIMESTAMP",
12767 };
12768 let mut func_args = vec![strftime_fmt, this];
12769 if let Some(tz) = timezone {
12770 func_args.push(tz);
12771 }
12772 Ok(Expression::Function(Box::new(Function::new(
12773 func_name.to_string(),
12774 func_args,
12775 ))))
12776 }
12777 DialectType::DuckDB => {
12778 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
12779 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
12780 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
12781 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
12782 let parse_call = Expression::Function(Box::new(Function::new(
12783 parse_fn_name.to_string(),
12784 vec![this, duck_fmt],
12785 )));
12786 Ok(Expression::Cast(Box::new(Cast {
12787 this: parse_call,
12788 to,
12789 trailing_comments: vec![],
12790 double_colon_syntax: false,
12791 format: None,
12792 default: None,
12793 inferred_type: None,
12794 })))
12795 }
12796 _ => Ok(e),
12797 }
12798 }
12799
Action::BigQueryFunctionNormalize => {
    // All of the work happens in the shared helper; this arm only forwards the
    // expression together with the source and target dialects.
    Self::normalize_bigquery_function(e, source, target)
}
12803
Action::BigQueryToHexBare => {
    // Not used anymore - handled directly in normalize_bigquery_function
    // The arm is a no-op: the expression is returned unchanged.
    Ok(e)
}
12808
12809 Action::BigQueryToHexLower => {
12810 if let Expression::Lower(uf) = e {
12811 match uf.this {
12812 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
12813 Expression::Function(f)
12814 if matches!(target, DialectType::BigQuery)
12815 && f.name == "TO_HEX" =>
12816 {
12817 Ok(Expression::Function(f))
12818 }
12819 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
12820 Expression::Lower(inner_uf) => {
12821 if matches!(target, DialectType::BigQuery) {
12822 // BQ->BQ: extract TO_HEX
12823 if let Expression::Function(f) = inner_uf.this {
12824 Ok(Expression::Function(Box::new(Function::new(
12825 "TO_HEX".to_string(),
12826 f.args,
12827 ))))
12828 } else {
12829 Ok(Expression::Lower(inner_uf))
12830 }
12831 } else {
12832 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
12833 Ok(Expression::Lower(inner_uf))
12834 }
12835 }
12836 other => {
12837 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
12838 this: other,
12839 original_name: None,
12840 inferred_type: None,
12841 })))
12842 }
12843 }
12844 } else {
12845 Ok(e)
12846 }
12847 }
12848
12849 Action::BigQueryToHexUpper => {
12850 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
12851 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
12852 if let Expression::Upper(uf) = e {
12853 if let Expression::Lower(inner_uf) = uf.this {
12854 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
12855 if matches!(target, DialectType::BigQuery) {
12856 // Restore TO_HEX name in inner function
12857 if let Expression::Function(f) = inner_uf.this {
12858 let restored = Expression::Function(Box::new(Function::new(
12859 "TO_HEX".to_string(),
12860 f.args,
12861 )));
12862 Ok(Expression::Upper(Box::new(
12863 crate::expressions::UnaryFunc::new(restored),
12864 )))
12865 } else {
12866 Ok(Expression::Upper(inner_uf))
12867 }
12868 } else {
12869 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
12870 Ok(inner_uf.this)
12871 }
12872 } else {
12873 Ok(Expression::Upper(uf))
12874 }
12875 } else {
12876 Ok(e)
12877 }
12878 }
12879
12880 Action::BigQueryAnyValueHaving => {
12881 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
12882 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
12883 if let Expression::AnyValue(agg) = e {
12884 if let Some((having_expr, is_max)) = agg.having_max {
12885 let func_name = if is_max {
12886 "ARG_MAX_NULL"
12887 } else {
12888 "ARG_MIN_NULL"
12889 };
12890 Ok(Expression::Function(Box::new(Function::new(
12891 func_name.to_string(),
12892 vec![agg.this, *having_expr],
12893 ))))
12894 } else {
12895 Ok(Expression::AnyValue(agg))
12896 }
12897 } else {
12898 Ok(e)
12899 }
12900 }
12901
12902 Action::BigQueryApproxQuantiles => {
12903 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
12904 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
12905 if let Expression::AggregateFunction(agg) = e {
12906 if agg.args.len() >= 2 {
12907 let x_expr = agg.args[0].clone();
12908 let n_expr = &agg.args[1];
12909
12910 // Extract the numeric value from n_expr
12911 let n = match n_expr {
12912 Expression::Literal(lit)
12913 if matches!(
12914 lit.as_ref(),
12915 crate::expressions::Literal::Number(_)
12916 ) =>
12917 {
12918 let crate::expressions::Literal::Number(s) = lit.as_ref()
12919 else {
12920 unreachable!()
12921 };
12922 s.parse::<usize>().unwrap_or(2)
12923 }
12924 _ => 2,
12925 };
12926
12927 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
12928 let mut quantiles = Vec::new();
12929 for i in 0..=n {
12930 let q = i as f64 / n as f64;
12931 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
12932 if q == 0.0 {
12933 quantiles.push(Expression::number(0));
12934 } else if q == 1.0 {
12935 quantiles.push(Expression::number(1));
12936 } else {
12937 quantiles.push(Expression::Literal(Box::new(
12938 crate::expressions::Literal::Number(format!("{}", q)),
12939 )));
12940 }
12941 }
12942
12943 let array_expr =
12944 Expression::Array(Box::new(crate::expressions::Array {
12945 expressions: quantiles,
12946 }));
12947
12948 // Preserve DISTINCT modifier
12949 let mut new_func = Function::new(
12950 "APPROX_QUANTILE".to_string(),
12951 vec![x_expr, array_expr],
12952 );
12953 new_func.distinct = agg.distinct;
12954 Ok(Expression::Function(Box::new(new_func)))
12955 } else {
12956 Ok(Expression::AggregateFunction(agg))
12957 }
12958 } else {
12959 Ok(e)
12960 }
12961 }
12962
12963 Action::GenericFunctionNormalize => {
12964 // Helper closure to convert ARBITRARY to target-specific function
12965 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
12966 let name = match target {
12967 DialectType::ClickHouse => "any",
12968 DialectType::TSQL | DialectType::SQLite => "MAX",
12969 DialectType::Hive => "FIRST",
12970 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12971 "ARBITRARY"
12972 }
12973 _ => "ANY_VALUE",
12974 };
12975 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
12976 }
12977
12978 if let Expression::Function(f) = e {
12979 let name = f.name.to_ascii_uppercase();
12980 match name.as_str() {
12981 "ARBITRARY" if f.args.len() == 1 => {
12982 let arg = f.args.into_iter().next().unwrap();
12983 Ok(convert_arbitrary(arg, target))
12984 }
12985 "TO_NUMBER" if f.args.len() == 1 => {
12986 let arg = f.args.into_iter().next().unwrap();
12987 match target {
12988 DialectType::Oracle | DialectType::Snowflake => {
12989 Ok(Expression::Function(Box::new(Function::new(
12990 "TO_NUMBER".to_string(),
12991 vec![arg],
12992 ))))
12993 }
12994 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12995 this: arg,
12996 to: crate::expressions::DataType::Double {
12997 precision: None,
12998 scale: None,
12999 },
13000 double_colon_syntax: false,
13001 trailing_comments: Vec::new(),
13002 format: None,
13003 default: None,
13004 inferred_type: None,
13005 }))),
13006 }
13007 }
13008 "AGGREGATE" if f.args.len() >= 3 => match target {
13009 DialectType::DuckDB
13010 | DialectType::Hive
13011 | DialectType::Presto
13012 | DialectType::Trino => Ok(Expression::Function(Box::new(
13013 Function::new("REDUCE".to_string(), f.args),
13014 ))),
13015 _ => Ok(Expression::Function(f)),
13016 },
13017 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
13018 "REGEXP_MATCHES" if f.args.len() >= 2 => {
13019 if matches!(target, DialectType::DuckDB) {
13020 Ok(Expression::Function(f))
13021 } else {
13022 let mut args = f.args;
13023 let this = args.remove(0);
13024 let pattern = args.remove(0);
13025 let flags = if args.is_empty() {
13026 None
13027 } else {
13028 Some(args.remove(0))
13029 };
13030 Ok(Expression::RegexpLike(Box::new(
13031 crate::expressions::RegexpFunc {
13032 this,
13033 pattern,
13034 flags,
13035 },
13036 )))
13037 }
13038 }
13039 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
13040 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
13041 if matches!(target, DialectType::DuckDB) {
13042 Ok(Expression::Function(f))
13043 } else {
13044 let mut args = f.args;
13045 let this = args.remove(0);
13046 let pattern = args.remove(0);
13047 let flags = if args.is_empty() {
13048 None
13049 } else {
13050 Some(args.remove(0))
13051 };
13052 Ok(Expression::RegexpLike(Box::new(
13053 crate::expressions::RegexpFunc {
13054 this,
13055 pattern,
13056 flags,
13057 },
13058 )))
13059 }
13060 }
13061 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
13062 "STRUCT_EXTRACT" if f.args.len() == 2 => {
13063 let mut args = f.args;
13064 let this = args.remove(0);
13065 let field_expr = args.remove(0);
13066 // Extract string literal to get field name
13067 let field_name = match &field_expr {
13068 Expression::Literal(lit)
13069 if matches!(
13070 lit.as_ref(),
13071 crate::expressions::Literal::String(_)
13072 ) =>
13073 {
13074 let crate::expressions::Literal::String(s) = lit.as_ref()
13075 else {
13076 unreachable!()
13077 };
13078 s.clone()
13079 }
13080 Expression::Identifier(id) => id.name.clone(),
13081 _ => {
13082 return Ok(Expression::Function(Box::new(Function::new(
13083 "STRUCT_EXTRACT".to_string(),
13084 vec![this, field_expr],
13085 ))))
13086 }
13087 };
13088 Ok(Expression::StructExtract(Box::new(
13089 crate::expressions::StructExtractFunc {
13090 this,
13091 field: crate::expressions::Identifier::new(field_name),
13092 },
13093 )))
13094 }
13095 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
13096 "LIST_FILTER" if f.args.len() == 2 => {
13097 let name = match target {
13098 DialectType::DuckDB => "LIST_FILTER",
13099 _ => "FILTER",
13100 };
13101 Ok(Expression::Function(Box::new(Function::new(
13102 name.to_string(),
13103 f.args,
13104 ))))
13105 }
13106 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
13107 "LIST_TRANSFORM" if f.args.len() == 2 => {
13108 let name = match target {
13109 DialectType::DuckDB => "LIST_TRANSFORM",
13110 _ => "TRANSFORM",
13111 };
13112 Ok(Expression::Function(Box::new(Function::new(
13113 name.to_string(),
13114 f.args,
13115 ))))
13116 }
13117 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
13118 "LIST_SORT" if f.args.len() >= 1 => {
13119 let name = match target {
13120 DialectType::DuckDB => "LIST_SORT",
13121 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
13122 _ => "SORT_ARRAY",
13123 };
13124 Ok(Expression::Function(Box::new(Function::new(
13125 name.to_string(),
13126 f.args,
13127 ))))
13128 }
13129 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
13130 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
13131 match target {
13132 DialectType::DuckDB => Ok(Expression::Function(Box::new(
13133 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
13134 ))),
13135 DialectType::Spark
13136 | DialectType::Databricks
13137 | DialectType::Hive => {
13138 let mut args = f.args;
13139 args.push(Expression::Identifier(
13140 crate::expressions::Identifier::new("FALSE"),
13141 ));
13142 Ok(Expression::Function(Box::new(Function::new(
13143 "SORT_ARRAY".to_string(),
13144 args,
13145 ))))
13146 }
13147 DialectType::Presto
13148 | DialectType::Trino
13149 | DialectType::Athena => {
13150 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
13151 let arr = f.args.into_iter().next().unwrap();
13152 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
13153 parameters: vec![
13154 crate::expressions::Identifier::new("a"),
13155 crate::expressions::Identifier::new("b"),
13156 ],
13157 body: Expression::Case(Box::new(Case {
13158 operand: None,
13159 whens: vec![
13160 (
13161 Expression::Lt(Box::new(BinaryOp::new(
13162 Expression::Identifier(crate::expressions::Identifier::new("a")),
13163 Expression::Identifier(crate::expressions::Identifier::new("b")),
13164 ))),
13165 Expression::number(1),
13166 ),
13167 (
13168 Expression::Gt(Box::new(BinaryOp::new(
13169 Expression::Identifier(crate::expressions::Identifier::new("a")),
13170 Expression::Identifier(crate::expressions::Identifier::new("b")),
13171 ))),
13172 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
13173 ),
13174 ],
13175 else_: Some(Expression::number(0)),
13176 comments: Vec::new(),
13177 inferred_type: None,
13178 })),
13179 colon: false,
13180 parameter_types: Vec::new(),
13181 }));
13182 Ok(Expression::Function(Box::new(Function::new(
13183 "ARRAY_SORT".to_string(),
13184 vec![arr, lambda],
13185 ))))
13186 }
13187 _ => Ok(Expression::Function(Box::new(Function::new(
13188 "LIST_REVERSE_SORT".to_string(),
13189 f.args,
13190 )))),
13191 }
13192 }
13193 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
13194 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
13195 let mut args = f.args;
13196 args.push(Expression::string(","));
13197 let name = match target {
13198 DialectType::DuckDB => "STR_SPLIT",
13199 DialectType::Presto | DialectType::Trino => "SPLIT",
13200 DialectType::Spark
13201 | DialectType::Databricks
13202 | DialectType::Hive => "SPLIT",
13203 DialectType::PostgreSQL => "STRING_TO_ARRAY",
13204 DialectType::Redshift => "SPLIT_TO_ARRAY",
13205 _ => "SPLIT",
13206 };
13207 Ok(Expression::Function(Box::new(Function::new(
13208 name.to_string(),
13209 args,
13210 ))))
13211 }
13212 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
13213 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
13214 let name = match target {
13215 DialectType::DuckDB => "STR_SPLIT",
13216 DialectType::Presto | DialectType::Trino => "SPLIT",
13217 DialectType::Spark
13218 | DialectType::Databricks
13219 | DialectType::Hive => "SPLIT",
13220 DialectType::PostgreSQL => "STRING_TO_ARRAY",
13221 DialectType::Redshift => "SPLIT_TO_ARRAY",
13222 _ => "SPLIT",
13223 };
13224 Ok(Expression::Function(Box::new(Function::new(
13225 name.to_string(),
13226 f.args,
13227 ))))
13228 }
13229 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
13230 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
13231 let name = match target {
13232 DialectType::DuckDB => "STR_SPLIT",
13233 DialectType::Presto | DialectType::Trino => "SPLIT",
13234 DialectType::Spark
13235 | DialectType::Databricks
13236 | DialectType::Hive => "SPLIT",
13237 DialectType::Doris | DialectType::StarRocks => {
13238 "SPLIT_BY_STRING"
13239 }
13240 DialectType::PostgreSQL | DialectType::Redshift => {
13241 "STRING_TO_ARRAY"
13242 }
13243 _ => "SPLIT",
13244 };
13245 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
13246 if matches!(
13247 target,
13248 DialectType::Spark
13249 | DialectType::Databricks
13250 | DialectType::Hive
13251 ) {
13252 let mut args = f.args;
13253 let x = args.remove(0);
13254 let sep = args.remove(0);
13255 // Wrap separator in CONCAT('\\Q', sep, '\\E')
13256 let escaped_sep =
13257 Expression::Function(Box::new(Function::new(
13258 "CONCAT".to_string(),
13259 vec![
13260 Expression::string("\\Q"),
13261 sep,
13262 Expression::string("\\E"),
13263 ],
13264 )));
13265 Ok(Expression::Function(Box::new(Function::new(
13266 name.to_string(),
13267 vec![x, escaped_sep],
13268 ))))
13269 } else {
13270 Ok(Expression::Function(Box::new(Function::new(
13271 name.to_string(),
13272 f.args,
13273 ))))
13274 }
13275 }
13276 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
13277 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
13278 let name = match target {
13279 DialectType::DuckDB => "STR_SPLIT_REGEX",
13280 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
13281 DialectType::Spark
13282 | DialectType::Databricks
13283 | DialectType::Hive => "SPLIT",
13284 _ => "REGEXP_SPLIT",
13285 };
13286 Ok(Expression::Function(Box::new(Function::new(
13287 name.to_string(),
13288 f.args,
13289 ))))
13290 }
13291 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
13292 "SPLIT"
13293 if f.args.len() == 2
13294 && matches!(source, DialectType::Snowflake)
13295 && matches!(target, DialectType::DuckDB) =>
13296 {
13297 let mut args = f.args;
13298 let str_arg = args.remove(0);
13299 let delim_arg = args.remove(0);
13300
13301 // STR_SPLIT(str, delim) as the base
13302 let base_func = Expression::Function(Box::new(Function::new(
13303 "STR_SPLIT".to_string(),
13304 vec![str_arg.clone(), delim_arg.clone()],
13305 )));
13306
13307 // [str] - array with single element
13308 let array_with_input =
13309 Expression::Array(Box::new(crate::expressions::Array {
13310 expressions: vec![str_arg],
13311 }));
13312
13313 // CASE
13314 // WHEN delim IS NULL THEN NULL
13315 // WHEN delim = '' THEN [str]
13316 // ELSE STR_SPLIT(str, delim)
13317 // END
13318 Ok(Expression::Case(Box::new(Case {
13319 operand: None,
13320 whens: vec![
13321 (
13322 Expression::Is(Box::new(BinaryOp {
13323 left: delim_arg.clone(),
13324 right: Expression::Null(Null),
13325 left_comments: vec![],
13326 operator_comments: vec![],
13327 trailing_comments: vec![],
13328 inferred_type: None,
13329 })),
13330 Expression::Null(Null),
13331 ),
13332 (
13333 Expression::Eq(Box::new(BinaryOp {
13334 left: delim_arg,
13335 right: Expression::string(""),
13336 left_comments: vec![],
13337 operator_comments: vec![],
13338 trailing_comments: vec![],
13339 inferred_type: None,
13340 })),
13341 array_with_input,
13342 ),
13343 ],
13344 else_: Some(base_func),
13345 comments: vec![],
13346 inferred_type: None,
13347 })))
13348 }
13349 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
13350 "SPLIT"
13351 if f.args.len() == 2
13352 && matches!(
13353 source,
13354 DialectType::Presto
13355 | DialectType::Trino
13356 | DialectType::Athena
13357 | DialectType::StarRocks
13358 | DialectType::Doris
13359 )
13360 && matches!(
13361 target,
13362 DialectType::Spark
13363 | DialectType::Databricks
13364 | DialectType::Hive
13365 ) =>
13366 {
13367 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
13368 let mut args = f.args;
13369 let x = args.remove(0);
13370 let sep = args.remove(0);
13371 let escaped_sep = Expression::Function(Box::new(Function::new(
13372 "CONCAT".to_string(),
13373 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
13374 )));
13375 Ok(Expression::Function(Box::new(Function::new(
13376 "SPLIT".to_string(),
13377 vec![x, escaped_sep],
13378 ))))
13379 }
13380 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
13381 // For ClickHouse target, preserve original name to maintain camelCase
13382 "SUBSTRINGINDEX" => {
13383 let name = if matches!(target, DialectType::ClickHouse) {
13384 f.name.clone()
13385 } else {
13386 "SUBSTRING_INDEX".to_string()
13387 };
13388 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
13389 }
13390 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
13391 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
13392 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
13393 if name == "CARDINALITY"
13394 && matches!(source, DialectType::DuckDB)
13395 && matches!(target, DialectType::DuckDB)
13396 {
13397 return Ok(Expression::Function(f));
13398 }
13399 // Get the array argument (first arg, drop dimension args)
13400 let mut args = f.args;
13401 let arr = if args.is_empty() {
13402 return Ok(Expression::Function(Box::new(Function::new(
13403 name.to_string(),
13404 args,
13405 ))));
13406 } else {
13407 args.remove(0)
13408 };
13409 let name =
13410 match target {
13411 DialectType::Spark
13412 | DialectType::Databricks
13413 | DialectType::Hive => "SIZE",
13414 DialectType::Presto | DialectType::Trino => "CARDINALITY",
13415 DialectType::BigQuery => "ARRAY_LENGTH",
13416 DialectType::DuckDB => {
13417 // DuckDB: use ARRAY_LENGTH with all args
13418 let mut all_args = vec![arr];
13419 all_args.extend(args);
13420 return Ok(Expression::Function(Box::new(
13421 Function::new("ARRAY_LENGTH".to_string(), all_args),
13422 )));
13423 }
13424 DialectType::PostgreSQL | DialectType::Redshift => {
13425 // Keep ARRAY_LENGTH with dimension arg
13426 let mut all_args = vec![arr];
13427 all_args.extend(args);
13428 return Ok(Expression::Function(Box::new(
13429 Function::new("ARRAY_LENGTH".to_string(), all_args),
13430 )));
13431 }
13432 DialectType::ClickHouse => "LENGTH",
13433 _ => "ARRAY_LENGTH",
13434 };
13435 Ok(Expression::Function(Box::new(Function::new(
13436 name.to_string(),
13437 vec![arr],
13438 ))))
13439 }
13440 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
13441 "TO_VARIANT" if f.args.len() == 1 => match target {
13442 DialectType::DuckDB => {
13443 let arg = f.args.into_iter().next().unwrap();
13444 Ok(Expression::Cast(Box::new(Cast {
13445 this: arg,
13446 to: DataType::Custom {
13447 name: "VARIANT".to_string(),
13448 },
13449 double_colon_syntax: false,
13450 trailing_comments: Vec::new(),
13451 format: None,
13452 default: None,
13453 inferred_type: None,
13454 })))
13455 }
13456 _ => Ok(Expression::Function(f)),
13457 },
13458 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
13459 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
13460 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13461 Function::new("JSON_AGG".to_string(), f.args),
13462 ))),
13463 _ => Ok(Expression::Function(f)),
13464 },
13465 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
13466 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
13467 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13468 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
13469 ))),
13470 _ => Ok(Expression::Function(f)),
13471 },
13472 // UNICODE(x) -> target-specific codepoint function
13473 "UNICODE" if f.args.len() == 1 => {
13474 match target {
13475 DialectType::SQLite | DialectType::DuckDB => {
13476 Ok(Expression::Function(Box::new(Function::new(
13477 "UNICODE".to_string(),
13478 f.args,
13479 ))))
13480 }
13481 DialectType::Oracle => {
13482 // ASCII(UNISTR(x))
13483 let inner = Expression::Function(Box::new(Function::new(
13484 "UNISTR".to_string(),
13485 f.args,
13486 )));
13487 Ok(Expression::Function(Box::new(Function::new(
13488 "ASCII".to_string(),
13489 vec![inner],
13490 ))))
13491 }
13492 DialectType::MySQL => {
13493 // ORD(CONVERT(x USING utf32))
13494 let arg = f.args.into_iter().next().unwrap();
13495 let convert_expr = Expression::ConvertToCharset(Box::new(
13496 crate::expressions::ConvertToCharset {
13497 this: Box::new(arg),
13498 dest: Some(Box::new(Expression::Identifier(
13499 crate::expressions::Identifier::new("utf32"),
13500 ))),
13501 source: None,
13502 },
13503 ));
13504 Ok(Expression::Function(Box::new(Function::new(
13505 "ORD".to_string(),
13506 vec![convert_expr],
13507 ))))
13508 }
13509 _ => Ok(Expression::Function(Box::new(Function::new(
13510 "ASCII".to_string(),
13511 f.args,
13512 )))),
13513 }
13514 }
13515 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
13516 "XOR" if f.args.len() >= 2 => {
13517 match target {
13518 DialectType::ClickHouse => {
13519 // ClickHouse: keep as xor() function with lowercase name
13520 Ok(Expression::Function(Box::new(Function::new(
13521 "xor".to_string(),
13522 f.args,
13523 ))))
13524 }
13525 DialectType::Presto | DialectType::Trino => {
13526 if f.args.len() == 2 {
13527 Ok(Expression::Function(Box::new(Function::new(
13528 "BITWISE_XOR".to_string(),
13529 f.args,
13530 ))))
13531 } else {
13532 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
13533 let mut args = f.args;
13534 let first = args.remove(0);
13535 let second = args.remove(0);
13536 let mut result =
13537 Expression::Function(Box::new(Function::new(
13538 "BITWISE_XOR".to_string(),
13539 vec![first, second],
13540 )));
13541 for arg in args {
13542 result =
13543 Expression::Function(Box::new(Function::new(
13544 "BITWISE_XOR".to_string(),
13545 vec![result, arg],
13546 )));
13547 }
13548 Ok(result)
13549 }
13550 }
13551 DialectType::MySQL
13552 | DialectType::SingleStore
13553 | DialectType::Doris
13554 | DialectType::StarRocks => {
13555 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
13556 let args = f.args;
13557 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
13558 this: None,
13559 expression: None,
13560 expressions: args,
13561 })))
13562 }
13563 DialectType::PostgreSQL | DialectType::Redshift => {
13564 // PostgreSQL: a # b (hash operator for XOR)
13565 let mut args = f.args;
13566 let first = args.remove(0);
13567 let second = args.remove(0);
13568 let mut result = Expression::BitwiseXor(Box::new(
13569 BinaryOp::new(first, second),
13570 ));
13571 for arg in args {
13572 result = Expression::BitwiseXor(Box::new(
13573 BinaryOp::new(result, arg),
13574 ));
13575 }
13576 Ok(result)
13577 }
13578 DialectType::DuckDB => {
13579 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
13580 Ok(Expression::Function(Box::new(Function::new(
13581 "XOR".to_string(),
13582 f.args,
13583 ))))
13584 }
13585 DialectType::BigQuery => {
13586 // BigQuery: a ^ b (caret operator for XOR)
13587 let mut args = f.args;
13588 let first = args.remove(0);
13589 let second = args.remove(0);
13590 let mut result = Expression::BitwiseXor(Box::new(
13591 BinaryOp::new(first, second),
13592 ));
13593 for arg in args {
13594 result = Expression::BitwiseXor(Box::new(
13595 BinaryOp::new(result, arg),
13596 ));
13597 }
13598 Ok(result)
13599 }
13600 _ => Ok(Expression::Function(Box::new(Function::new(
13601 "XOR".to_string(),
13602 f.args,
13603 )))),
13604 }
13605 }
13606 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
13607 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
13608 match target {
13609 DialectType::Spark
13610 | DialectType::Databricks
13611 | DialectType::Hive => {
13612 let mut args = f.args;
13613 args.push(Expression::Identifier(
13614 crate::expressions::Identifier::new("FALSE"),
13615 ));
13616 Ok(Expression::Function(Box::new(Function::new(
13617 "SORT_ARRAY".to_string(),
13618 args,
13619 ))))
13620 }
13621 DialectType::Presto
13622 | DialectType::Trino
13623 | DialectType::Athena => {
13624 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
13625 let arr = f.args.into_iter().next().unwrap();
13626 let lambda = Expression::Lambda(Box::new(
13627 crate::expressions::LambdaExpr {
13628 parameters: vec![
13629 Identifier::new("a"),
13630 Identifier::new("b"),
13631 ],
13632 colon: false,
13633 parameter_types: Vec::new(),
13634 body: Expression::Case(Box::new(Case {
13635 operand: None,
13636 whens: vec![
13637 (
13638 Expression::Lt(Box::new(
13639 BinaryOp::new(
13640 Expression::Identifier(
13641 Identifier::new("a"),
13642 ),
13643 Expression::Identifier(
13644 Identifier::new("b"),
13645 ),
13646 ),
13647 )),
13648 Expression::number(1),
13649 ),
13650 (
13651 Expression::Gt(Box::new(
13652 BinaryOp::new(
13653 Expression::Identifier(
13654 Identifier::new("a"),
13655 ),
13656 Expression::Identifier(
13657 Identifier::new("b"),
13658 ),
13659 ),
13660 )),
13661 Expression::Neg(Box::new(
13662 crate::expressions::UnaryOp {
13663 this: Expression::number(1),
13664 inferred_type: None,
13665 },
13666 )),
13667 ),
13668 ],
13669 else_: Some(Expression::number(0)),
13670 comments: Vec::new(),
13671 inferred_type: None,
13672 })),
13673 },
13674 ));
13675 Ok(Expression::Function(Box::new(Function::new(
13676 "ARRAY_SORT".to_string(),
13677 vec![arr, lambda],
13678 ))))
13679 }
13680 _ => Ok(Expression::Function(Box::new(Function::new(
13681 "ARRAY_REVERSE_SORT".to_string(),
13682 f.args,
13683 )))),
13684 }
13685 }
13686 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
13687 "ENCODE" if f.args.len() == 1 => match target {
13688 DialectType::Spark
13689 | DialectType::Databricks
13690 | DialectType::Hive => {
13691 let mut args = f.args;
13692 args.push(Expression::string("utf-8"));
13693 Ok(Expression::Function(Box::new(Function::new(
13694 "ENCODE".to_string(),
13695 args,
13696 ))))
13697 }
13698 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13699 Ok(Expression::Function(Box::new(Function::new(
13700 "TO_UTF8".to_string(),
13701 f.args,
13702 ))))
13703 }
13704 _ => Ok(Expression::Function(Box::new(Function::new(
13705 "ENCODE".to_string(),
13706 f.args,
13707 )))),
13708 },
13709 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
13710 "DECODE" if f.args.len() == 1 => match target {
13711 DialectType::Spark
13712 | DialectType::Databricks
13713 | DialectType::Hive => {
13714 let mut args = f.args;
13715 args.push(Expression::string("utf-8"));
13716 Ok(Expression::Function(Box::new(Function::new(
13717 "DECODE".to_string(),
13718 args,
13719 ))))
13720 }
13721 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13722 Ok(Expression::Function(Box::new(Function::new(
13723 "FROM_UTF8".to_string(),
13724 f.args,
13725 ))))
13726 }
13727 _ => Ok(Expression::Function(Box::new(Function::new(
13728 "DECODE".to_string(),
13729 f.args,
13730 )))),
13731 },
13732 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
13733 "QUANTILE" if f.args.len() == 2 => {
13734 let name = match target {
13735 DialectType::Spark
13736 | DialectType::Databricks
13737 | DialectType::Hive => "PERCENTILE",
13738 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
13739 DialectType::BigQuery => "PERCENTILE_CONT",
13740 _ => "QUANTILE",
13741 };
13742 Ok(Expression::Function(Box::new(Function::new(
13743 name.to_string(),
13744 f.args,
13745 ))))
13746 }
13747 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
13748 "QUANTILE_CONT" if f.args.len() == 2 => {
13749 let mut args = f.args;
13750 let column = args.remove(0);
13751 let quantile = args.remove(0);
13752 match target {
13753 DialectType::DuckDB => {
13754 Ok(Expression::Function(Box::new(Function::new(
13755 "QUANTILE_CONT".to_string(),
13756 vec![column, quantile],
13757 ))))
13758 }
13759 DialectType::PostgreSQL
13760 | DialectType::Redshift
13761 | DialectType::Snowflake => {
13762 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
13763 let inner = Expression::PercentileCont(Box::new(
13764 crate::expressions::PercentileFunc {
13765 this: column.clone(),
13766 percentile: quantile,
13767 order_by: None,
13768 filter: None,
13769 },
13770 ));
13771 Ok(Expression::WithinGroup(Box::new(
13772 crate::expressions::WithinGroup {
13773 this: inner,
13774 order_by: vec![crate::expressions::Ordered {
13775 this: column,
13776 desc: false,
13777 nulls_first: None,
13778 explicit_asc: false,
13779 with_fill: None,
13780 }],
13781 },
13782 )))
13783 }
13784 _ => Ok(Expression::Function(Box::new(Function::new(
13785 "QUANTILE_CONT".to_string(),
13786 vec![column, quantile],
13787 )))),
13788 }
13789 }
13790 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
13791 "QUANTILE_DISC" if f.args.len() == 2 => {
13792 let mut args = f.args;
13793 let column = args.remove(0);
13794 let quantile = args.remove(0);
13795 match target {
13796 DialectType::DuckDB => {
13797 Ok(Expression::Function(Box::new(Function::new(
13798 "QUANTILE_DISC".to_string(),
13799 vec![column, quantile],
13800 ))))
13801 }
13802 DialectType::PostgreSQL
13803 | DialectType::Redshift
13804 | DialectType::Snowflake => {
13805 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
13806 let inner = Expression::PercentileDisc(Box::new(
13807 crate::expressions::PercentileFunc {
13808 this: column.clone(),
13809 percentile: quantile,
13810 order_by: None,
13811 filter: None,
13812 },
13813 ));
13814 Ok(Expression::WithinGroup(Box::new(
13815 crate::expressions::WithinGroup {
13816 this: inner,
13817 order_by: vec![crate::expressions::Ordered {
13818 this: column,
13819 desc: false,
13820 nulls_first: None,
13821 explicit_asc: false,
13822 with_fill: None,
13823 }],
13824 },
13825 )))
13826 }
13827 _ => Ok(Expression::Function(Box::new(Function::new(
13828 "QUANTILE_DISC".to_string(),
13829 vec![column, quantile],
13830 )))),
13831 }
13832 }
13833 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
13834 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
13835 let name = match target {
13836 DialectType::Presto
13837 | DialectType::Trino
13838 | DialectType::Athena => "APPROX_PERCENTILE",
13839 DialectType::Spark
13840 | DialectType::Databricks
13841 | DialectType::Hive => "PERCENTILE_APPROX",
13842 DialectType::DuckDB => "APPROX_QUANTILE",
13843 DialectType::PostgreSQL | DialectType::Redshift => {
13844 "PERCENTILE_CONT"
13845 }
13846 _ => &f.name,
13847 };
13848 Ok(Expression::Function(Box::new(Function::new(
13849 name.to_string(),
13850 f.args,
13851 ))))
13852 }
13853 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13854 "EPOCH" if f.args.len() == 1 => {
13855 let name = match target {
13856 DialectType::Spark
13857 | DialectType::Databricks
13858 | DialectType::Hive => "UNIX_TIMESTAMP",
13859 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
13860 _ => "EPOCH",
13861 };
13862 Ok(Expression::Function(Box::new(Function::new(
13863 name.to_string(),
13864 f.args,
13865 ))))
13866 }
13867 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
13868 "EPOCH_MS" if f.args.len() == 1 => {
13869 match target {
13870 DialectType::Spark | DialectType::Databricks => {
13871 Ok(Expression::Function(Box::new(Function::new(
13872 "TIMESTAMP_MILLIS".to_string(),
13873 f.args,
13874 ))))
13875 }
13876 DialectType::Hive => {
13877 // Hive: FROM_UNIXTIME(x / 1000)
13878 let arg = f.args.into_iter().next().unwrap();
13879 let div_expr = Expression::Div(Box::new(
13880 crate::expressions::BinaryOp::new(
13881 arg,
13882 Expression::number(1000),
13883 ),
13884 ));
13885 Ok(Expression::Function(Box::new(Function::new(
13886 "FROM_UNIXTIME".to_string(),
13887 vec![div_expr],
13888 ))))
13889 }
13890 DialectType::Presto | DialectType::Trino => {
13891 Ok(Expression::Function(Box::new(Function::new(
13892 "FROM_UNIXTIME".to_string(),
13893 vec![Expression::Div(Box::new(
13894 crate::expressions::BinaryOp::new(
13895 f.args.into_iter().next().unwrap(),
13896 Expression::number(1000),
13897 ),
13898 ))],
13899 ))))
13900 }
13901 _ => Ok(Expression::Function(Box::new(Function::new(
13902 "EPOCH_MS".to_string(),
13903 f.args,
13904 )))),
13905 }
13906 }
13907 // HASHBYTES('algorithm', x) -> target-specific hash function
13908 "HASHBYTES" if f.args.len() == 2 => {
13909 // Keep HASHBYTES as-is for TSQL target
13910 if matches!(target, DialectType::TSQL) {
13911 return Ok(Expression::Function(f));
13912 }
13913 let algo_expr = &f.args[0];
13914 let algo = match algo_expr {
13915 Expression::Literal(lit)
13916 if matches!(
13917 lit.as_ref(),
13918 crate::expressions::Literal::String(_)
13919 ) =>
13920 {
13921 let crate::expressions::Literal::String(s) = lit.as_ref()
13922 else {
13923 unreachable!()
13924 };
13925 s.to_ascii_uppercase()
13926 }
13927 _ => return Ok(Expression::Function(f)),
13928 };
13929 let data_arg = f.args.into_iter().nth(1).unwrap();
13930 match algo.as_str() {
13931 "SHA1" => {
13932 let name = match target {
13933 DialectType::Spark | DialectType::Databricks => "SHA",
13934 DialectType::Hive => "SHA1",
13935 _ => "SHA1",
13936 };
13937 Ok(Expression::Function(Box::new(Function::new(
13938 name.to_string(),
13939 vec![data_arg],
13940 ))))
13941 }
13942 "SHA2_256" => {
13943 Ok(Expression::Function(Box::new(Function::new(
13944 "SHA2".to_string(),
13945 vec![data_arg, Expression::number(256)],
13946 ))))
13947 }
13948 "SHA2_512" => {
13949 Ok(Expression::Function(Box::new(Function::new(
13950 "SHA2".to_string(),
13951 vec![data_arg, Expression::number(512)],
13952 ))))
13953 }
13954 "MD5" => Ok(Expression::Function(Box::new(Function::new(
13955 "MD5".to_string(),
13956 vec![data_arg],
13957 )))),
13958 _ => Ok(Expression::Function(Box::new(Function::new(
13959 "HASHBYTES".to_string(),
13960 vec![Expression::string(&algo), data_arg],
13961 )))),
13962 }
13963 }
13964 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
13965 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
13966 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
13967 let mut args = f.args;
13968 let json_expr = args.remove(0);
13969 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
13970 let mut json_path = "$".to_string();
13971 for a in &args {
13972 match a {
13973 Expression::Literal(lit)
13974 if matches!(
13975 lit.as_ref(),
13976 crate::expressions::Literal::String(_)
13977 ) =>
13978 {
13979 let crate::expressions::Literal::String(s) =
13980 lit.as_ref()
13981 else {
13982 unreachable!()
13983 };
13984 // Numeric string keys become array indices: [0]
13985 if s.chars().all(|c| c.is_ascii_digit()) {
13986 json_path.push('[');
13987 json_path.push_str(s);
13988 json_path.push(']');
13989 } else {
13990 json_path.push('.');
13991 json_path.push_str(s);
13992 }
13993 }
13994 _ => {
13995 json_path.push_str(".?");
13996 }
13997 }
13998 }
13999 match target {
14000 DialectType::Spark
14001 | DialectType::Databricks
14002 | DialectType::Hive => {
14003 Ok(Expression::Function(Box::new(Function::new(
14004 "GET_JSON_OBJECT".to_string(),
14005 vec![json_expr, Expression::string(&json_path)],
14006 ))))
14007 }
14008 DialectType::Presto | DialectType::Trino => {
14009 let func_name = if is_text {
14010 "JSON_EXTRACT_SCALAR"
14011 } else {
14012 "JSON_EXTRACT"
14013 };
14014 Ok(Expression::Function(Box::new(Function::new(
14015 func_name.to_string(),
14016 vec![json_expr, Expression::string(&json_path)],
14017 ))))
14018 }
14019 DialectType::BigQuery | DialectType::MySQL => {
14020 let func_name = if is_text {
14021 "JSON_EXTRACT_SCALAR"
14022 } else {
14023 "JSON_EXTRACT"
14024 };
14025 Ok(Expression::Function(Box::new(Function::new(
14026 func_name.to_string(),
14027 vec![json_expr, Expression::string(&json_path)],
14028 ))))
14029 }
14030 DialectType::PostgreSQL | DialectType::Materialize => {
14031 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
14032 let func_name = if is_text {
14033 "JSON_EXTRACT_PATH_TEXT"
14034 } else {
14035 "JSON_EXTRACT_PATH"
14036 };
14037 let mut new_args = vec![json_expr];
14038 new_args.extend(args);
14039 Ok(Expression::Function(Box::new(Function::new(
14040 func_name.to_string(),
14041 new_args,
14042 ))))
14043 }
14044 DialectType::DuckDB | DialectType::SQLite => {
14045 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
14046 if is_text {
14047 Ok(Expression::JsonExtractScalar(Box::new(
14048 crate::expressions::JsonExtractFunc {
14049 this: json_expr,
14050 path: Expression::string(&json_path),
14051 returning: None,
14052 arrow_syntax: true,
14053 hash_arrow_syntax: false,
14054 wrapper_option: None,
14055 quotes_option: None,
14056 on_scalar_string: false,
14057 on_error: None,
14058 },
14059 )))
14060 } else {
14061 Ok(Expression::JsonExtract(Box::new(
14062 crate::expressions::JsonExtractFunc {
14063 this: json_expr,
14064 path: Expression::string(&json_path),
14065 returning: None,
14066 arrow_syntax: true,
14067 hash_arrow_syntax: false,
14068 wrapper_option: None,
14069 quotes_option: None,
14070 on_scalar_string: false,
14071 on_error: None,
14072 },
14073 )))
14074 }
14075 }
14076 DialectType::Redshift => {
14077 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
14078 let mut new_args = vec![json_expr];
14079 new_args.extend(args);
14080 Ok(Expression::Function(Box::new(Function::new(
14081 "JSON_EXTRACT_PATH_TEXT".to_string(),
14082 new_args,
14083 ))))
14084 }
14085 DialectType::TSQL => {
14086 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
14087 let jq = Expression::Function(Box::new(Function::new(
14088 "JSON_QUERY".to_string(),
14089 vec![json_expr.clone(), Expression::string(&json_path)],
14090 )));
14091 let jv = Expression::Function(Box::new(Function::new(
14092 "JSON_VALUE".to_string(),
14093 vec![json_expr, Expression::string(&json_path)],
14094 )));
14095 Ok(Expression::Function(Box::new(Function::new(
14096 "ISNULL".to_string(),
14097 vec![jq, jv],
14098 ))))
14099 }
14100 DialectType::ClickHouse => {
14101 let func_name = if is_text {
14102 "JSONExtractString"
14103 } else {
14104 "JSONExtractRaw"
14105 };
14106 let mut new_args = vec![json_expr];
14107 new_args.extend(args);
14108 Ok(Expression::Function(Box::new(Function::new(
14109 func_name.to_string(),
14110 new_args,
14111 ))))
14112 }
14113 _ => {
14114 let func_name = if is_text {
14115 "JSON_EXTRACT_SCALAR"
14116 } else {
14117 "JSON_EXTRACT"
14118 };
14119 Ok(Expression::Function(Box::new(Function::new(
14120 func_name.to_string(),
14121 vec![json_expr, Expression::string(&json_path)],
14122 ))))
14123 }
14124 }
14125 }
14126 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
14127 "APPROX_DISTINCT" if f.args.len() >= 1 => {
14128 let name = match target {
14129 DialectType::Spark
14130 | DialectType::Databricks
14131 | DialectType::Hive
14132 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
14133 _ => "APPROX_DISTINCT",
14134 };
14135 let mut args = f.args;
14136 // Hive doesn't support the accuracy parameter
14137 if name == "APPROX_COUNT_DISTINCT"
14138 && matches!(target, DialectType::Hive)
14139 {
14140 args.truncate(1);
14141 }
14142 Ok(Expression::Function(Box::new(Function::new(
14143 name.to_string(),
14144 args,
14145 ))))
14146 }
14147 // REGEXP_EXTRACT(x, pattern) - normalize default group index
14148 "REGEXP_EXTRACT" if f.args.len() == 2 => {
14149 // Determine source default group index
14150 let source_default = match source {
14151 DialectType::Presto
14152 | DialectType::Trino
14153 | DialectType::DuckDB => 0,
14154 _ => 1, // Hive/Spark/Databricks default = 1
14155 };
14156 // Determine target default group index
14157 let target_default = match target {
14158 DialectType::Presto
14159 | DialectType::Trino
14160 | DialectType::DuckDB
14161 | DialectType::BigQuery => 0,
14162 DialectType::Snowflake => {
14163 // Snowflake uses REGEXP_SUBSTR
14164 return Ok(Expression::Function(Box::new(Function::new(
14165 "REGEXP_SUBSTR".to_string(),
14166 f.args,
14167 ))));
14168 }
14169 _ => 1, // Hive/Spark/Databricks default = 1
14170 };
14171 if source_default != target_default {
14172 let mut args = f.args;
14173 args.push(Expression::number(source_default));
14174 Ok(Expression::Function(Box::new(Function::new(
14175 "REGEXP_EXTRACT".to_string(),
14176 args,
14177 ))))
14178 } else {
14179 Ok(Expression::Function(Box::new(Function::new(
14180 "REGEXP_EXTRACT".to_string(),
14181 f.args,
14182 ))))
14183 }
14184 }
14185 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
14186 "RLIKE" if f.args.len() == 2 => {
14187 let mut args = f.args;
14188 let str_expr = args.remove(0);
14189 let pattern = args.remove(0);
14190 match target {
14191 DialectType::DuckDB => {
14192 // REGEXP_MATCHES(str, pattern)
14193 Ok(Expression::Function(Box::new(Function::new(
14194 "REGEXP_MATCHES".to_string(),
14195 vec![str_expr, pattern],
14196 ))))
14197 }
14198 _ => {
14199 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
14200 Ok(Expression::RegexpLike(Box::new(
14201 crate::expressions::RegexpFunc {
14202 this: str_expr,
14203 pattern,
14204 flags: None,
14205 },
14206 )))
14207 }
14208 }
14209 }
14210 // EOMONTH(date[, month_offset]) -> target-specific
14211 "EOMONTH" if f.args.len() >= 1 => {
14212 let mut args = f.args;
14213 let date_arg = args.remove(0);
14214 let month_offset = if !args.is_empty() {
14215 Some(args.remove(0))
14216 } else {
14217 None
14218 };
14219
14220 // Helper: wrap date in CAST to DATE
14221 let cast_to_date = |e: Expression| -> Expression {
14222 Expression::Cast(Box::new(Cast {
14223 this: e,
14224 to: DataType::Date,
14225 trailing_comments: vec![],
14226 double_colon_syntax: false,
14227 format: None,
14228 default: None,
14229 inferred_type: None,
14230 }))
14231 };
14232
14233 match target {
14234 DialectType::TSQL | DialectType::Fabric => {
14235 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
14236 let date = cast_to_date(date_arg);
14237 let date = if let Some(offset) = month_offset {
14238 Expression::Function(Box::new(Function::new(
14239 "DATEADD".to_string(),
14240 vec![
14241 Expression::Identifier(Identifier::new(
14242 "MONTH",
14243 )),
14244 offset,
14245 date,
14246 ],
14247 )))
14248 } else {
14249 date
14250 };
14251 Ok(Expression::Function(Box::new(Function::new(
14252 "EOMONTH".to_string(),
14253 vec![date],
14254 ))))
14255 }
14256 DialectType::Presto
14257 | DialectType::Trino
14258 | DialectType::Athena => {
14259 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
14260 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
14261 let cast_ts = Expression::Cast(Box::new(Cast {
14262 this: date_arg,
14263 to: DataType::Timestamp {
14264 timezone: false,
14265 precision: None,
14266 },
14267 trailing_comments: vec![],
14268 double_colon_syntax: false,
14269 format: None,
14270 default: None,
14271 inferred_type: None,
14272 }));
14273 let date = cast_to_date(cast_ts);
14274 let date = if let Some(offset) = month_offset {
14275 Expression::Function(Box::new(Function::new(
14276 "DATE_ADD".to_string(),
14277 vec![Expression::string("MONTH"), offset, date],
14278 )))
14279 } else {
14280 date
14281 };
14282 Ok(Expression::Function(Box::new(Function::new(
14283 "LAST_DAY_OF_MONTH".to_string(),
14284 vec![date],
14285 ))))
14286 }
14287 DialectType::PostgreSQL => {
14288 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
14289 let date = cast_to_date(date_arg);
14290 let date = if let Some(offset) = month_offset {
14291 let interval_str = format!(
14292 "{} MONTH",
14293 Self::expr_to_string_static(&offset)
14294 );
14295 Expression::Add(Box::new(
14296 crate::expressions::BinaryOp::new(
14297 date,
14298 Expression::Interval(Box::new(
14299 crate::expressions::Interval {
14300 this: Some(Expression::string(
14301 &interval_str,
14302 )),
14303 unit: None,
14304 },
14305 )),
14306 ),
14307 ))
14308 } else {
14309 date
14310 };
14311 let truncated =
14312 Expression::Function(Box::new(Function::new(
14313 "DATE_TRUNC".to_string(),
14314 vec![Expression::string("MONTH"), date],
14315 )));
14316 let plus_month = Expression::Add(Box::new(
14317 crate::expressions::BinaryOp::new(
14318 truncated,
14319 Expression::Interval(Box::new(
14320 crate::expressions::Interval {
14321 this: Some(Expression::string("1 MONTH")),
14322 unit: None,
14323 },
14324 )),
14325 ),
14326 ));
14327 let minus_day = Expression::Sub(Box::new(
14328 crate::expressions::BinaryOp::new(
14329 plus_month,
14330 Expression::Interval(Box::new(
14331 crate::expressions::Interval {
14332 this: Some(Expression::string("1 DAY")),
14333 unit: None,
14334 },
14335 )),
14336 ),
14337 ));
14338 Ok(Expression::Cast(Box::new(Cast {
14339 this: minus_day,
14340 to: DataType::Date,
14341 trailing_comments: vec![],
14342 double_colon_syntax: false,
14343 format: None,
14344 default: None,
14345 inferred_type: None,
14346 })))
14347 }
14348 DialectType::DuckDB => {
14349 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
14350 let date = cast_to_date(date_arg);
14351 let date = if let Some(offset) = month_offset {
14352 // Wrap negative numbers in parentheses for DuckDB INTERVAL
14353 let interval_val =
14354 if matches!(&offset, Expression::Neg(_)) {
14355 Expression::Paren(Box::new(
14356 crate::expressions::Paren {
14357 this: offset,
14358 trailing_comments: Vec::new(),
14359 },
14360 ))
14361 } else {
14362 offset
14363 };
14364 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
14365 date,
14366 Expression::Interval(Box::new(crate::expressions::Interval {
14367 this: Some(interval_val),
14368 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14369 unit: crate::expressions::IntervalUnit::Month,
14370 use_plural: false,
14371 }),
14372 })),
14373 )))
14374 } else {
14375 date
14376 };
14377 Ok(Expression::Function(Box::new(Function::new(
14378 "LAST_DAY".to_string(),
14379 vec![date],
14380 ))))
14381 }
14382 DialectType::Snowflake | DialectType::Redshift => {
14383 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
14384 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
14385 let date = if matches!(target, DialectType::Snowflake) {
14386 Expression::Function(Box::new(Function::new(
14387 "TO_DATE".to_string(),
14388 vec![date_arg],
14389 )))
14390 } else {
14391 cast_to_date(date_arg)
14392 };
14393 let date = if let Some(offset) = month_offset {
14394 Expression::Function(Box::new(Function::new(
14395 "DATEADD".to_string(),
14396 vec![
14397 Expression::Identifier(Identifier::new(
14398 "MONTH",
14399 )),
14400 offset,
14401 date,
14402 ],
14403 )))
14404 } else {
14405 date
14406 };
14407 Ok(Expression::Function(Box::new(Function::new(
14408 "LAST_DAY".to_string(),
14409 vec![date],
14410 ))))
14411 }
14412 DialectType::Spark | DialectType::Databricks => {
14413 // Spark: LAST_DAY(TO_DATE(date))
14414 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
14415 let date = Expression::Function(Box::new(Function::new(
14416 "TO_DATE".to_string(),
14417 vec![date_arg],
14418 )));
14419 let date = if let Some(offset) = month_offset {
14420 Expression::Function(Box::new(Function::new(
14421 "ADD_MONTHS".to_string(),
14422 vec![date, offset],
14423 )))
14424 } else {
14425 date
14426 };
14427 Ok(Expression::Function(Box::new(Function::new(
14428 "LAST_DAY".to_string(),
14429 vec![date],
14430 ))))
14431 }
14432 DialectType::MySQL => {
14433 // MySQL: LAST_DAY(DATE(date)) - no offset
14434 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
14435 let date = if let Some(offset) = month_offset {
14436 let iu = crate::expressions::IntervalUnit::Month;
14437 Expression::DateAdd(Box::new(
14438 crate::expressions::DateAddFunc {
14439 this: date_arg,
14440 interval: offset,
14441 unit: iu,
14442 },
14443 ))
14444 } else {
14445 Expression::Function(Box::new(Function::new(
14446 "DATE".to_string(),
14447 vec![date_arg],
14448 )))
14449 };
14450 Ok(Expression::Function(Box::new(Function::new(
14451 "LAST_DAY".to_string(),
14452 vec![date],
14453 ))))
14454 }
14455 DialectType::BigQuery => {
14456 // BigQuery: LAST_DAY(CAST(date AS DATE))
14457 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
14458 let date = cast_to_date(date_arg);
14459 let date = if let Some(offset) = month_offset {
14460 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
14461 this: Some(offset),
14462 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14463 unit: crate::expressions::IntervalUnit::Month,
14464 use_plural: false,
14465 }),
14466 }));
14467 Expression::Function(Box::new(Function::new(
14468 "DATE_ADD".to_string(),
14469 vec![date, interval],
14470 )))
14471 } else {
14472 date
14473 };
14474 Ok(Expression::Function(Box::new(Function::new(
14475 "LAST_DAY".to_string(),
14476 vec![date],
14477 ))))
14478 }
14479 DialectType::ClickHouse => {
14480 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
14481 let date = Expression::Cast(Box::new(Cast {
14482 this: date_arg,
14483 to: DataType::Nullable {
14484 inner: Box::new(DataType::Date),
14485 },
14486 trailing_comments: vec![],
14487 double_colon_syntax: false,
14488 format: None,
14489 default: None,
14490 inferred_type: None,
14491 }));
14492 let date = if let Some(offset) = month_offset {
14493 Expression::Function(Box::new(Function::new(
14494 "DATE_ADD".to_string(),
14495 vec![
14496 Expression::Identifier(Identifier::new(
14497 "MONTH",
14498 )),
14499 offset,
14500 date,
14501 ],
14502 )))
14503 } else {
14504 date
14505 };
14506 Ok(Expression::Function(Box::new(Function::new(
14507 "LAST_DAY".to_string(),
14508 vec![date],
14509 ))))
14510 }
14511 DialectType::Hive => {
14512 // Hive: LAST_DAY(date)
14513 let date = if let Some(offset) = month_offset {
14514 Expression::Function(Box::new(Function::new(
14515 "ADD_MONTHS".to_string(),
14516 vec![date_arg, offset],
14517 )))
14518 } else {
14519 date_arg
14520 };
14521 Ok(Expression::Function(Box::new(Function::new(
14522 "LAST_DAY".to_string(),
14523 vec![date],
14524 ))))
14525 }
14526 _ => {
14527 // Default: LAST_DAY(date)
14528 let date = if let Some(offset) = month_offset {
14529 let unit =
14530 Expression::Identifier(Identifier::new("MONTH"));
14531 Expression::Function(Box::new(Function::new(
14532 "DATEADD".to_string(),
14533 vec![unit, offset, date_arg],
14534 )))
14535 } else {
14536 date_arg
14537 };
14538 Ok(Expression::Function(Box::new(Function::new(
14539 "LAST_DAY".to_string(),
14540 vec![date],
14541 ))))
14542 }
14543 }
14544 }
14545 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
14546 "LAST_DAY" | "LAST_DAY_OF_MONTH"
14547 if !matches!(source, DialectType::BigQuery)
14548 && f.args.len() >= 1 =>
14549 {
14550 let first_arg = f.args.into_iter().next().unwrap();
14551 match target {
14552 DialectType::TSQL | DialectType::Fabric => {
14553 Ok(Expression::Function(Box::new(Function::new(
14554 "EOMONTH".to_string(),
14555 vec![first_arg],
14556 ))))
14557 }
14558 DialectType::Presto
14559 | DialectType::Trino
14560 | DialectType::Athena => {
14561 Ok(Expression::Function(Box::new(Function::new(
14562 "LAST_DAY_OF_MONTH".to_string(),
14563 vec![first_arg],
14564 ))))
14565 }
14566 _ => Ok(Expression::Function(Box::new(Function::new(
14567 "LAST_DAY".to_string(),
14568 vec![first_arg],
14569 )))),
14570 }
14571 }
14572 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
14573 "MAP"
14574 if f.args.len() == 2
14575 && matches!(
14576 source,
14577 DialectType::Presto
14578 | DialectType::Trino
14579 | DialectType::Athena
14580 ) =>
14581 {
14582 let keys_arg = f.args[0].clone();
14583 let vals_arg = f.args[1].clone();
14584
14585 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
14586 fn extract_array_elements(
14587 expr: &Expression,
14588 ) -> Option<&Vec<Expression>> {
14589 match expr {
14590 Expression::Array(arr) => Some(&arr.expressions),
14591 Expression::ArrayFunc(arr) => Some(&arr.expressions),
14592 Expression::Function(f)
14593 if f.name.eq_ignore_ascii_case("ARRAY") =>
14594 {
14595 Some(&f.args)
14596 }
14597 _ => None,
14598 }
14599 }
14600
14601 match target {
14602 DialectType::Spark | DialectType::Databricks => {
14603 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
14604 Ok(Expression::Function(Box::new(Function::new(
14605 "MAP_FROM_ARRAYS".to_string(),
14606 f.args,
14607 ))))
14608 }
14609 DialectType::Hive => {
14610 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
14611 if let (Some(keys), Some(vals)) = (
14612 extract_array_elements(&keys_arg),
14613 extract_array_elements(&vals_arg),
14614 ) {
14615 if keys.len() == vals.len() {
14616 let mut interleaved = Vec::new();
14617 for (k, v) in keys.iter().zip(vals.iter()) {
14618 interleaved.push(k.clone());
14619 interleaved.push(v.clone());
14620 }
14621 Ok(Expression::Function(Box::new(Function::new(
14622 "MAP".to_string(),
14623 interleaved,
14624 ))))
14625 } else {
14626 Ok(Expression::Function(Box::new(Function::new(
14627 "MAP".to_string(),
14628 f.args,
14629 ))))
14630 }
14631 } else {
14632 Ok(Expression::Function(Box::new(Function::new(
14633 "MAP".to_string(),
14634 f.args,
14635 ))))
14636 }
14637 }
14638 DialectType::Snowflake => {
14639 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
14640 if let (Some(keys), Some(vals)) = (
14641 extract_array_elements(&keys_arg),
14642 extract_array_elements(&vals_arg),
14643 ) {
14644 if keys.len() == vals.len() {
14645 let mut interleaved = Vec::new();
14646 for (k, v) in keys.iter().zip(vals.iter()) {
14647 interleaved.push(k.clone());
14648 interleaved.push(v.clone());
14649 }
14650 Ok(Expression::Function(Box::new(Function::new(
14651 "OBJECT_CONSTRUCT".to_string(),
14652 interleaved,
14653 ))))
14654 } else {
14655 Ok(Expression::Function(Box::new(Function::new(
14656 "MAP".to_string(),
14657 f.args,
14658 ))))
14659 }
14660 } else {
14661 Ok(Expression::Function(Box::new(Function::new(
14662 "MAP".to_string(),
14663 f.args,
14664 ))))
14665 }
14666 }
14667 _ => Ok(Expression::Function(f)),
14668 }
14669 }
14670 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
14671 "MAP"
14672 if f.args.is_empty()
14673 && matches!(
14674 source,
14675 DialectType::Hive
14676 | DialectType::Spark
14677 | DialectType::Databricks
14678 )
14679 && matches!(
14680 target,
14681 DialectType::Presto
14682 | DialectType::Trino
14683 | DialectType::Athena
14684 ) =>
14685 {
14686 let empty_keys =
14687 Expression::Array(Box::new(crate::expressions::Array {
14688 expressions: vec![],
14689 }));
14690 let empty_vals =
14691 Expression::Array(Box::new(crate::expressions::Array {
14692 expressions: vec![],
14693 }));
14694 Ok(Expression::Function(Box::new(Function::new(
14695 "MAP".to_string(),
14696 vec![empty_keys, empty_vals],
14697 ))))
14698 }
14699 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
14700 "MAP"
14701 if f.args.len() >= 2
14702 && f.args.len() % 2 == 0
14703 && matches!(
14704 source,
14705 DialectType::Hive
14706 | DialectType::Spark
14707 | DialectType::Databricks
14708 | DialectType::ClickHouse
14709 ) =>
14710 {
14711 let args = f.args;
14712 match target {
14713 DialectType::DuckDB => {
14714 // MAP([k1, k2], [v1, v2])
14715 let mut keys = Vec::new();
14716 let mut vals = Vec::new();
14717 for (i, arg) in args.into_iter().enumerate() {
14718 if i % 2 == 0 {
14719 keys.push(arg);
14720 } else {
14721 vals.push(arg);
14722 }
14723 }
14724 let keys_arr = Expression::Array(Box::new(
14725 crate::expressions::Array { expressions: keys },
14726 ));
14727 let vals_arr = Expression::Array(Box::new(
14728 crate::expressions::Array { expressions: vals },
14729 ));
14730 Ok(Expression::Function(Box::new(Function::new(
14731 "MAP".to_string(),
14732 vec![keys_arr, vals_arr],
14733 ))))
14734 }
14735 DialectType::Presto | DialectType::Trino => {
14736 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
14737 let mut keys = Vec::new();
14738 let mut vals = Vec::new();
14739 for (i, arg) in args.into_iter().enumerate() {
14740 if i % 2 == 0 {
14741 keys.push(arg);
14742 } else {
14743 vals.push(arg);
14744 }
14745 }
14746 let keys_arr = Expression::Array(Box::new(
14747 crate::expressions::Array { expressions: keys },
14748 ));
14749 let vals_arr = Expression::Array(Box::new(
14750 crate::expressions::Array { expressions: vals },
14751 ));
14752 Ok(Expression::Function(Box::new(Function::new(
14753 "MAP".to_string(),
14754 vec![keys_arr, vals_arr],
14755 ))))
14756 }
14757 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14758 Function::new("OBJECT_CONSTRUCT".to_string(), args),
14759 ))),
14760 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
14761 Function::new("map".to_string(), args),
14762 ))),
14763 _ => Ok(Expression::Function(Box::new(Function::new(
14764 "MAP".to_string(),
14765 args,
14766 )))),
14767 }
14768 }
14769 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
14770 "COLLECT_LIST" if f.args.len() >= 1 => {
14771 let name = match target {
14772 DialectType::Spark
14773 | DialectType::Databricks
14774 | DialectType::Hive => "COLLECT_LIST",
14775 DialectType::DuckDB
14776 | DialectType::PostgreSQL
14777 | DialectType::Redshift
14778 | DialectType::Snowflake
14779 | DialectType::BigQuery => "ARRAY_AGG",
14780 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
14781 _ => "ARRAY_AGG",
14782 };
14783 Ok(Expression::Function(Box::new(Function::new(
14784 name.to_string(),
14785 f.args,
14786 ))))
14787 }
14788 // COLLECT_SET(x) -> target-specific distinct array aggregation
14789 "COLLECT_SET" if f.args.len() >= 1 => {
14790 let name = match target {
14791 DialectType::Spark
14792 | DialectType::Databricks
14793 | DialectType::Hive => "COLLECT_SET",
14794 DialectType::Presto
14795 | DialectType::Trino
14796 | DialectType::Athena => "SET_AGG",
14797 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
14798 _ => "ARRAY_AGG",
14799 };
14800 Ok(Expression::Function(Box::new(Function::new(
14801 name.to_string(),
14802 f.args,
14803 ))))
14804 }
14805 // ISNAN(x) / IS_NAN(x) - normalize
14806 "ISNAN" | "IS_NAN" => {
14807 let name = match target {
14808 DialectType::Spark
14809 | DialectType::Databricks
14810 | DialectType::Hive => "ISNAN",
14811 DialectType::Presto
14812 | DialectType::Trino
14813 | DialectType::Athena => "IS_NAN",
14814 DialectType::BigQuery
14815 | DialectType::PostgreSQL
14816 | DialectType::Redshift => "IS_NAN",
14817 DialectType::ClickHouse => "IS_NAN",
14818 _ => "ISNAN",
14819 };
14820 Ok(Expression::Function(Box::new(Function::new(
14821 name.to_string(),
14822 f.args,
14823 ))))
14824 }
14825 // SPLIT_PART(str, delim, index) -> target-specific
14826 "SPLIT_PART" if f.args.len() == 3 => {
14827 match target {
14828 DialectType::Spark | DialectType::Databricks => {
14829 // Keep as SPLIT_PART (Spark 3.4+)
14830 Ok(Expression::Function(Box::new(Function::new(
14831 "SPLIT_PART".to_string(),
14832 f.args,
14833 ))))
14834 }
14835 DialectType::DuckDB
14836 if matches!(source, DialectType::Snowflake) =>
14837 {
14838 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
14839 // - part_index 0 treated as 1
14840 // - empty delimiter: return whole string if index 1 or -1, else ''
14841 let mut args = f.args;
14842 let str_arg = args.remove(0);
14843 let delim_arg = args.remove(0);
14844 let idx_arg = args.remove(0);
14845
14846 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
14847 let adjusted_idx = Expression::Paren(Box::new(Paren {
14848 this: Expression::Case(Box::new(Case {
14849 operand: None,
14850 whens: vec![(
14851 Expression::Eq(Box::new(BinaryOp {
14852 left: idx_arg.clone(),
14853 right: Expression::number(0),
14854 left_comments: vec![],
14855 operator_comments: vec![],
14856 trailing_comments: vec![],
14857 inferred_type: None,
14858 })),
14859 Expression::number(1),
14860 )],
14861 else_: Some(idx_arg.clone()),
14862 comments: vec![],
14863 inferred_type: None,
14864 })),
14865 trailing_comments: vec![],
14866 }));
14867
14868 // SPLIT_PART(str, delim, adjusted_idx)
14869 let base_func =
14870 Expression::Function(Box::new(Function::new(
14871 "SPLIT_PART".to_string(),
14872 vec![
14873 str_arg.clone(),
14874 delim_arg.clone(),
14875 adjusted_idx.clone(),
14876 ],
14877 )));
14878
14879 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
14880 let empty_delim_case = Expression::Paren(Box::new(Paren {
14881 this: Expression::Case(Box::new(Case {
14882 operand: None,
14883 whens: vec![(
14884 Expression::Or(Box::new(BinaryOp {
14885 left: Expression::Eq(Box::new(BinaryOp {
14886 left: adjusted_idx.clone(),
14887 right: Expression::number(1),
14888 left_comments: vec![],
14889 operator_comments: vec![],
14890 trailing_comments: vec![],
14891 inferred_type: None,
14892 })),
14893 right: Expression::Eq(Box::new(BinaryOp {
14894 left: adjusted_idx,
14895 right: Expression::number(-1),
14896 left_comments: vec![],
14897 operator_comments: vec![],
14898 trailing_comments: vec![],
14899 inferred_type: None,
14900 })),
14901 left_comments: vec![],
14902 operator_comments: vec![],
14903 trailing_comments: vec![],
14904 inferred_type: None,
14905 })),
14906 str_arg,
14907 )],
14908 else_: Some(Expression::string("")),
14909 comments: vec![],
14910 inferred_type: None,
14911 })),
14912 trailing_comments: vec![],
14913 }));
14914
14915 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
14916 Ok(Expression::Case(Box::new(Case {
14917 operand: None,
14918 whens: vec![(
14919 Expression::Eq(Box::new(BinaryOp {
14920 left: delim_arg,
14921 right: Expression::string(""),
14922 left_comments: vec![],
14923 operator_comments: vec![],
14924 trailing_comments: vec![],
14925 inferred_type: None,
14926 })),
14927 empty_delim_case,
14928 )],
14929 else_: Some(base_func),
14930 comments: vec![],
14931 inferred_type: None,
14932 })))
14933 }
14934 DialectType::DuckDB
14935 | DialectType::PostgreSQL
14936 | DialectType::Snowflake
14937 | DialectType::Redshift
14938 | DialectType::Trino
14939 | DialectType::Presto => Ok(Expression::Function(Box::new(
14940 Function::new("SPLIT_PART".to_string(), f.args),
14941 ))),
14942 DialectType::Hive => {
14943 // SPLIT(str, delim)[index]
14944 // Complex conversion, just keep as-is for now
14945 Ok(Expression::Function(Box::new(Function::new(
14946 "SPLIT_PART".to_string(),
14947 f.args,
14948 ))))
14949 }
14950 _ => Ok(Expression::Function(Box::new(Function::new(
14951 "SPLIT_PART".to_string(),
14952 f.args,
14953 )))),
14954 }
14955 }
14956 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
14957 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
14958 let is_scalar = name == "JSON_EXTRACT_SCALAR";
14959 match target {
14960 DialectType::Spark
14961 | DialectType::Databricks
14962 | DialectType::Hive => {
14963 let mut args = f.args;
14964 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
14965 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
14966 if let Some(Expression::Function(inner)) = args.first() {
14967 if inner.name.eq_ignore_ascii_case("TRY")
14968 && inner.args.len() == 1
14969 {
14970 let mut inner_args = inner.args.clone();
14971 args[0] = inner_args.remove(0);
14972 }
14973 }
14974 Ok(Expression::Function(Box::new(Function::new(
14975 "GET_JSON_OBJECT".to_string(),
14976 args,
14977 ))))
14978 }
14979 DialectType::DuckDB | DialectType::SQLite => {
14980 // json -> path syntax
14981 let mut args = f.args;
14982 let json_expr = args.remove(0);
14983 let path = args.remove(0);
14984 Ok(Expression::JsonExtract(Box::new(
14985 crate::expressions::JsonExtractFunc {
14986 this: json_expr,
14987 path,
14988 returning: None,
14989 arrow_syntax: true,
14990 hash_arrow_syntax: false,
14991 wrapper_option: None,
14992 quotes_option: None,
14993 on_scalar_string: false,
14994 on_error: None,
14995 },
14996 )))
14997 }
14998 DialectType::TSQL => {
14999 let func_name = if is_scalar {
15000 "JSON_VALUE"
15001 } else {
15002 "JSON_QUERY"
15003 };
15004 Ok(Expression::Function(Box::new(Function::new(
15005 func_name.to_string(),
15006 f.args,
15007 ))))
15008 }
15009 DialectType::PostgreSQL | DialectType::Redshift => {
15010 let func_name = if is_scalar {
15011 "JSON_EXTRACT_PATH_TEXT"
15012 } else {
15013 "JSON_EXTRACT_PATH"
15014 };
15015 Ok(Expression::Function(Box::new(Function::new(
15016 func_name.to_string(),
15017 f.args,
15018 ))))
15019 }
15020 _ => Ok(Expression::Function(Box::new(Function::new(
15021 name.to_string(),
15022 f.args,
15023 )))),
15024 }
15025 }
15026 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
15027 "JSON_SEARCH"
15028 if matches!(target, DialectType::DuckDB)
15029 && (3..=5).contains(&f.args.len()) =>
15030 {
15031 let args = &f.args;
15032
15033 // Only rewrite deterministic modes and NULL/no escape-char variant.
15034 let mode = match &args[1] {
15035 Expression::Literal(lit)
15036 if matches!(
15037 lit.as_ref(),
15038 crate::expressions::Literal::String(_)
15039 ) =>
15040 {
15041 let crate::expressions::Literal::String(s) = lit.as_ref()
15042 else {
15043 unreachable!()
15044 };
15045 s.to_ascii_lowercase()
15046 }
15047 _ => return Ok(Expression::Function(f)),
15048 };
15049 if mode != "one" && mode != "all" {
15050 return Ok(Expression::Function(f));
15051 }
15052 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
15053 return Ok(Expression::Function(f));
15054 }
15055
15056 let json_doc_sql = match Generator::sql(&args[0]) {
15057 Ok(sql) => sql,
15058 Err(_) => return Ok(Expression::Function(f)),
15059 };
15060 let search_sql = match Generator::sql(&args[2]) {
15061 Ok(sql) => sql,
15062 Err(_) => return Ok(Expression::Function(f)),
15063 };
15064 let path_sql = if args.len() == 5 {
15065 match Generator::sql(&args[4]) {
15066 Ok(sql) => sql,
15067 Err(_) => return Ok(Expression::Function(f)),
15068 }
15069 } else {
15070 "'$'".to_string()
15071 };
15072
15073 let rewrite_sql = if mode == "all" {
15074 format!(
15075 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
15076 json_doc_sql, path_sql, search_sql
15077 )
15078 } else {
15079 format!(
15080 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
15081 json_doc_sql, path_sql, search_sql
15082 )
15083 };
15084
15085 Ok(Expression::Raw(crate::expressions::Raw {
15086 sql: rewrite_sql,
15087 }))
15088 }
15089 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
15090 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
15091 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
15092 if f.args.len() >= 2
15093 && matches!(source, DialectType::SingleStore) =>
15094 {
15095 let is_bson = name == "BSON_EXTRACT_BSON";
15096 let mut args = f.args;
15097 let json_expr = args.remove(0);
15098
15099 // Build JSONPath from remaining arguments
15100 let mut path = String::from("$");
15101 for arg in &args {
15102 if let Expression::Literal(lit) = arg {
15103 if let crate::expressions::Literal::String(s) = lit.as_ref()
15104 {
15105 // Check if it's a numeric string (array index)
15106 if s.parse::<i64>().is_ok() {
15107 path.push('[');
15108 path.push_str(s);
15109 path.push(']');
15110 } else {
15111 path.push('.');
15112 path.push_str(s);
15113 }
15114 }
15115 }
15116 }
15117
15118 let target_func = if is_bson {
15119 "JSONB_EXTRACT"
15120 } else {
15121 "JSON_EXTRACT"
15122 };
15123 Ok(Expression::Function(Box::new(Function::new(
15124 target_func.to_string(),
15125 vec![json_expr, Expression::string(&path)],
15126 ))))
15127 }
15128 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
15129 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
15130 Ok(Expression::Function(Box::new(Function {
15131 name: "arraySum".to_string(),
15132 args: f.args,
15133 distinct: f.distinct,
15134 trailing_comments: f.trailing_comments,
15135 use_bracket_syntax: f.use_bracket_syntax,
15136 no_parens: f.no_parens,
15137 quoted: f.quoted,
15138 span: None,
15139 inferred_type: None,
15140 })))
15141 }
15142 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
15143 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
15144 // and is handled by JsonQueryValueConvert action. This handles the case where
15145 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
15146 "JSON_QUERY" | "JSON_VALUE"
15147 if f.args.len() == 2
15148 && matches!(
15149 source,
15150 DialectType::TSQL | DialectType::Fabric
15151 ) =>
15152 {
15153 match target {
15154 DialectType::Spark
15155 | DialectType::Databricks
15156 | DialectType::Hive => Ok(Expression::Function(Box::new(
15157 Function::new("GET_JSON_OBJECT".to_string(), f.args),
15158 ))),
15159 _ => Ok(Expression::Function(Box::new(Function::new(
15160 name.to_string(),
15161 f.args,
15162 )))),
15163 }
15164 }
15165 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
15166 "UNIX_TIMESTAMP" if f.args.len() == 1 => {
15167 let arg = f.args.into_iter().next().unwrap();
15168 let is_hive_source = matches!(
15169 source,
15170 DialectType::Hive
15171 | DialectType::Spark
15172 | DialectType::Databricks
15173 );
15174 match target {
15175 DialectType::DuckDB if is_hive_source => {
15176 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
15177 let strptime =
15178 Expression::Function(Box::new(Function::new(
15179 "STRPTIME".to_string(),
15180 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
15181 )));
15182 Ok(Expression::Function(Box::new(Function::new(
15183 "EPOCH".to_string(),
15184 vec![strptime],
15185 ))))
15186 }
15187 DialectType::Presto | DialectType::Trino if is_hive_source => {
15188 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
15189 let cast_varchar =
15190 Expression::Cast(Box::new(crate::expressions::Cast {
15191 this: arg.clone(),
15192 to: DataType::VarChar {
15193 length: None,
15194 parenthesized_length: false,
15195 },
15196 trailing_comments: vec![],
15197 double_colon_syntax: false,
15198 format: None,
15199 default: None,
15200 inferred_type: None,
15201 }));
15202 let date_parse =
15203 Expression::Function(Box::new(Function::new(
15204 "DATE_PARSE".to_string(),
15205 vec![
15206 cast_varchar,
15207 Expression::string("%Y-%m-%d %T"),
15208 ],
15209 )));
15210 let try_expr = Expression::Function(Box::new(
15211 Function::new("TRY".to_string(), vec![date_parse]),
15212 ));
15213 let date_format =
15214 Expression::Function(Box::new(Function::new(
15215 "DATE_FORMAT".to_string(),
15216 vec![arg, Expression::string("%Y-%m-%d %T")],
15217 )));
15218 let parse_datetime =
15219 Expression::Function(Box::new(Function::new(
15220 "PARSE_DATETIME".to_string(),
15221 vec![
15222 date_format,
15223 Expression::string("yyyy-MM-dd HH:mm:ss"),
15224 ],
15225 )));
15226 let coalesce =
15227 Expression::Function(Box::new(Function::new(
15228 "COALESCE".to_string(),
15229 vec![try_expr, parse_datetime],
15230 )));
15231 Ok(Expression::Function(Box::new(Function::new(
15232 "TO_UNIXTIME".to_string(),
15233 vec![coalesce],
15234 ))))
15235 }
15236 DialectType::Presto | DialectType::Trino => {
15237 Ok(Expression::Function(Box::new(Function::new(
15238 "TO_UNIXTIME".to_string(),
15239 vec![arg],
15240 ))))
15241 }
15242 _ => Ok(Expression::Function(Box::new(Function::new(
15243 "UNIX_TIMESTAMP".to_string(),
15244 vec![arg],
15245 )))),
15246 }
15247 }
15248 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
15249 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
15250 DialectType::Spark
15251 | DialectType::Databricks
15252 | DialectType::Hive => Ok(Expression::Function(Box::new(
15253 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
15254 ))),
15255 _ => Ok(Expression::Function(Box::new(Function::new(
15256 "TO_UNIX_TIMESTAMP".to_string(),
15257 f.args,
15258 )))),
15259 },
15260 // CURDATE() -> CURRENT_DATE
15261 "CURDATE" => {
15262 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
15263 }
15264 // CURTIME() -> CURRENT_TIME
15265 "CURTIME" => {
15266 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
15267 precision: None,
15268 }))
15269 }
15270 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
15271 "ARRAY_SORT" if f.args.len() >= 1 => {
15272 match target {
15273 DialectType::Hive => {
15274 let mut args = f.args;
15275 args.truncate(1); // Drop lambda comparator
15276 Ok(Expression::Function(Box::new(Function::new(
15277 "SORT_ARRAY".to_string(),
15278 args,
15279 ))))
15280 }
15281 DialectType::DuckDB
15282 if matches!(source, DialectType::Snowflake) =>
15283 {
15284 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
15285 let mut args_iter = f.args.into_iter();
15286 let arr = args_iter.next().unwrap();
15287 let asc_arg = args_iter.next();
15288 let nulls_first_arg = args_iter.next();
15289
15290 let is_asc_bool = asc_arg
15291 .as_ref()
15292 .map(|a| matches!(a, Expression::Boolean(_)))
15293 .unwrap_or(false);
15294 let is_nf_bool = nulls_first_arg
15295 .as_ref()
15296 .map(|a| matches!(a, Expression::Boolean(_)))
15297 .unwrap_or(false);
15298
15299 // No boolean args: pass through as-is
15300 if !is_asc_bool && !is_nf_bool {
15301 let mut result_args = vec![arr];
15302 if let Some(asc) = asc_arg {
15303 result_args.push(asc);
15304 if let Some(nf) = nulls_first_arg {
15305 result_args.push(nf);
15306 }
15307 }
15308 Ok(Expression::Function(Box::new(Function::new(
15309 "LIST_SORT".to_string(),
15310 result_args,
15311 ))))
15312 } else {
15313 // Has boolean args: convert to DuckDB LIST_SORT format
15314 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
15315
15316 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
15317 let nulls_are_first = match &nulls_first_arg {
15318 Some(Expression::Boolean(b)) => b.value,
15319 None if is_asc_bool => descending, // Snowflake default
15320 _ => false,
15321 };
15322 let nulls_first_sql = if nulls_are_first {
15323 Some(Expression::string("NULLS FIRST"))
15324 } else {
15325 None
15326 };
15327
15328 if !is_asc_bool {
15329 // asc is non-boolean expression, nulls_first is boolean
15330 let mut result_args = vec![arr];
15331 if let Some(asc) = asc_arg {
15332 result_args.push(asc);
15333 }
15334 if let Some(nf) = nulls_first_sql {
15335 result_args.push(nf);
15336 }
15337 Ok(Expression::Function(Box::new(Function::new(
15338 "LIST_SORT".to_string(),
15339 result_args,
15340 ))))
15341 } else {
15342 if !descending && !nulls_are_first {
15343 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
15344 Ok(Expression::Function(Box::new(
15345 Function::new(
15346 "LIST_SORT".to_string(),
15347 vec![arr],
15348 ),
15349 )))
15350 } else if descending && !nulls_are_first {
15351 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
15352 Ok(Expression::Function(Box::new(
15353 Function::new(
15354 "ARRAY_REVERSE_SORT".to_string(),
15355 vec![arr],
15356 ),
15357 )))
15358 } else {
15359 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
15360 let order_str =
15361 if descending { "DESC" } else { "ASC" };
15362 Ok(Expression::Function(Box::new(
15363 Function::new(
15364 "LIST_SORT".to_string(),
15365 vec![
15366 arr,
15367 Expression::string(order_str),
15368 Expression::string("NULLS FIRST"),
15369 ],
15370 ),
15371 )))
15372 }
15373 }
15374 }
15375 }
15376 DialectType::DuckDB => {
15377 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
15378 let mut args = f.args;
15379 args.truncate(1); // Drop lambda comparator for DuckDB
15380 Ok(Expression::Function(Box::new(Function::new(
15381 "ARRAY_SORT".to_string(),
15382 args,
15383 ))))
15384 }
15385 _ => Ok(Expression::Function(f)),
15386 }
15387 }
15388 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
15389 "SORT_ARRAY" if f.args.len() == 1 => match target {
15390 DialectType::Hive
15391 | DialectType::Spark
15392 | DialectType::Databricks => Ok(Expression::Function(f)),
15393 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15394 Function::new("LIST_SORT".to_string(), f.args),
15395 ))),
15396 _ => Ok(Expression::Function(Box::new(Function::new(
15397 "ARRAY_SORT".to_string(),
15398 f.args,
15399 )))),
15400 },
15401 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
15402 "SORT_ARRAY" if f.args.len() == 2 => {
15403 let is_desc =
15404 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
15405 if is_desc {
15406 match target {
15407 DialectType::DuckDB => {
15408 Ok(Expression::Function(Box::new(Function::new(
15409 "ARRAY_REVERSE_SORT".to_string(),
15410 vec![f.args.into_iter().next().unwrap()],
15411 ))))
15412 }
15413 DialectType::Presto | DialectType::Trino => {
15414 let arr_arg = f.args.into_iter().next().unwrap();
15415 let a = Expression::Column(Box::new(
15416 crate::expressions::Column {
15417 name: crate::expressions::Identifier::new("a"),
15418 table: None,
15419 join_mark: false,
15420 trailing_comments: Vec::new(),
15421 span: None,
15422 inferred_type: None,
15423 },
15424 ));
15425 let b = Expression::Column(Box::new(
15426 crate::expressions::Column {
15427 name: crate::expressions::Identifier::new("b"),
15428 table: None,
15429 join_mark: false,
15430 trailing_comments: Vec::new(),
15431 span: None,
15432 inferred_type: None,
15433 },
15434 ));
15435 let case_expr = Expression::Case(Box::new(
15436 crate::expressions::Case {
15437 operand: None,
15438 whens: vec![
15439 (
15440 Expression::Lt(Box::new(
15441 BinaryOp::new(a.clone(), b.clone()),
15442 )),
15443 Expression::Literal(Box::new(
15444 Literal::Number("1".to_string()),
15445 )),
15446 ),
15447 (
15448 Expression::Gt(Box::new(
15449 BinaryOp::new(a.clone(), b.clone()),
15450 )),
15451 Expression::Literal(Box::new(
15452 Literal::Number("-1".to_string()),
15453 )),
15454 ),
15455 ],
15456 else_: Some(Expression::Literal(Box::new(
15457 Literal::Number("0".to_string()),
15458 ))),
15459 comments: Vec::new(),
15460 inferred_type: None,
15461 },
15462 ));
15463 let lambda = Expression::Lambda(Box::new(
15464 crate::expressions::LambdaExpr {
15465 parameters: vec![
15466 crate::expressions::Identifier::new("a"),
15467 crate::expressions::Identifier::new("b"),
15468 ],
15469 body: case_expr,
15470 colon: false,
15471 parameter_types: Vec::new(),
15472 },
15473 ));
15474 Ok(Expression::Function(Box::new(Function::new(
15475 "ARRAY_SORT".to_string(),
15476 vec![arr_arg, lambda],
15477 ))))
15478 }
15479 _ => Ok(Expression::Function(f)),
15480 }
15481 } else {
15482 // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
15483 match target {
15484 DialectType::Hive => Ok(Expression::Function(f)),
15485 DialectType::DuckDB => {
15486 Ok(Expression::Function(Box::new(Function::new(
15487 "LIST_SORT".to_string(),
15488 vec![f.args.into_iter().next().unwrap()],
15489 ))))
15490 }
15491 _ => Ok(Expression::Function(Box::new(Function::new(
15492 "ARRAY_SORT".to_string(),
15493 vec![f.args.into_iter().next().unwrap()],
15494 )))),
15495 }
15496 }
15497 }
15498 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
15499 "LEFT" if f.args.len() == 2 => {
15500 match target {
15501 DialectType::Hive
15502 | DialectType::Presto
15503 | DialectType::Trino
15504 | DialectType::Athena => {
15505 let x = f.args[0].clone();
15506 let n = f.args[1].clone();
15507 Ok(Expression::Function(Box::new(Function::new(
15508 "SUBSTRING".to_string(),
15509 vec![x, Expression::number(1), n],
15510 ))))
15511 }
15512 DialectType::Spark | DialectType::Databricks
15513 if matches!(
15514 source,
15515 DialectType::TSQL | DialectType::Fabric
15516 ) =>
15517 {
15518 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
15519 let x = f.args[0].clone();
15520 let n = f.args[1].clone();
15521 let cast_x = Expression::Cast(Box::new(Cast {
15522 this: x,
15523 to: DataType::VarChar {
15524 length: None,
15525 parenthesized_length: false,
15526 },
15527 double_colon_syntax: false,
15528 trailing_comments: Vec::new(),
15529 format: None,
15530 default: None,
15531 inferred_type: None,
15532 }));
15533 Ok(Expression::Function(Box::new(Function::new(
15534 "LEFT".to_string(),
15535 vec![cast_x, n],
15536 ))))
15537 }
15538 _ => Ok(Expression::Function(f)),
15539 }
15540 }
15541 "RIGHT" if f.args.len() == 2 => {
15542 match target {
15543 DialectType::Hive
15544 | DialectType::Presto
15545 | DialectType::Trino
15546 | DialectType::Athena => {
15547 let x = f.args[0].clone();
15548 let n = f.args[1].clone();
15549 // SUBSTRING(x, LENGTH(x) - (n - 1))
15550 let len_x = Expression::Function(Box::new(Function::new(
15551 "LENGTH".to_string(),
15552 vec![x.clone()],
15553 )));
15554 let n_minus_1 = Expression::Sub(Box::new(
15555 crate::expressions::BinaryOp::new(
15556 n,
15557 Expression::number(1),
15558 ),
15559 ));
15560 let n_minus_1_paren = Expression::Paren(Box::new(
15561 crate::expressions::Paren {
15562 this: n_minus_1,
15563 trailing_comments: Vec::new(),
15564 },
15565 ));
15566 let offset = Expression::Sub(Box::new(
15567 crate::expressions::BinaryOp::new(
15568 len_x,
15569 n_minus_1_paren,
15570 ),
15571 ));
15572 Ok(Expression::Function(Box::new(Function::new(
15573 "SUBSTRING".to_string(),
15574 vec![x, offset],
15575 ))))
15576 }
15577 DialectType::Spark | DialectType::Databricks
15578 if matches!(
15579 source,
15580 DialectType::TSQL | DialectType::Fabric
15581 ) =>
15582 {
15583 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
15584 let x = f.args[0].clone();
15585 let n = f.args[1].clone();
15586 let cast_x = Expression::Cast(Box::new(Cast {
15587 this: x,
15588 to: DataType::VarChar {
15589 length: None,
15590 parenthesized_length: false,
15591 },
15592 double_colon_syntax: false,
15593 trailing_comments: Vec::new(),
15594 format: None,
15595 default: None,
15596 inferred_type: None,
15597 }));
15598 Ok(Expression::Function(Box::new(Function::new(
15599 "RIGHT".to_string(),
15600 vec![cast_x, n],
15601 ))))
15602 }
15603 _ => Ok(Expression::Function(f)),
15604 }
15605 }
15606 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
15607 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15608 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15609 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15610 ))),
15611 DialectType::Spark | DialectType::Databricks => {
15612 Ok(Expression::Function(Box::new(Function::new(
15613 "MAP_FROM_ARRAYS".to_string(),
15614 f.args,
15615 ))))
15616 }
15617 _ => Ok(Expression::Function(Box::new(Function::new(
15618 "MAP".to_string(),
15619 f.args,
15620 )))),
15621 },
15622 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
15623 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
15624 "LIKE" if f.args.len() >= 2 => {
15625 let (this, pattern) = if matches!(source, DialectType::SQLite) {
15626 // SQLite: LIKE(pattern, string) -> string LIKE pattern
15627 (f.args[1].clone(), f.args[0].clone())
15628 } else {
15629 // Standard: LIKE(string, pattern) -> string LIKE pattern
15630 (f.args[0].clone(), f.args[1].clone())
15631 };
15632 let escape = if f.args.len() >= 3 {
15633 Some(f.args[2].clone())
15634 } else {
15635 None
15636 };
15637 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
15638 left: this,
15639 right: pattern,
15640 escape,
15641 quantifier: None,
15642 inferred_type: None,
15643 })))
15644 }
15645 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
15646 "ILIKE" if f.args.len() >= 2 => {
15647 let this = f.args[0].clone();
15648 let pattern = f.args[1].clone();
15649 let escape = if f.args.len() >= 3 {
15650 Some(f.args[2].clone())
15651 } else {
15652 None
15653 };
15654 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
15655 left: this,
15656 right: pattern,
15657 escape,
15658 quantifier: None,
15659 inferred_type: None,
15660 })))
15661 }
15662 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
15663 "CHAR" if f.args.len() == 1 => match target {
15664 DialectType::MySQL
15665 | DialectType::SingleStore
15666 | DialectType::TSQL => Ok(Expression::Function(f)),
15667 _ => Ok(Expression::Function(Box::new(Function::new(
15668 "CHR".to_string(),
15669 f.args,
15670 )))),
15671 },
15672 // CONCAT(a, b) -> a || b for PostgreSQL
15673 "CONCAT"
15674 if f.args.len() == 2
15675 && matches!(target, DialectType::PostgreSQL)
15676 && matches!(
15677 source,
15678 DialectType::ClickHouse | DialectType::MySQL
15679 ) =>
15680 {
15681 let mut args = f.args;
15682 let right = args.pop().unwrap();
15683 let left = args.pop().unwrap();
15684 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
15685 this: Box::new(left),
15686 expression: Box::new(right),
15687 safe: None,
15688 })))
15689 }
15690 // ARRAY_TO_STRING(arr, delim) -> target-specific
15691 "ARRAY_TO_STRING"
15692 if f.args.len() == 2
15693 && matches!(target, DialectType::DuckDB)
15694 && matches!(source, DialectType::Snowflake) =>
15695 {
15696 let mut args = f.args;
15697 let arr = args.remove(0);
15698 let sep = args.remove(0);
15699 // sep IS NULL
15700 let sep_is_null = Expression::IsNull(Box::new(IsNull {
15701 this: sep.clone(),
15702 not: false,
15703 postfix_form: false,
15704 }));
15705 // COALESCE(CAST(x AS TEXT), '')
15706 let cast_x = Expression::Cast(Box::new(Cast {
15707 this: Expression::Identifier(Identifier::new("x")),
15708 to: DataType::Text,
15709 trailing_comments: Vec::new(),
15710 double_colon_syntax: false,
15711 format: None,
15712 default: None,
15713 inferred_type: None,
15714 }));
15715 let coalesce = Expression::Coalesce(Box::new(
15716 crate::expressions::VarArgFunc {
15717 original_name: None,
15718 expressions: vec![
15719 cast_x,
15720 Expression::Literal(Box::new(Literal::String(
15721 String::new(),
15722 ))),
15723 ],
15724 inferred_type: None,
15725 },
15726 ));
15727 let lambda =
15728 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
15729 parameters: vec![Identifier::new("x")],
15730 body: coalesce,
15731 colon: false,
15732 parameter_types: Vec::new(),
15733 }));
15734 let list_transform = Expression::Function(Box::new(Function::new(
15735 "LIST_TRANSFORM".to_string(),
15736 vec![arr, lambda],
15737 )));
15738 let array_to_string =
15739 Expression::Function(Box::new(Function::new(
15740 "ARRAY_TO_STRING".to_string(),
15741 vec![list_transform, sep],
15742 )));
15743 Ok(Expression::Case(Box::new(Case {
15744 operand: None,
15745 whens: vec![(sep_is_null, Expression::Null(Null))],
15746 else_: Some(array_to_string),
15747 comments: Vec::new(),
15748 inferred_type: None,
15749 })))
15750 }
15751 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
15752 DialectType::Presto | DialectType::Trino => {
15753 Ok(Expression::Function(Box::new(Function::new(
15754 "ARRAY_JOIN".to_string(),
15755 f.args,
15756 ))))
15757 }
15758 DialectType::TSQL => Ok(Expression::Function(Box::new(
15759 Function::new("STRING_AGG".to_string(), f.args),
15760 ))),
15761 _ => Ok(Expression::Function(f)),
15762 },
15763 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
15764 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
15765 DialectType::Spark
15766 | DialectType::Databricks
15767 | DialectType::Hive => Ok(Expression::Function(Box::new(
15768 Function::new("CONCAT".to_string(), f.args),
15769 ))),
15770 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15771 Function::new("ARRAY_CAT".to_string(), f.args),
15772 ))),
15773 DialectType::Redshift => Ok(Expression::Function(Box::new(
15774 Function::new("ARRAY_CONCAT".to_string(), f.args),
15775 ))),
15776 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
15777 Function::new("ARRAY_CAT".to_string(), f.args),
15778 ))),
15779 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15780 Function::new("LIST_CONCAT".to_string(), f.args),
15781 ))),
15782 DialectType::Presto | DialectType::Trino => {
15783 Ok(Expression::Function(Box::new(Function::new(
15784 "CONCAT".to_string(),
15785 f.args,
15786 ))))
15787 }
15788 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15789 Function::new("ARRAY_CONCAT".to_string(), f.args),
15790 ))),
15791 _ => Ok(Expression::Function(f)),
15792 },
15793 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
15794 "HAS" if f.args.len() == 2 => match target {
15795 DialectType::Spark
15796 | DialectType::Databricks
15797 | DialectType::Hive => Ok(Expression::Function(Box::new(
15798 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15799 ))),
15800 DialectType::Presto | DialectType::Trino => {
15801 Ok(Expression::Function(Box::new(Function::new(
15802 "CONTAINS".to_string(),
15803 f.args,
15804 ))))
15805 }
15806 _ => Ok(Expression::Function(f)),
15807 },
15808 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
15809 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
15810 Function::new("COALESCE".to_string(), f.args),
15811 ))),
15812 // ISNULL(x) in MySQL -> (x IS NULL)
15813 "ISNULL"
15814 if f.args.len() == 1
15815 && matches!(source, DialectType::MySQL)
15816 && matches!(target, DialectType::MySQL) =>
15817 {
15818 let arg = f.args.into_iter().next().unwrap();
15819 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15820 this: Expression::IsNull(Box::new(
15821 crate::expressions::IsNull {
15822 this: arg,
15823 not: false,
15824 postfix_form: false,
15825 },
15826 )),
15827 trailing_comments: Vec::new(),
15828 })))
15829 }
15830 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
15831 "MONTHNAME"
15832 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
15833 {
15834 let arg = f.args.into_iter().next().unwrap();
15835 Ok(Expression::Function(Box::new(Function::new(
15836 "DATE_FORMAT".to_string(),
15837 vec![arg, Expression::string("%M")],
15838 ))))
15839 }
15840 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
15841 "SPLITBYSTRING" if f.args.len() == 2 => {
15842 let sep = f.args[0].clone();
15843 let str_arg = f.args[1].clone();
15844 match target {
15845 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15846 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
15847 ))),
15848 DialectType::Doris => {
15849 Ok(Expression::Function(Box::new(Function::new(
15850 "SPLIT_BY_STRING".to_string(),
15851 vec![str_arg, sep],
15852 ))))
15853 }
15854 DialectType::Hive
15855 | DialectType::Spark
15856 | DialectType::Databricks => {
15857 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
15858 let escaped =
15859 Expression::Function(Box::new(Function::new(
15860 "CONCAT".to_string(),
15861 vec![
15862 Expression::string("\\Q"),
15863 sep,
15864 Expression::string("\\E"),
15865 ],
15866 )));
15867 Ok(Expression::Function(Box::new(Function::new(
15868 "SPLIT".to_string(),
15869 vec![str_arg, escaped],
15870 ))))
15871 }
15872 _ => Ok(Expression::Function(f)),
15873 }
15874 }
15875 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
15876 "SPLITBYREGEXP" if f.args.len() == 2 => {
15877 let sep = f.args[0].clone();
15878 let str_arg = f.args[1].clone();
15879 match target {
15880 DialectType::DuckDB => {
15881 Ok(Expression::Function(Box::new(Function::new(
15882 "STR_SPLIT_REGEX".to_string(),
15883 vec![str_arg, sep],
15884 ))))
15885 }
15886 DialectType::Hive
15887 | DialectType::Spark
15888 | DialectType::Databricks => {
15889 Ok(Expression::Function(Box::new(Function::new(
15890 "SPLIT".to_string(),
15891 vec![str_arg, sep],
15892 ))))
15893 }
15894 _ => Ok(Expression::Function(f)),
15895 }
15896 }
15897 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
15898 "TOMONDAY" => {
15899 if f.args.len() == 1 {
15900 let arg = f.args.into_iter().next().unwrap();
15901 match target {
15902 DialectType::Doris => {
15903 Ok(Expression::Function(Box::new(Function::new(
15904 "DATE_TRUNC".to_string(),
15905 vec![arg, Expression::string("WEEK")],
15906 ))))
15907 }
15908 _ => Ok(Expression::Function(Box::new(Function::new(
15909 "DATE_TRUNC".to_string(),
15910 vec![Expression::string("WEEK"), arg],
15911 )))),
15912 }
15913 } else {
15914 Ok(Expression::Function(f))
15915 }
15916 }
15917 // COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
15918 "COLLECT_LIST" if f.args.len() == 1 => match target {
15919 DialectType::Spark
15920 | DialectType::Databricks
15921 | DialectType::Hive => Ok(Expression::Function(f)),
15922 _ => Ok(Expression::Function(Box::new(Function::new(
15923 "ARRAY_AGG".to_string(),
15924 f.args,
15925 )))),
15926 },
15927 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
15928 "TO_CHAR"
15929 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
15930 {
15931 let arg = f.args.into_iter().next().unwrap();
15932 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15933 this: arg,
15934 to: DataType::Custom {
15935 name: "STRING".to_string(),
15936 },
15937 double_colon_syntax: false,
15938 trailing_comments: Vec::new(),
15939 format: None,
15940 default: None,
15941 inferred_type: None,
15942 })))
15943 }
15944 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
15945 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
15946 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
15947 Function::new("RANDOM".to_string(), vec![]),
15948 ))),
15949 _ => Ok(Expression::Function(f)),
15950 },
15951 // ClickHouse formatDateTime -> target-specific
15952 "FORMATDATETIME" if f.args.len() >= 2 => match target {
15953 DialectType::MySQL => Ok(Expression::Function(Box::new(
15954 Function::new("DATE_FORMAT".to_string(), f.args),
15955 ))),
15956 _ => Ok(Expression::Function(f)),
15957 },
15958 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
15959 "REPLICATE" if f.args.len() == 2 => match target {
15960 DialectType::TSQL => Ok(Expression::Function(f)),
15961 _ => Ok(Expression::Function(Box::new(Function::new(
15962 "REPEAT".to_string(),
15963 f.args,
15964 )))),
15965 },
15966 // LEN(x) -> LENGTH(x) for non-TSQL targets
15967 // No CAST needed when arg is already a string literal
15968 "LEN" if f.args.len() == 1 => {
15969 match target {
15970 DialectType::TSQL => Ok(Expression::Function(f)),
15971 DialectType::Spark | DialectType::Databricks => {
15972 let arg = f.args.into_iter().next().unwrap();
15973 // Don't wrap string literals with CAST - they're already strings
15974 let is_string = matches!(
15975 &arg,
15976 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
15977 );
15978 let final_arg = if is_string {
15979 arg
15980 } else {
15981 Expression::Cast(Box::new(Cast {
15982 this: arg,
15983 to: DataType::VarChar {
15984 length: None,
15985 parenthesized_length: false,
15986 },
15987 double_colon_syntax: false,
15988 trailing_comments: Vec::new(),
15989 format: None,
15990 default: None,
15991 inferred_type: None,
15992 }))
15993 };
15994 Ok(Expression::Function(Box::new(Function::new(
15995 "LENGTH".to_string(),
15996 vec![final_arg],
15997 ))))
15998 }
15999 _ => {
16000 let arg = f.args.into_iter().next().unwrap();
16001 Ok(Expression::Function(Box::new(Function::new(
16002 "LENGTH".to_string(),
16003 vec![arg],
16004 ))))
16005 }
16006 }
16007 }
16008 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
16009 "COUNT_BIG" if f.args.len() == 1 => match target {
16010 DialectType::TSQL => Ok(Expression::Function(f)),
16011 _ => Ok(Expression::Function(Box::new(Function::new(
16012 "COUNT".to_string(),
16013 f.args,
16014 )))),
16015 },
16016 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
16017 "DATEFROMPARTS" if f.args.len() == 3 => match target {
16018 DialectType::TSQL => Ok(Expression::Function(f)),
16019 _ => Ok(Expression::Function(Box::new(Function::new(
16020 "MAKE_DATE".to_string(),
16021 f.args,
16022 )))),
16023 },
16024 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
16025 "REGEXP_LIKE" if f.args.len() >= 2 => {
16026 let str_expr = f.args[0].clone();
16027 let pattern = f.args[1].clone();
16028 let flags = if f.args.len() >= 3 {
16029 Some(f.args[2].clone())
16030 } else {
16031 None
16032 };
16033 match target {
16034 DialectType::DuckDB => {
16035 let mut new_args = vec![str_expr, pattern];
16036 if let Some(fl) = flags {
16037 new_args.push(fl);
16038 }
16039 Ok(Expression::Function(Box::new(Function::new(
16040 "REGEXP_MATCHES".to_string(),
16041 new_args,
16042 ))))
16043 }
16044 _ => Ok(Expression::RegexpLike(Box::new(
16045 crate::expressions::RegexpFunc {
16046 this: str_expr,
16047 pattern,
16048 flags,
16049 },
16050 ))),
16051 }
16052 }
16053 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
16054 "ARRAYJOIN" if f.args.len() == 1 => match target {
16055 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
16056 Function::new("UNNEST".to_string(), f.args),
16057 ))),
16058 _ => Ok(Expression::Function(f)),
16059 },
16060 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
16061 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
16062 match target {
16063 DialectType::TSQL => Ok(Expression::Function(f)),
16064 DialectType::DuckDB => {
16065 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
16066 let mut args = f.args;
16067 let ms = args.pop().unwrap();
16068 let s = args.pop().unwrap();
16069 // s + (ms / 1000.0)
16070 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
16071 ms,
16072 Expression::Literal(Box::new(
16073 crate::expressions::Literal::Number(
16074 "1000.0".to_string(),
16075 ),
16076 )),
16077 )));
16078 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
16079 s,
16080 Expression::Paren(Box::new(Paren {
16081 this: ms_frac,
16082 trailing_comments: vec![],
16083 })),
16084 )));
16085 args.push(s_with_ms);
16086 Ok(Expression::Function(Box::new(Function::new(
16087 "MAKE_TIMESTAMP".to_string(),
16088 args,
16089 ))))
16090 }
16091 DialectType::Snowflake => {
16092 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
16093 let mut args = f.args;
16094 let ms = args.pop().unwrap();
16095 // ms * 1000000
16096 let ns = Expression::Mul(Box::new(BinaryOp::new(
16097 ms,
16098 Expression::number(1000000),
16099 )));
16100 args.push(ns);
16101 Ok(Expression::Function(Box::new(Function::new(
16102 "TIMESTAMP_FROM_PARTS".to_string(),
16103 args,
16104 ))))
16105 }
16106 _ => {
16107 // Default: keep function name for other targets
16108 Ok(Expression::Function(Box::new(Function::new(
16109 "DATETIMEFROMPARTS".to_string(),
16110 f.args,
16111 ))))
16112 }
16113 }
16114 }
16115 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
16116 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
16117 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
16118 let is_try = name == "TRY_CONVERT";
16119 let type_expr = f.args[0].clone();
16120 let value_expr = f.args[1].clone();
16121 let style = if f.args.len() >= 3 {
16122 Some(&f.args[2])
16123 } else {
16124 None
16125 };
16126
16127 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
16128 if matches!(target, DialectType::TSQL) {
16129 let normalized_type = match &type_expr {
16130 Expression::DataType(dt) => {
16131 let new_dt = match dt {
16132 DataType::Int { .. } => DataType::Custom {
16133 name: "INTEGER".to_string(),
16134 },
16135 _ => dt.clone(),
16136 };
16137 Expression::DataType(new_dt)
16138 }
16139 Expression::Identifier(id) => {
16140 if id.name.eq_ignore_ascii_case("INT") {
16141 Expression::Identifier(
16142 crate::expressions::Identifier::new("INTEGER"),
16143 )
16144 } else {
16145 let upper = id.name.to_ascii_uppercase();
16146 Expression::Identifier(
16147 crate::expressions::Identifier::new(upper),
16148 )
16149 }
16150 }
16151 Expression::Column(col) => {
16152 if col.name.name.eq_ignore_ascii_case("INT") {
16153 Expression::Identifier(
16154 crate::expressions::Identifier::new("INTEGER"),
16155 )
16156 } else {
16157 let upper = col.name.name.to_ascii_uppercase();
16158 Expression::Identifier(
16159 crate::expressions::Identifier::new(upper),
16160 )
16161 }
16162 }
16163 _ => type_expr.clone(),
16164 };
16165 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
16166 let mut new_args = vec![normalized_type, value_expr];
16167 if let Some(s) = style {
16168 new_args.push(s.clone());
16169 }
16170 return Ok(Expression::Function(Box::new(Function::new(
16171 func_name.to_string(),
16172 new_args,
16173 ))));
16174 }
16175
16176 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
16177 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
16178 match e {
16179 Expression::DataType(dt) => {
16180 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
16181 match dt {
16182 DataType::Custom { name }
16183 if name.starts_with("NVARCHAR(")
16184 || name.starts_with("NCHAR(") =>
16185 {
16186 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
16187 let inner = &name[name.find('(').unwrap() + 1
16188 ..name.len() - 1];
16189 if inner.eq_ignore_ascii_case("MAX") {
16190 Some(DataType::Text)
16191 } else if let Ok(len) = inner.parse::<u32>() {
16192 if name.starts_with("NCHAR") {
16193 Some(DataType::Char {
16194 length: Some(len),
16195 })
16196 } else {
16197 Some(DataType::VarChar {
16198 length: Some(len),
16199 parenthesized_length: false,
16200 })
16201 }
16202 } else {
16203 Some(dt.clone())
16204 }
16205 }
16206 DataType::Custom { name } if name == "NVARCHAR" => {
16207 Some(DataType::VarChar {
16208 length: None,
16209 parenthesized_length: false,
16210 })
16211 }
16212 DataType::Custom { name } if name == "NCHAR" => {
16213 Some(DataType::Char { length: None })
16214 }
16215 DataType::Custom { name }
16216 if name == "NVARCHAR(MAX)"
16217 || name == "VARCHAR(MAX)" =>
16218 {
16219 Some(DataType::Text)
16220 }
16221 _ => Some(dt.clone()),
16222 }
16223 }
16224 Expression::Identifier(id) => {
16225 let name = id.name.to_ascii_uppercase();
16226 match name.as_str() {
16227 "INT" | "INTEGER" => Some(DataType::Int {
16228 length: None,
16229 integer_spelling: false,
16230 }),
16231 "BIGINT" => Some(DataType::BigInt { length: None }),
16232 "SMALLINT" => {
16233 Some(DataType::SmallInt { length: None })
16234 }
16235 "TINYINT" => {
16236 Some(DataType::TinyInt { length: None })
16237 }
16238 "FLOAT" => Some(DataType::Float {
16239 precision: None,
16240 scale: None,
16241 real_spelling: false,
16242 }),
16243 "REAL" => Some(DataType::Float {
16244 precision: None,
16245 scale: None,
16246 real_spelling: true,
16247 }),
16248 "DATETIME" | "DATETIME2" => {
16249 Some(DataType::Timestamp {
16250 timezone: false,
16251 precision: None,
16252 })
16253 }
16254 "DATE" => Some(DataType::Date),
16255 "BIT" => Some(DataType::Boolean),
16256 "TEXT" => Some(DataType::Text),
16257 "NUMERIC" => Some(DataType::Decimal {
16258 precision: None,
16259 scale: None,
16260 }),
16261 "MONEY" => Some(DataType::Decimal {
16262 precision: Some(15),
16263 scale: Some(4),
16264 }),
16265 "SMALLMONEY" => Some(DataType::Decimal {
16266 precision: Some(6),
16267 scale: Some(4),
16268 }),
16269 "VARCHAR" => Some(DataType::VarChar {
16270 length: None,
16271 parenthesized_length: false,
16272 }),
16273 "NVARCHAR" => Some(DataType::VarChar {
16274 length: None,
16275 parenthesized_length: false,
16276 }),
16277 "CHAR" => Some(DataType::Char { length: None }),
16278 "NCHAR" => Some(DataType::Char { length: None }),
16279 _ => Some(DataType::Custom { name }),
16280 }
16281 }
16282 Expression::Column(col) => {
16283 let name = col.name.name.to_ascii_uppercase();
16284 match name.as_str() {
16285 "INT" | "INTEGER" => Some(DataType::Int {
16286 length: None,
16287 integer_spelling: false,
16288 }),
16289 "BIGINT" => Some(DataType::BigInt { length: None }),
16290 "FLOAT" => Some(DataType::Float {
16291 precision: None,
16292 scale: None,
16293 real_spelling: false,
16294 }),
16295 "DATETIME" | "DATETIME2" => {
16296 Some(DataType::Timestamp {
16297 timezone: false,
16298 precision: None,
16299 })
16300 }
16301 "DATE" => Some(DataType::Date),
16302 "NUMERIC" => Some(DataType::Decimal {
16303 precision: None,
16304 scale: None,
16305 }),
16306 "VARCHAR" => Some(DataType::VarChar {
16307 length: None,
16308 parenthesized_length: false,
16309 }),
16310 "NVARCHAR" => Some(DataType::VarChar {
16311 length: None,
16312 parenthesized_length: false,
16313 }),
16314 "CHAR" => Some(DataType::Char { length: None }),
16315 "NCHAR" => Some(DataType::Char { length: None }),
16316 _ => Some(DataType::Custom { name }),
16317 }
16318 }
16319 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
16320 Expression::Function(f) => {
16321 let fname = f.name.to_ascii_uppercase();
16322 match fname.as_str() {
16323 "VARCHAR" | "NVARCHAR" => {
16324 let len = f.args.first().and_then(|a| {
16325 if let Expression::Literal(lit) = a
16326 {
16327 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16328 n.parse::<u32>().ok()
16329 } else { None }
16330 } else if let Expression::Identifier(id) = a
16331 {
16332 if id.name.eq_ignore_ascii_case("MAX") {
16333 None
16334 } else {
16335 None
16336 }
16337 } else {
16338 None
16339 }
16340 });
16341 // Check for VARCHAR(MAX) -> TEXT
16342 let is_max = f.args.first().map_or(false, |a| {
16343 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
16344 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
16345 });
16346 if is_max {
16347 Some(DataType::Text)
16348 } else {
16349 Some(DataType::VarChar {
16350 length: len,
16351 parenthesized_length: false,
16352 })
16353 }
16354 }
16355 "NCHAR" | "CHAR" => {
16356 let len = f.args.first().and_then(|a| {
16357 if let Expression::Literal(lit) = a
16358 {
16359 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16360 n.parse::<u32>().ok()
16361 } else { None }
16362 } else {
16363 None
16364 }
16365 });
16366 Some(DataType::Char { length: len })
16367 }
16368 "NUMERIC" | "DECIMAL" => {
16369 let precision = f.args.first().and_then(|a| {
16370 if let Expression::Literal(lit) = a
16371 {
16372 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16373 n.parse::<u32>().ok()
16374 } else { None }
16375 } else {
16376 None
16377 }
16378 });
16379 let scale = f.args.get(1).and_then(|a| {
16380 if let Expression::Literal(lit) = a
16381 {
16382 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16383 n.parse::<u32>().ok()
16384 } else { None }
16385 } else {
16386 None
16387 }
16388 });
16389 Some(DataType::Decimal { precision, scale })
16390 }
16391 _ => None,
16392 }
16393 }
16394 _ => None,
16395 }
16396 }
16397
16398 if let Some(mut dt) = expr_to_datatype(&type_expr) {
16399 // For TSQL source: VARCHAR/CHAR without length defaults to 30
16400 let is_tsql_source =
16401 matches!(source, DialectType::TSQL | DialectType::Fabric);
16402 if is_tsql_source {
16403 match &dt {
16404 DataType::VarChar { length: None, .. } => {
16405 dt = DataType::VarChar {
16406 length: Some(30),
16407 parenthesized_length: false,
16408 };
16409 }
16410 DataType::Char { length: None } => {
16411 dt = DataType::Char { length: Some(30) };
16412 }
16413 _ => {}
16414 }
16415 }
16416
16417 // Determine if this is a string type
16418 let is_string_type = matches!(
16419 dt,
16420 DataType::VarChar { .. }
16421 | DataType::Char { .. }
16422 | DataType::Text
16423 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
16424 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
16425 || name.starts_with("VARCHAR(") || name == "VARCHAR"
16426 || name == "STRING");
16427
16428 // Determine if this is a date/time type
16429 let is_datetime_type = matches!(
16430 dt,
16431 DataType::Timestamp { .. } | DataType::Date
16432 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
16433 || name == "DATETIME2" || name == "SMALLDATETIME");
16434
16435 // Check for date conversion with style
16436 if style.is_some() {
16437 let style_num = style.and_then(|s| {
16438 if let Expression::Literal(lit) = s {
16439 if let crate::expressions::Literal::Number(n) =
16440 lit.as_ref()
16441 {
16442 n.parse::<u32>().ok()
16443 } else {
16444 None
16445 }
16446 } else {
16447 None
16448 }
16449 });
16450
16451 // TSQL CONVERT date styles (Java format)
16452 let format_str = style_num.and_then(|n| match n {
16453 101 => Some("MM/dd/yyyy"),
16454 102 => Some("yyyy.MM.dd"),
16455 103 => Some("dd/MM/yyyy"),
16456 104 => Some("dd.MM.yyyy"),
16457 105 => Some("dd-MM-yyyy"),
16458 108 => Some("HH:mm:ss"),
16459 110 => Some("MM-dd-yyyy"),
16460 112 => Some("yyyyMMdd"),
16461 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
16462 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
16463 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
16464 _ => None,
16465 });
16466
16467 // Non-string, non-datetime types with style: just CAST, ignore the style
16468 if !is_string_type && !is_datetime_type {
16469 let cast_expr = if is_try {
16470 Expression::TryCast(Box::new(
16471 crate::expressions::Cast {
16472 this: value_expr,
16473 to: dt,
16474 trailing_comments: Vec::new(),
16475 double_colon_syntax: false,
16476 format: None,
16477 default: None,
16478 inferred_type: None,
16479 },
16480 ))
16481 } else {
16482 Expression::Cast(Box::new(
16483 crate::expressions::Cast {
16484 this: value_expr,
16485 to: dt,
16486 trailing_comments: Vec::new(),
16487 double_colon_syntax: false,
16488 format: None,
16489 default: None,
16490 inferred_type: None,
16491 },
16492 ))
16493 };
16494 return Ok(cast_expr);
16495 }
16496
16497 if let Some(java_fmt) = format_str {
16498 let c_fmt = java_fmt
16499 .replace("yyyy", "%Y")
16500 .replace("MM", "%m")
16501 .replace("dd", "%d")
16502 .replace("HH", "%H")
16503 .replace("mm", "%M")
16504 .replace("ss", "%S")
16505 .replace("SSSSSS", "%f")
16506 .replace("SSS", "%f")
16507 .replace("'T'", "T");
16508
16509 // For datetime target types: style is the INPUT format for parsing strings -> dates
16510 if is_datetime_type {
16511 match target {
16512 DialectType::DuckDB => {
16513 return Ok(Expression::Function(Box::new(
16514 Function::new(
16515 "STRPTIME".to_string(),
16516 vec![
16517 value_expr,
16518 Expression::string(&c_fmt),
16519 ],
16520 ),
16521 )));
16522 }
16523 DialectType::Spark
16524 | DialectType::Databricks => {
16525 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
16526 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
16527 let func_name =
16528 if matches!(dt, DataType::Date) {
16529 "TO_DATE"
16530 } else {
16531 "TO_TIMESTAMP"
16532 };
16533 return Ok(Expression::Function(Box::new(
16534 Function::new(
16535 func_name.to_string(),
16536 vec![
16537 value_expr,
16538 Expression::string(java_fmt),
16539 ],
16540 ),
16541 )));
16542 }
16543 DialectType::Hive => {
16544 return Ok(Expression::Function(Box::new(
16545 Function::new(
16546 "TO_TIMESTAMP".to_string(),
16547 vec![
16548 value_expr,
16549 Expression::string(java_fmt),
16550 ],
16551 ),
16552 )));
16553 }
16554 _ => {
16555 return Ok(Expression::Cast(Box::new(
16556 crate::expressions::Cast {
16557 this: value_expr,
16558 to: dt,
16559 trailing_comments: Vec::new(),
16560 double_colon_syntax: false,
16561 format: None,
16562 default: None,
16563 inferred_type: None,
16564 },
16565 )));
16566 }
16567 }
16568 }
16569
16570 // For string target types: style is the OUTPUT format for dates -> strings
16571 match target {
16572 DialectType::DuckDB => Ok(Expression::Function(
16573 Box::new(Function::new(
16574 "STRPTIME".to_string(),
16575 vec![
16576 value_expr,
16577 Expression::string(&c_fmt),
16578 ],
16579 )),
16580 )),
16581 DialectType::Spark | DialectType::Databricks => {
16582 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
16583 // Determine the target string type
16584 let string_dt = match &dt {
16585 DataType::VarChar {
16586 length: Some(l),
16587 ..
16588 } => DataType::VarChar {
16589 length: Some(*l),
16590 parenthesized_length: false,
16591 },
16592 DataType::Text => DataType::Custom {
16593 name: "STRING".to_string(),
16594 },
16595 _ => DataType::Custom {
16596 name: "STRING".to_string(),
16597 },
16598 };
16599 let date_format_expr = Expression::Function(
16600 Box::new(Function::new(
16601 "DATE_FORMAT".to_string(),
16602 vec![
16603 value_expr,
16604 Expression::string(java_fmt),
16605 ],
16606 )),
16607 );
16608 let cast_expr = if is_try {
16609 Expression::TryCast(Box::new(
16610 crate::expressions::Cast {
16611 this: date_format_expr,
16612 to: string_dt,
16613 trailing_comments: Vec::new(),
16614 double_colon_syntax: false,
16615 format: None,
16616 default: None,
16617 inferred_type: None,
16618 },
16619 ))
16620 } else {
16621 Expression::Cast(Box::new(
16622 crate::expressions::Cast {
16623 this: date_format_expr,
16624 to: string_dt,
16625 trailing_comments: Vec::new(),
16626 double_colon_syntax: false,
16627 format: None,
16628 default: None,
16629 inferred_type: None,
16630 },
16631 ))
16632 };
16633 Ok(cast_expr)
16634 }
16635 DialectType::MySQL | DialectType::SingleStore => {
16636 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
16637 let mysql_fmt = java_fmt
16638 .replace("yyyy", "%Y")
16639 .replace("MM", "%m")
16640 .replace("dd", "%d")
16641 .replace("HH:mm:ss.SSSSSS", "%T")
16642 .replace("HH:mm:ss", "%T")
16643 .replace("HH", "%H")
16644 .replace("mm", "%i")
16645 .replace("ss", "%S");
16646 let date_format_expr = Expression::Function(
16647 Box::new(Function::new(
16648 "DATE_FORMAT".to_string(),
16649 vec![
16650 value_expr,
16651 Expression::string(&mysql_fmt),
16652 ],
16653 )),
16654 );
16655 // MySQL uses CHAR for string casts
16656 let mysql_dt = match &dt {
16657 DataType::VarChar { length, .. } => {
16658 DataType::Char { length: *length }
16659 }
16660 _ => dt,
16661 };
16662 Ok(Expression::Cast(Box::new(
16663 crate::expressions::Cast {
16664 this: date_format_expr,
16665 to: mysql_dt,
16666 trailing_comments: Vec::new(),
16667 double_colon_syntax: false,
16668 format: None,
16669 default: None,
16670 inferred_type: None,
16671 },
16672 )))
16673 }
16674 DialectType::Hive => {
16675 let func_name = "TO_TIMESTAMP";
16676 Ok(Expression::Function(Box::new(
16677 Function::new(
16678 func_name.to_string(),
16679 vec![
16680 value_expr,
16681 Expression::string(java_fmt),
16682 ],
16683 ),
16684 )))
16685 }
16686 _ => Ok(Expression::Cast(Box::new(
16687 crate::expressions::Cast {
16688 this: value_expr,
16689 to: dt,
16690 trailing_comments: Vec::new(),
16691 double_colon_syntax: false,
16692 format: None,
16693 default: None,
16694 inferred_type: None,
16695 },
16696 ))),
16697 }
16698 } else {
16699 // Unknown style, just CAST
16700 let cast_expr = if is_try {
16701 Expression::TryCast(Box::new(
16702 crate::expressions::Cast {
16703 this: value_expr,
16704 to: dt,
16705 trailing_comments: Vec::new(),
16706 double_colon_syntax: false,
16707 format: None,
16708 default: None,
16709 inferred_type: None,
16710 },
16711 ))
16712 } else {
16713 Expression::Cast(Box::new(
16714 crate::expressions::Cast {
16715 this: value_expr,
16716 to: dt,
16717 trailing_comments: Vec::new(),
16718 double_colon_syntax: false,
16719 format: None,
16720 default: None,
16721 inferred_type: None,
16722 },
16723 ))
16724 };
16725 Ok(cast_expr)
16726 }
16727 } else {
16728 // No style - simple CAST
16729 let final_dt = if matches!(
16730 target,
16731 DialectType::MySQL | DialectType::SingleStore
16732 ) {
16733 match &dt {
16734 DataType::Int { .. }
16735 | DataType::BigInt { .. }
16736 | DataType::SmallInt { .. }
16737 | DataType::TinyInt { .. } => DataType::Custom {
16738 name: "SIGNED".to_string(),
16739 },
16740 DataType::VarChar { length, .. } => {
16741 DataType::Char { length: *length }
16742 }
16743 _ => dt,
16744 }
16745 } else {
16746 dt
16747 };
16748 let cast_expr = if is_try {
16749 Expression::TryCast(Box::new(
16750 crate::expressions::Cast {
16751 this: value_expr,
16752 to: final_dt,
16753 trailing_comments: Vec::new(),
16754 double_colon_syntax: false,
16755 format: None,
16756 default: None,
16757 inferred_type: None,
16758 },
16759 ))
16760 } else {
16761 Expression::Cast(Box::new(crate::expressions::Cast {
16762 this: value_expr,
16763 to: final_dt,
16764 trailing_comments: Vec::new(),
16765 double_colon_syntax: false,
16766 format: None,
16767 default: None,
16768 inferred_type: None,
16769 }))
16770 };
16771 Ok(cast_expr)
16772 }
16773 } else {
16774 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
16775 Ok(Expression::Function(f))
16776 }
16777 }
16778 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
16779 "STRFTIME" if f.args.len() == 2 => {
16780 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
16781 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
16782 // SQLite: args[0] = format, args[1] = value
16783 (f.args[1].clone(), &f.args[0])
16784 } else {
16785 // DuckDB and others: args[0] = value, args[1] = format
16786 (f.args[0].clone(), &f.args[1])
16787 };
16788
16789 // Helper to convert C-style format to Java-style
/// Rewrites `%`-prefixed strftime directives in `fmt` into Java date-pattern letters.
///
/// Each table entry is applied as a whole-string substitution, one after
/// another in table order; text that matches no entry is left unchanged.
fn c_to_java_format(fmt: &str) -> String {
    const DIRECTIVES: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    DIRECTIVES
        .iter()
        .fold(fmt.to_owned(), |pattern, &(directive, java)| {
            pattern.replace(directive, java)
        })
}
16811
16812 // Helper: recursively convert format strings within expressions (handles CONCAT)
16813 fn convert_fmt_expr(
16814 expr: &Expression,
16815 converter: &dyn Fn(&str) -> String,
16816 ) -> Expression {
16817 match expr {
16818 Expression::Literal(lit)
16819 if matches!(
16820 lit.as_ref(),
16821 crate::expressions::Literal::String(_)
16822 ) =>
16823 {
16824 let crate::expressions::Literal::String(s) =
16825 lit.as_ref()
16826 else {
16827 unreachable!()
16828 };
16829 Expression::string(&converter(s))
16830 }
16831 Expression::Function(func)
16832 if func.name.eq_ignore_ascii_case("CONCAT") =>
16833 {
16834 let new_args: Vec<Expression> = func
16835 .args
16836 .iter()
16837 .map(|a| convert_fmt_expr(a, converter))
16838 .collect();
16839 Expression::Function(Box::new(Function::new(
16840 "CONCAT".to_string(),
16841 new_args,
16842 )))
16843 }
16844 other => other.clone(),
16845 }
16846 }
16847
16848 match target {
16849 DialectType::DuckDB => {
16850 if matches!(source, DialectType::SQLite) {
16851 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
16852 let cast_val = Expression::Cast(Box::new(Cast {
16853 this: val,
16854 to: crate::expressions::DataType::Timestamp {
16855 precision: None,
16856 timezone: false,
16857 },
16858 trailing_comments: Vec::new(),
16859 double_colon_syntax: false,
16860 format: None,
16861 default: None,
16862 inferred_type: None,
16863 }));
16864 Ok(Expression::Function(Box::new(Function::new(
16865 "STRFTIME".to_string(),
16866 vec![cast_val, fmt_expr.clone()],
16867 ))))
16868 } else {
16869 Ok(Expression::Function(f))
16870 }
16871 }
16872 DialectType::Spark
16873 | DialectType::Databricks
16874 | DialectType::Hive => {
16875 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
16876 let converted_fmt =
16877 convert_fmt_expr(fmt_expr, &c_to_java_format);
16878 Ok(Expression::Function(Box::new(Function::new(
16879 "DATE_FORMAT".to_string(),
16880 vec![val, converted_fmt],
16881 ))))
16882 }
16883 DialectType::TSQL | DialectType::Fabric => {
16884 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
16885 let converted_fmt =
16886 convert_fmt_expr(fmt_expr, &c_to_java_format);
16887 Ok(Expression::Function(Box::new(Function::new(
16888 "FORMAT".to_string(),
16889 vec![val, converted_fmt],
16890 ))))
16891 }
16892 DialectType::Presto
16893 | DialectType::Trino
16894 | DialectType::Athena => {
16895 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
16896 if let Expression::Literal(lit) = fmt_expr {
16897 if let crate::expressions::Literal::String(s) =
16898 lit.as_ref()
16899 {
16900 let presto_fmt = duckdb_to_presto_format(s);
16901 Ok(Expression::Function(Box::new(Function::new(
16902 "DATE_FORMAT".to_string(),
16903 vec![val, Expression::string(&presto_fmt)],
16904 ))))
16905 } else {
16906 Ok(Expression::Function(Box::new(Function::new(
16907 "DATE_FORMAT".to_string(),
16908 vec![val, fmt_expr.clone()],
16909 ))))
16910 }
16911 } else {
16912 Ok(Expression::Function(Box::new(Function::new(
16913 "DATE_FORMAT".to_string(),
16914 vec![val, fmt_expr.clone()],
16915 ))))
16916 }
16917 }
16918 DialectType::BigQuery => {
16919 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
16920 if let Expression::Literal(lit) = fmt_expr {
16921 if let crate::expressions::Literal::String(s) =
16922 lit.as_ref()
16923 {
16924 let bq_fmt = duckdb_to_bigquery_format(s);
16925 Ok(Expression::Function(Box::new(Function::new(
16926 "FORMAT_DATE".to_string(),
16927 vec![Expression::string(&bq_fmt), val],
16928 ))))
16929 } else {
16930 Ok(Expression::Function(Box::new(Function::new(
16931 "FORMAT_DATE".to_string(),
16932 vec![fmt_expr.clone(), val],
16933 ))))
16934 }
16935 } else {
16936 Ok(Expression::Function(Box::new(Function::new(
16937 "FORMAT_DATE".to_string(),
16938 vec![fmt_expr.clone(), val],
16939 ))))
16940 }
16941 }
16942 DialectType::PostgreSQL | DialectType::Redshift => {
16943 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
16944 if let Expression::Literal(lit) = fmt_expr {
16945 if let crate::expressions::Literal::String(s) =
16946 lit.as_ref()
16947 {
16948 let pg_fmt = s
16949 .replace("%Y", "YYYY")
16950 .replace("%m", "MM")
16951 .replace("%d", "DD")
16952 .replace("%H", "HH24")
16953 .replace("%M", "MI")
16954 .replace("%S", "SS")
16955 .replace("%y", "YY")
16956 .replace("%-m", "FMMM")
16957 .replace("%-d", "FMDD")
16958 .replace("%-H", "FMHH24")
16959 .replace("%-I", "FMHH12")
16960 .replace("%p", "AM")
16961 .replace("%F", "YYYY-MM-DD")
16962 .replace("%T", "HH24:MI:SS");
16963 Ok(Expression::Function(Box::new(Function::new(
16964 "TO_CHAR".to_string(),
16965 vec![val, Expression::string(&pg_fmt)],
16966 ))))
16967 } else {
16968 Ok(Expression::Function(Box::new(Function::new(
16969 "TO_CHAR".to_string(),
16970 vec![val, fmt_expr.clone()],
16971 ))))
16972 }
16973 } else {
16974 Ok(Expression::Function(Box::new(Function::new(
16975 "TO_CHAR".to_string(),
16976 vec![val, fmt_expr.clone()],
16977 ))))
16978 }
16979 }
16980 _ => Ok(Expression::Function(f)),
16981 }
16982 }
16983 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
16984 "STRPTIME" if f.args.len() == 2 => {
16985 let val = f.args[0].clone();
16986 let fmt_expr = &f.args[1];
16987
/// Rewrites `%`-prefixed strptime directives in `fmt` into Java date-pattern letters.
///
/// Each table entry is applied as a whole-string substitution, one after
/// another in table order; text that matches no entry is left unchanged.
fn c_to_java_format_parse(fmt: &str) -> String {
    const DIRECTIVES: &[(&str, &str)] = &[
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];

    DIRECTIVES
        .iter()
        .fold(fmt.to_owned(), |pattern, &(directive, java)| {
            pattern.replace(directive, java)
        })
}
17006
17007 match target {
17008 DialectType::DuckDB => Ok(Expression::Function(f)),
17009 DialectType::Spark | DialectType::Databricks => {
17010 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
17011 if let Expression::Literal(lit) = fmt_expr {
17012 if let crate::expressions::Literal::String(s) =
17013 lit.as_ref()
17014 {
17015 let java_fmt = c_to_java_format_parse(s);
17016 Ok(Expression::Function(Box::new(Function::new(
17017 "TO_TIMESTAMP".to_string(),
17018 vec![val, Expression::string(&java_fmt)],
17019 ))))
17020 } else {
17021 Ok(Expression::Function(Box::new(Function::new(
17022 "TO_TIMESTAMP".to_string(),
17023 vec![val, fmt_expr.clone()],
17024 ))))
17025 }
17026 } else {
17027 Ok(Expression::Function(Box::new(Function::new(
17028 "TO_TIMESTAMP".to_string(),
17029 vec![val, fmt_expr.clone()],
17030 ))))
17031 }
17032 }
17033 DialectType::Hive => {
17034 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
17035 if let Expression::Literal(lit) = fmt_expr {
17036 if let crate::expressions::Literal::String(s) =
17037 lit.as_ref()
17038 {
17039 let java_fmt = c_to_java_format_parse(s);
17040 let unix_ts =
17041 Expression::Function(Box::new(Function::new(
17042 "UNIX_TIMESTAMP".to_string(),
17043 vec![val, Expression::string(&java_fmt)],
17044 )));
17045 let from_unix =
17046 Expression::Function(Box::new(Function::new(
17047 "FROM_UNIXTIME".to_string(),
17048 vec![unix_ts],
17049 )));
17050 Ok(Expression::Cast(Box::new(
17051 crate::expressions::Cast {
17052 this: from_unix,
17053 to: DataType::Timestamp {
17054 timezone: false,
17055 precision: None,
17056 },
17057 trailing_comments: Vec::new(),
17058 double_colon_syntax: false,
17059 format: None,
17060 default: None,
17061 inferred_type: None,
17062 },
17063 )))
17064 } else {
17065 Ok(Expression::Function(f))
17066 }
17067 } else {
17068 Ok(Expression::Function(f))
17069 }
17070 }
17071 DialectType::Presto
17072 | DialectType::Trino
17073 | DialectType::Athena => {
17074 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
17075 if let Expression::Literal(lit) = fmt_expr {
17076 if let crate::expressions::Literal::String(s) =
17077 lit.as_ref()
17078 {
17079 let presto_fmt = duckdb_to_presto_format(s);
17080 Ok(Expression::Function(Box::new(Function::new(
17081 "DATE_PARSE".to_string(),
17082 vec![val, Expression::string(&presto_fmt)],
17083 ))))
17084 } else {
17085 Ok(Expression::Function(Box::new(Function::new(
17086 "DATE_PARSE".to_string(),
17087 vec![val, fmt_expr.clone()],
17088 ))))
17089 }
17090 } else {
17091 Ok(Expression::Function(Box::new(Function::new(
17092 "DATE_PARSE".to_string(),
17093 vec![val, fmt_expr.clone()],
17094 ))))
17095 }
17096 }
17097 DialectType::BigQuery => {
17098 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
17099 if let Expression::Literal(lit) = fmt_expr {
17100 if let crate::expressions::Literal::String(s) =
17101 lit.as_ref()
17102 {
17103 let bq_fmt = duckdb_to_bigquery_format(s);
17104 Ok(Expression::Function(Box::new(Function::new(
17105 "PARSE_TIMESTAMP".to_string(),
17106 vec![Expression::string(&bq_fmt), val],
17107 ))))
17108 } else {
17109 Ok(Expression::Function(Box::new(Function::new(
17110 "PARSE_TIMESTAMP".to_string(),
17111 vec![fmt_expr.clone(), val],
17112 ))))
17113 }
17114 } else {
17115 Ok(Expression::Function(Box::new(Function::new(
17116 "PARSE_TIMESTAMP".to_string(),
17117 vec![fmt_expr.clone(), val],
17118 ))))
17119 }
17120 }
17121 _ => Ok(Expression::Function(f)),
17122 }
17123 }
17124 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
17125 "DATE_FORMAT"
17126 if f.args.len() >= 2
17127 && matches!(
17128 source,
17129 DialectType::Presto
17130 | DialectType::Trino
17131 | DialectType::Athena
17132 ) =>
17133 {
17134 let val = f.args[0].clone();
17135 let fmt_expr = &f.args[1];
17136
17137 match target {
17138 DialectType::Presto
17139 | DialectType::Trino
17140 | DialectType::Athena => {
17141 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
17142 if let Expression::Literal(lit) = fmt_expr {
17143 if let crate::expressions::Literal::String(s) =
17144 lit.as_ref()
17145 {
17146 let normalized = normalize_presto_format(s);
17147 Ok(Expression::Function(Box::new(Function::new(
17148 "DATE_FORMAT".to_string(),
17149 vec![val, Expression::string(&normalized)],
17150 ))))
17151 } else {
17152 Ok(Expression::Function(f))
17153 }
17154 } else {
17155 Ok(Expression::Function(f))
17156 }
17157 }
17158 DialectType::Hive
17159 | DialectType::Spark
17160 | DialectType::Databricks => {
17161 // Convert Presto C-style to Java-style format
17162 if let Expression::Literal(lit) = fmt_expr {
17163 if let crate::expressions::Literal::String(s) =
17164 lit.as_ref()
17165 {
17166 let java_fmt = presto_to_java_format(s);
17167 Ok(Expression::Function(Box::new(Function::new(
17168 "DATE_FORMAT".to_string(),
17169 vec![val, Expression::string(&java_fmt)],
17170 ))))
17171 } else {
17172 Ok(Expression::Function(f))
17173 }
17174 } else {
17175 Ok(Expression::Function(f))
17176 }
17177 }
17178 DialectType::DuckDB => {
17179 // Convert to STRFTIME(val, duckdb_fmt)
17180 if let Expression::Literal(lit) = fmt_expr {
17181 if let crate::expressions::Literal::String(s) =
17182 lit.as_ref()
17183 {
17184 let duckdb_fmt = presto_to_duckdb_format(s);
17185 Ok(Expression::Function(Box::new(Function::new(
17186 "STRFTIME".to_string(),
17187 vec![val, Expression::string(&duckdb_fmt)],
17188 ))))
17189 } else {
17190 Ok(Expression::Function(Box::new(Function::new(
17191 "STRFTIME".to_string(),
17192 vec![val, fmt_expr.clone()],
17193 ))))
17194 }
17195 } else {
17196 Ok(Expression::Function(Box::new(Function::new(
17197 "STRFTIME".to_string(),
17198 vec![val, fmt_expr.clone()],
17199 ))))
17200 }
17201 }
17202 DialectType::BigQuery => {
17203 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
17204 if let Expression::Literal(lit) = fmt_expr {
17205 if let crate::expressions::Literal::String(s) =
17206 lit.as_ref()
17207 {
17208 let bq_fmt = presto_to_bigquery_format(s);
17209 Ok(Expression::Function(Box::new(Function::new(
17210 "FORMAT_DATE".to_string(),
17211 vec![Expression::string(&bq_fmt), val],
17212 ))))
17213 } else {
17214 Ok(Expression::Function(Box::new(Function::new(
17215 "FORMAT_DATE".to_string(),
17216 vec![fmt_expr.clone(), val],
17217 ))))
17218 }
17219 } else {
17220 Ok(Expression::Function(Box::new(Function::new(
17221 "FORMAT_DATE".to_string(),
17222 vec![fmt_expr.clone(), val],
17223 ))))
17224 }
17225 }
17226 _ => Ok(Expression::Function(f)),
17227 }
17228 }
17229 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
17230 "DATE_PARSE"
17231 if f.args.len() >= 2
17232 && matches!(
17233 source,
17234 DialectType::Presto
17235 | DialectType::Trino
17236 | DialectType::Athena
17237 ) =>
17238 {
17239 let val = f.args[0].clone();
17240 let fmt_expr = &f.args[1];
17241
17242 match target {
17243 DialectType::Presto
17244 | DialectType::Trino
17245 | DialectType::Athena => {
17246 // Presto -> Presto: normalize format
17247 if let Expression::Literal(lit) = fmt_expr {
17248 if let crate::expressions::Literal::String(s) =
17249 lit.as_ref()
17250 {
17251 let normalized = normalize_presto_format(s);
17252 Ok(Expression::Function(Box::new(Function::new(
17253 "DATE_PARSE".to_string(),
17254 vec![val, Expression::string(&normalized)],
17255 ))))
17256 } else {
17257 Ok(Expression::Function(f))
17258 }
17259 } else {
17260 Ok(Expression::Function(f))
17261 }
17262 }
17263 DialectType::Hive => {
17264 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
17265 if let Expression::Literal(lit) = fmt_expr {
17266 if let crate::expressions::Literal::String(s) =
17267 lit.as_ref()
17268 {
17269 if is_default_presto_timestamp_format(s)
17270 || is_default_presto_date_format(s)
17271 {
17272 Ok(Expression::Cast(Box::new(
17273 crate::expressions::Cast {
17274 this: val,
17275 to: DataType::Timestamp {
17276 timezone: false,
17277 precision: None,
17278 },
17279 trailing_comments: Vec::new(),
17280 double_colon_syntax: false,
17281 format: None,
17282 default: None,
17283 inferred_type: None,
17284 },
17285 )))
17286 } else {
17287 let java_fmt = presto_to_java_format(s);
17288 Ok(Expression::Function(Box::new(
17289 Function::new(
17290 "TO_TIMESTAMP".to_string(),
17291 vec![
17292 val,
17293 Expression::string(&java_fmt),
17294 ],
17295 ),
17296 )))
17297 }
17298 } else {
17299 Ok(Expression::Function(f))
17300 }
17301 } else {
17302 Ok(Expression::Function(f))
17303 }
17304 }
17305 DialectType::Spark | DialectType::Databricks => {
17306 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
17307 if let Expression::Literal(lit) = fmt_expr {
17308 if let crate::expressions::Literal::String(s) =
17309 lit.as_ref()
17310 {
17311 let java_fmt = presto_to_java_format(s);
17312 Ok(Expression::Function(Box::new(Function::new(
17313 "TO_TIMESTAMP".to_string(),
17314 vec![val, Expression::string(&java_fmt)],
17315 ))))
17316 } else {
17317 Ok(Expression::Function(f))
17318 }
17319 } else {
17320 Ok(Expression::Function(f))
17321 }
17322 }
17323 DialectType::DuckDB => {
17324 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
17325 if let Expression::Literal(lit) = fmt_expr {
17326 if let crate::expressions::Literal::String(s) =
17327 lit.as_ref()
17328 {
17329 let duckdb_fmt = presto_to_duckdb_format(s);
17330 Ok(Expression::Function(Box::new(Function::new(
17331 "STRPTIME".to_string(),
17332 vec![val, Expression::string(&duckdb_fmt)],
17333 ))))
17334 } else {
17335 Ok(Expression::Function(Box::new(Function::new(
17336 "STRPTIME".to_string(),
17337 vec![val, fmt_expr.clone()],
17338 ))))
17339 }
17340 } else {
17341 Ok(Expression::Function(Box::new(Function::new(
17342 "STRPTIME".to_string(),
17343 vec![val, fmt_expr.clone()],
17344 ))))
17345 }
17346 }
17347 _ => Ok(Expression::Function(f)),
17348 }
17349 }
17350 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
17351 "FROM_BASE64"
17352 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
17353 {
17354 Ok(Expression::Function(Box::new(Function::new(
17355 "UNBASE64".to_string(),
17356 f.args,
17357 ))))
17358 }
17359 "TO_BASE64"
17360 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
17361 {
17362 Ok(Expression::Function(Box::new(Function::new(
17363 "BASE64".to_string(),
17364 f.args,
17365 ))))
17366 }
17367 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
17368 "FROM_UNIXTIME"
17369 if f.args.len() == 1
17370 && matches!(
17371 source,
17372 DialectType::Presto
17373 | DialectType::Trino
17374 | DialectType::Athena
17375 )
17376 && matches!(
17377 target,
17378 DialectType::Spark | DialectType::Databricks
17379 ) =>
17380 {
17381 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
17382 let from_unix = Expression::Function(Box::new(Function::new(
17383 "FROM_UNIXTIME".to_string(),
17384 f.args,
17385 )));
17386 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17387 this: from_unix,
17388 to: DataType::Timestamp {
17389 timezone: false,
17390 precision: None,
17391 },
17392 trailing_comments: Vec::new(),
17393 double_colon_syntax: false,
17394 format: None,
17395 default: None,
17396 inferred_type: None,
17397 })))
17398 }
17399 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
17400 "DATE_FORMAT"
17401 if f.args.len() >= 2
17402 && !matches!(
17403 target,
17404 DialectType::Hive
17405 | DialectType::Spark
17406 | DialectType::Databricks
17407 | DialectType::MySQL
17408 | DialectType::SingleStore
17409 ) =>
17410 {
17411 let val = f.args[0].clone();
17412 let fmt_expr = &f.args[1];
17413 let is_hive_source = matches!(
17414 source,
17415 DialectType::Hive
17416 | DialectType::Spark
17417 | DialectType::Databricks
17418 );
17419
/// Translate a Java/Hive date pattern into a C `strftime`-style pattern.
///
/// Recognised Java tokens (checked in this order at every position):
/// `yyyy`→`%Y`, `SSSSSS`→`%f`, `EEEE`→`%W`, `MM`→`%m`, `dd`→`%d`,
/// `HH`→`%H`, `mm`→`%M`, `ss`→`%S`, `yy`→`%y`, plus the single-letter
/// time-zone tokens `z`→`%Z` and `Z`→`%z`.
///
/// A `%` that is already present in `fmt`, together with the character
/// that follows it, is copied through untouched. This function is also
/// called on formats that are already strftime-style, and their
/// specifiers (including `%z` / `%Z`) must survive unchanged.
///
/// The conversion is a single left-to-right scan. The previous
/// implementation chained `str::replace` calls, which re-scanned text
/// produced by earlier replacements: `"MMmm"` became `"%mmm"` after the
/// `MM` pass and then `"%%Mm"` after the `mm` pass. Scanning once means
/// emitted specifiers are never re-interpreted.
///
/// Any other character is copied to the output as-is.
fn java_to_c_format(fmt: &str) -> String {
    // `yyyy` must precede `yy`; every other token uses a distinct letter,
    // so their relative order does not matter.
    const TOKENS: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];

    let mut out = String::with_capacity(fmt.len() + 8);
    let mut rest = fmt;

    while let Some(ch) = rest.chars().next() {
        if ch == '%' {
            // Pre-existing specifier: keep `%` and the following char verbatim.
            out.push('%');
            let mut consumed = 1;
            if let Some(spec) = rest[1..].chars().next() {
                out.push(spec);
                consumed += spec.len_utf8();
            }
            rest = &rest[consumed..];
        } else if let Some((java, c_spec)) =
            TOKENS.iter().find(|(java, _)| rest.starts_with(*java))
        {
            out.push_str(c_spec);
            rest = &rest[java.len()..];
        } else {
            match ch {
                // Java `z` is the zone name, `Z` the numeric offset;
                // strftime uses the opposite letter case.
                'z' => out.push_str("%Z"),
                'Z' => out.push_str("%z"),
                other => out.push(other),
            }
            rest = &rest[ch.len_utf8()..];
        }
    }

    out
}
17459
17460 fn java_to_presto_format(fmt: &str) -> String {
17461 // Presto uses %T for HH:MM:SS
17462 let c_fmt = java_to_c_format(fmt);
17463 c_fmt.replace("%H:%M:%S", "%T")
17464 }
17465
17466 fn java_to_bq_format(fmt: &str) -> String {
17467 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
17468 let c_fmt = java_to_c_format(fmt);
17469 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
17470 }
17471
17472 // For Hive source, CAST string literals to appropriate type
17473 let cast_val = if is_hive_source {
17474 match &val {
17475 Expression::Literal(lit)
17476 if matches!(
17477 lit.as_ref(),
17478 crate::expressions::Literal::String(_)
17479 ) =>
17480 {
17481 match target {
17482 DialectType::DuckDB
17483 | DialectType::Presto
17484 | DialectType::Trino
17485 | DialectType::Athena => {
17486 Self::ensure_cast_timestamp(val.clone())
17487 }
17488 DialectType::BigQuery => {
17489 // BigQuery: CAST(val AS DATETIME)
17490 Expression::Cast(Box::new(
17491 crate::expressions::Cast {
17492 this: val.clone(),
17493 to: DataType::Custom {
17494 name: "DATETIME".to_string(),
17495 },
17496 trailing_comments: vec![],
17497 double_colon_syntax: false,
17498 format: None,
17499 default: None,
17500 inferred_type: None,
17501 },
17502 ))
17503 }
17504 _ => val.clone(),
17505 }
17506 }
17507 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
17508 Expression::Cast(c)
17509 if matches!(c.to, DataType::Date)
17510 && matches!(
17511 target,
17512 DialectType::Presto
17513 | DialectType::Trino
17514 | DialectType::Athena
17515 ) =>
17516 {
17517 Expression::Cast(Box::new(crate::expressions::Cast {
17518 this: val.clone(),
17519 to: DataType::Timestamp {
17520 timezone: false,
17521 precision: None,
17522 },
17523 trailing_comments: vec![],
17524 double_colon_syntax: false,
17525 format: None,
17526 default: None,
17527 inferred_type: None,
17528 }))
17529 }
17530 Expression::Literal(lit)
17531 if matches!(
17532 lit.as_ref(),
17533 crate::expressions::Literal::Date(_)
17534 ) && matches!(
17535 target,
17536 DialectType::Presto
17537 | DialectType::Trino
17538 | DialectType::Athena
17539 ) =>
17540 {
17541 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
17542 let cast_date = Self::date_literal_to_cast(val.clone());
17543 Expression::Cast(Box::new(crate::expressions::Cast {
17544 this: cast_date,
17545 to: DataType::Timestamp {
17546 timezone: false,
17547 precision: None,
17548 },
17549 trailing_comments: vec![],
17550 double_colon_syntax: false,
17551 format: None,
17552 default: None,
17553 inferred_type: None,
17554 }))
17555 }
17556 _ => val.clone(),
17557 }
17558 } else {
17559 val.clone()
17560 };
17561
17562 match target {
17563 DialectType::DuckDB => {
17564 if let Expression::Literal(lit) = fmt_expr {
17565 if let crate::expressions::Literal::String(s) =
17566 lit.as_ref()
17567 {
17568 let c_fmt = if is_hive_source {
17569 java_to_c_format(s)
17570 } else {
17571 s.clone()
17572 };
17573 Ok(Expression::Function(Box::new(Function::new(
17574 "STRFTIME".to_string(),
17575 vec![cast_val, Expression::string(&c_fmt)],
17576 ))))
17577 } else {
17578 Ok(Expression::Function(Box::new(Function::new(
17579 "STRFTIME".to_string(),
17580 vec![cast_val, fmt_expr.clone()],
17581 ))))
17582 }
17583 } else {
17584 Ok(Expression::Function(Box::new(Function::new(
17585 "STRFTIME".to_string(),
17586 vec![cast_val, fmt_expr.clone()],
17587 ))))
17588 }
17589 }
17590 DialectType::Presto
17591 | DialectType::Trino
17592 | DialectType::Athena => {
17593 if is_hive_source {
17594 if let Expression::Literal(lit) = fmt_expr {
17595 if let crate::expressions::Literal::String(s) =
17596 lit.as_ref()
17597 {
17598 let p_fmt = java_to_presto_format(s);
17599 Ok(Expression::Function(Box::new(
17600 Function::new(
17601 "DATE_FORMAT".to_string(),
17602 vec![
17603 cast_val,
17604 Expression::string(&p_fmt),
17605 ],
17606 ),
17607 )))
17608 } else {
17609 Ok(Expression::Function(Box::new(
17610 Function::new(
17611 "DATE_FORMAT".to_string(),
17612 vec![cast_val, fmt_expr.clone()],
17613 ),
17614 )))
17615 }
17616 } else {
17617 Ok(Expression::Function(Box::new(Function::new(
17618 "DATE_FORMAT".to_string(),
17619 vec![cast_val, fmt_expr.clone()],
17620 ))))
17621 }
17622 } else {
17623 Ok(Expression::Function(Box::new(Function::new(
17624 "DATE_FORMAT".to_string(),
17625 f.args,
17626 ))))
17627 }
17628 }
17629 DialectType::BigQuery => {
17630 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
17631 if let Expression::Literal(lit) = fmt_expr {
17632 if let crate::expressions::Literal::String(s) =
17633 lit.as_ref()
17634 {
17635 let bq_fmt = if is_hive_source {
17636 java_to_bq_format(s)
17637 } else {
17638 java_to_c_format(s)
17639 };
17640 Ok(Expression::Function(Box::new(Function::new(
17641 "FORMAT_DATE".to_string(),
17642 vec![Expression::string(&bq_fmt), cast_val],
17643 ))))
17644 } else {
17645 Ok(Expression::Function(Box::new(Function::new(
17646 "FORMAT_DATE".to_string(),
17647 vec![fmt_expr.clone(), cast_val],
17648 ))))
17649 }
17650 } else {
17651 Ok(Expression::Function(Box::new(Function::new(
17652 "FORMAT_DATE".to_string(),
17653 vec![fmt_expr.clone(), cast_val],
17654 ))))
17655 }
17656 }
17657 DialectType::PostgreSQL | DialectType::Redshift => {
17658 if let Expression::Literal(lit) = fmt_expr {
17659 if let crate::expressions::Literal::String(s) =
17660 lit.as_ref()
17661 {
17662 let pg_fmt = s
17663 .replace("yyyy", "YYYY")
17664 .replace("MM", "MM")
17665 .replace("dd", "DD")
17666 .replace("HH", "HH24")
17667 .replace("mm", "MI")
17668 .replace("ss", "SS")
17669 .replace("yy", "YY");
17670 Ok(Expression::Function(Box::new(Function::new(
17671 "TO_CHAR".to_string(),
17672 vec![val, Expression::string(&pg_fmt)],
17673 ))))
17674 } else {
17675 Ok(Expression::Function(Box::new(Function::new(
17676 "TO_CHAR".to_string(),
17677 vec![val, fmt_expr.clone()],
17678 ))))
17679 }
17680 } else {
17681 Ok(Expression::Function(Box::new(Function::new(
17682 "TO_CHAR".to_string(),
17683 vec![val, fmt_expr.clone()],
17684 ))))
17685 }
17686 }
17687 _ => Ok(Expression::Function(f)),
17688 }
17689 }
17690 // DATEDIFF(unit, start, end) - 3-arg form
17691 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
17692 "DATEDIFF" if f.args.len() == 3 => {
17693 let mut args = f.args;
17694 // SQLite source: args = (date1, date2, unit_string)
17695 // Standard source: args = (unit, start, end)
17696 let (_arg0, arg1, arg2, unit_str) =
17697 if matches!(source, DialectType::SQLite) {
17698 let date1 = args.remove(0);
17699 let date2 = args.remove(0);
17700 let unit_expr = args.remove(0);
17701 let unit_s = Self::get_unit_str_static(&unit_expr);
17702
17703 // For SQLite target, generate JULIANDAY arithmetic directly
17704 if matches!(target, DialectType::SQLite) {
17705 let jd_first = Expression::Function(Box::new(
17706 Function::new("JULIANDAY".to_string(), vec![date1]),
17707 ));
17708 let jd_second = Expression::Function(Box::new(
17709 Function::new("JULIANDAY".to_string(), vec![date2]),
17710 ));
17711 let diff = Expression::Sub(Box::new(
17712 crate::expressions::BinaryOp::new(
17713 jd_first, jd_second,
17714 ),
17715 ));
17716 let paren_diff = Expression::Paren(Box::new(
17717 crate::expressions::Paren {
17718 this: diff,
17719 trailing_comments: Vec::new(),
17720 },
17721 ));
17722 let adjusted = match unit_s.as_str() {
17723 "HOUR" => Expression::Mul(Box::new(
17724 crate::expressions::BinaryOp::new(
17725 paren_diff,
17726 Expression::Literal(Box::new(
17727 Literal::Number("24.0".to_string()),
17728 )),
17729 ),
17730 )),
17731 "MINUTE" => Expression::Mul(Box::new(
17732 crate::expressions::BinaryOp::new(
17733 paren_diff,
17734 Expression::Literal(Box::new(
17735 Literal::Number("1440.0".to_string()),
17736 )),
17737 ),
17738 )),
17739 "SECOND" => Expression::Mul(Box::new(
17740 crate::expressions::BinaryOp::new(
17741 paren_diff,
17742 Expression::Literal(Box::new(
17743 Literal::Number("86400.0".to_string()),
17744 )),
17745 ),
17746 )),
17747 "MONTH" => Expression::Div(Box::new(
17748 crate::expressions::BinaryOp::new(
17749 paren_diff,
17750 Expression::Literal(Box::new(
17751 Literal::Number("30.0".to_string()),
17752 )),
17753 ),
17754 )),
17755 "YEAR" => Expression::Div(Box::new(
17756 crate::expressions::BinaryOp::new(
17757 paren_diff,
17758 Expression::Literal(Box::new(
17759 Literal::Number("365.0".to_string()),
17760 )),
17761 ),
17762 )),
17763 _ => paren_diff,
17764 };
17765 return Ok(Expression::Cast(Box::new(Cast {
17766 this: adjusted,
17767 to: DataType::Int {
17768 length: None,
17769 integer_spelling: true,
17770 },
17771 trailing_comments: vec![],
17772 double_colon_syntax: false,
17773 format: None,
17774 default: None,
17775 inferred_type: None,
17776 })));
17777 }
17778
17779 // For other targets, remap to standard (unit, start, end) form
17780 let unit_ident =
17781 Expression::Identifier(Identifier::new(&unit_s));
17782 (unit_ident, date1, date2, unit_s)
17783 } else {
17784 let arg0 = args.remove(0);
17785 let arg1 = args.remove(0);
17786 let arg2 = args.remove(0);
17787 let unit_s = Self::get_unit_str_static(&arg0);
17788 (arg0, arg1, arg2, unit_s)
17789 };
17790
17791 // For Hive/Spark source, string literal dates need to be cast
17792 // Note: Databricks is excluded - it handles string args like standard SQL
17793 let is_hive_spark =
17794 matches!(source, DialectType::Hive | DialectType::Spark);
17795
17796 match target {
17797 DialectType::Snowflake => {
17798 let unit =
17799 Expression::Identifier(Identifier::new(&unit_str));
17800 // Use ensure_to_date_preserved to add TO_DATE with a marker
17801 // that prevents the Snowflake TO_DATE handler from converting it to CAST
17802 let d1 = if is_hive_spark {
17803 Self::ensure_to_date_preserved(arg1)
17804 } else {
17805 arg1
17806 };
17807 let d2 = if is_hive_spark {
17808 Self::ensure_to_date_preserved(arg2)
17809 } else {
17810 arg2
17811 };
17812 Ok(Expression::Function(Box::new(Function::new(
17813 "DATEDIFF".to_string(),
17814 vec![unit, d1, d2],
17815 ))))
17816 }
17817 DialectType::Redshift => {
17818 let unit =
17819 Expression::Identifier(Identifier::new(&unit_str));
17820 let d1 = if is_hive_spark {
17821 Self::ensure_cast_date(arg1)
17822 } else {
17823 arg1
17824 };
17825 let d2 = if is_hive_spark {
17826 Self::ensure_cast_date(arg2)
17827 } else {
17828 arg2
17829 };
17830 Ok(Expression::Function(Box::new(Function::new(
17831 "DATEDIFF".to_string(),
17832 vec![unit, d1, d2],
17833 ))))
17834 }
17835 DialectType::TSQL => {
17836 let unit =
17837 Expression::Identifier(Identifier::new(&unit_str));
17838 Ok(Expression::Function(Box::new(Function::new(
17839 "DATEDIFF".to_string(),
17840 vec![unit, arg1, arg2],
17841 ))))
17842 }
17843 DialectType::DuckDB => {
17844 let is_redshift_tsql = matches!(
17845 source,
17846 DialectType::Redshift | DialectType::TSQL
17847 );
17848 if is_hive_spark {
17849 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
17850 let d1 = Self::ensure_cast_date(arg1);
17851 let d2 = Self::ensure_cast_date(arg2);
17852 Ok(Expression::Function(Box::new(Function::new(
17853 "DATE_DIFF".to_string(),
17854 vec![Expression::string(&unit_str), d1, d2],
17855 ))))
17856 } else if matches!(source, DialectType::Snowflake) {
17857 // For Snowflake source: special handling per unit
17858 match unit_str.as_str() {
17859 "NANOSECOND" => {
17860 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
17861 fn cast_to_timestamp_ns(
17862 expr: Expression,
17863 ) -> Expression
17864 {
17865 Expression::Cast(Box::new(Cast {
17866 this: expr,
17867 to: DataType::Custom {
17868 name: "TIMESTAMP_NS".to_string(),
17869 },
17870 trailing_comments: vec![],
17871 double_colon_syntax: false,
17872 format: None,
17873 default: None,
17874 inferred_type: None,
17875 }))
17876 }
17877 let epoch_end = Expression::Function(Box::new(
17878 Function::new(
17879 "EPOCH_NS".to_string(),
17880 vec![cast_to_timestamp_ns(arg2)],
17881 ),
17882 ));
17883 let epoch_start = Expression::Function(
17884 Box::new(Function::new(
17885 "EPOCH_NS".to_string(),
17886 vec![cast_to_timestamp_ns(arg1)],
17887 )),
17888 );
17889 Ok(Expression::Sub(Box::new(BinaryOp::new(
17890 epoch_end,
17891 epoch_start,
17892 ))))
17893 }
17894 "WEEK" => {
17895 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
17896 let d1 = Self::force_cast_date(arg1);
17897 let d2 = Self::force_cast_date(arg2);
17898 let dt1 = Expression::Function(Box::new(
17899 Function::new(
17900 "DATE_TRUNC".to_string(),
17901 vec![Expression::string("WEEK"), d1],
17902 ),
17903 ));
17904 let dt2 = Expression::Function(Box::new(
17905 Function::new(
17906 "DATE_TRUNC".to_string(),
17907 vec![Expression::string("WEEK"), d2],
17908 ),
17909 ));
17910 Ok(Expression::Function(Box::new(
17911 Function::new(
17912 "DATE_DIFF".to_string(),
17913 vec![
17914 Expression::string(&unit_str),
17915 dt1,
17916 dt2,
17917 ],
17918 ),
17919 )))
17920 }
17921 _ => {
17922 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
17923 let d1 = Self::force_cast_date(arg1);
17924 let d2 = Self::force_cast_date(arg2);
17925 Ok(Expression::Function(Box::new(
17926 Function::new(
17927 "DATE_DIFF".to_string(),
17928 vec![
17929 Expression::string(&unit_str),
17930 d1,
17931 d2,
17932 ],
17933 ),
17934 )))
17935 }
17936 }
17937 } else if is_redshift_tsql {
17938 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
17939 let d1 = Self::force_cast_timestamp(arg1);
17940 let d2 = Self::force_cast_timestamp(arg2);
17941 Ok(Expression::Function(Box::new(Function::new(
17942 "DATE_DIFF".to_string(),
17943 vec![Expression::string(&unit_str), d1, d2],
17944 ))))
17945 } else {
17946 // Keep as DATEDIFF so DuckDB's transform_datediff handles
17947 // DATE_TRUNC for WEEK, CAST for string literals, etc.
17948 let unit =
17949 Expression::Identifier(Identifier::new(&unit_str));
17950 Ok(Expression::Function(Box::new(Function::new(
17951 "DATEDIFF".to_string(),
17952 vec![unit, arg1, arg2],
17953 ))))
17954 }
17955 }
17956 DialectType::BigQuery => {
17957 let is_redshift_tsql = matches!(
17958 source,
17959 DialectType::Redshift
17960 | DialectType::TSQL
17961 | DialectType::Snowflake
17962 );
17963 let cast_d1 = if is_hive_spark {
17964 Self::ensure_cast_date(arg1)
17965 } else if is_redshift_tsql {
17966 Self::force_cast_datetime(arg1)
17967 } else {
17968 Self::ensure_cast_datetime(arg1)
17969 };
17970 let cast_d2 = if is_hive_spark {
17971 Self::ensure_cast_date(arg2)
17972 } else if is_redshift_tsql {
17973 Self::force_cast_datetime(arg2)
17974 } else {
17975 Self::ensure_cast_datetime(arg2)
17976 };
17977 let unit =
17978 Expression::Identifier(Identifier::new(&unit_str));
17979 Ok(Expression::Function(Box::new(Function::new(
17980 "DATE_DIFF".to_string(),
17981 vec![cast_d2, cast_d1, unit],
17982 ))))
17983 }
17984 DialectType::Presto
17985 | DialectType::Trino
17986 | DialectType::Athena => {
17987 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
17988 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
17989 let is_redshift_tsql = matches!(
17990 source,
17991 DialectType::Redshift
17992 | DialectType::TSQL
17993 | DialectType::Snowflake
17994 );
17995 let d1 = if is_hive_spark {
17996 Self::double_cast_timestamp_date(arg1)
17997 } else if is_redshift_tsql {
17998 Self::force_cast_timestamp(arg1)
17999 } else {
18000 arg1
18001 };
18002 let d2 = if is_hive_spark {
18003 Self::double_cast_timestamp_date(arg2)
18004 } else if is_redshift_tsql {
18005 Self::force_cast_timestamp(arg2)
18006 } else {
18007 arg2
18008 };
18009 Ok(Expression::Function(Box::new(Function::new(
18010 "DATE_DIFF".to_string(),
18011 vec![Expression::string(&unit_str), d1, d2],
18012 ))))
18013 }
18014 DialectType::Hive => match unit_str.as_str() {
18015 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
18016 this: Expression::Function(Box::new(Function::new(
18017 "MONTHS_BETWEEN".to_string(),
18018 vec![arg2, arg1],
18019 ))),
18020 to: DataType::Int {
18021 length: None,
18022 integer_spelling: false,
18023 },
18024 trailing_comments: vec![],
18025 double_colon_syntax: false,
18026 format: None,
18027 default: None,
18028 inferred_type: None,
18029 }))),
18030 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
18031 this: Expression::Div(Box::new(
18032 crate::expressions::BinaryOp::new(
18033 Expression::Function(Box::new(Function::new(
18034 "DATEDIFF".to_string(),
18035 vec![arg2, arg1],
18036 ))),
18037 Expression::number(7),
18038 ),
18039 )),
18040 to: DataType::Int {
18041 length: None,
18042 integer_spelling: false,
18043 },
18044 trailing_comments: vec![],
18045 double_colon_syntax: false,
18046 format: None,
18047 default: None,
18048 inferred_type: None,
18049 }))),
18050 _ => Ok(Expression::Function(Box::new(Function::new(
18051 "DATEDIFF".to_string(),
18052 vec![arg2, arg1],
18053 )))),
18054 },
18055 DialectType::Spark | DialectType::Databricks => {
18056 let unit =
18057 Expression::Identifier(Identifier::new(&unit_str));
18058 Ok(Expression::Function(Box::new(Function::new(
18059 "DATEDIFF".to_string(),
18060 vec![unit, arg1, arg2],
18061 ))))
18062 }
18063 _ => {
18064 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
18065 let d1 = if is_hive_spark {
18066 Self::ensure_cast_date(arg1)
18067 } else {
18068 arg1
18069 };
18070 let d2 = if is_hive_spark {
18071 Self::ensure_cast_date(arg2)
18072 } else {
18073 arg2
18074 };
18075 let unit =
18076 Expression::Identifier(Identifier::new(&unit_str));
18077 Ok(Expression::Function(Box::new(Function::new(
18078 "DATEDIFF".to_string(),
18079 vec![unit, d1, d2],
18080 ))))
18081 }
18082 }
18083 }
18084 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
18085 "DATEDIFF" if f.args.len() == 2 => {
18086 let mut args = f.args;
18087 let arg0 = args.remove(0);
18088 let arg1 = args.remove(0);
18089
18090 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
18091 // Also recognizes TryCast/Cast to DATE that may have been produced by
18092 // cross-dialect TO_DATE -> TRY_CAST conversion
18093 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
18094 if let Expression::Function(ref f) = e {
18095 if f.name.eq_ignore_ascii_case("TO_DATE")
18096 && f.args.len() == 1
18097 {
18098 return (f.args[0].clone(), true);
18099 }
18100 }
18101 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
18102 if let Expression::TryCast(ref c) = e {
18103 if matches!(c.to, DataType::Date) {
18104 return (e, true); // Already properly cast, return as-is
18105 }
18106 }
18107 (e, false)
18108 };
18109
18110 match target {
18111 DialectType::DuckDB => {
18112 // For Hive source, always CAST to DATE
18113 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
18114 let cast_d0 = if matches!(
18115 source,
18116 DialectType::Hive
18117 | DialectType::Spark
18118 | DialectType::Databricks
18119 ) {
18120 let (inner, was_to_date) = unwrap_to_date(arg1);
18121 if was_to_date {
18122 // Already a date expression, use directly
18123 if matches!(&inner, Expression::TryCast(_)) {
18124 inner // Already TRY_CAST(x AS DATE)
18125 } else {
18126 Self::try_cast_date(inner)
18127 }
18128 } else {
18129 Self::force_cast_date(inner)
18130 }
18131 } else {
18132 Self::ensure_cast_date(arg1)
18133 };
18134 let cast_d1 = if matches!(
18135 source,
18136 DialectType::Hive
18137 | DialectType::Spark
18138 | DialectType::Databricks
18139 ) {
18140 let (inner, was_to_date) = unwrap_to_date(arg0);
18141 if was_to_date {
18142 if matches!(&inner, Expression::TryCast(_)) {
18143 inner
18144 } else {
18145 Self::try_cast_date(inner)
18146 }
18147 } else {
18148 Self::force_cast_date(inner)
18149 }
18150 } else {
18151 Self::ensure_cast_date(arg0)
18152 };
18153 Ok(Expression::Function(Box::new(Function::new(
18154 "DATE_DIFF".to_string(),
18155 vec![Expression::string("DAY"), cast_d0, cast_d1],
18156 ))))
18157 }
18158 DialectType::Presto
18159 | DialectType::Trino
18160 | DialectType::Athena => {
18161 // For Hive/Spark source, apply double_cast_timestamp_date
18162 // For other sources (MySQL etc.), just swap args without casting
18163 if matches!(
18164 source,
18165 DialectType::Hive
18166 | DialectType::Spark
18167 | DialectType::Databricks
18168 ) {
18169 let cast_fn = |e: Expression| -> Expression {
18170 let (inner, was_to_date) = unwrap_to_date(e);
18171 if was_to_date {
18172 let first_cast =
18173 Self::double_cast_timestamp_date(inner);
18174 Self::double_cast_timestamp_date(first_cast)
18175 } else {
18176 Self::double_cast_timestamp_date(inner)
18177 }
18178 };
18179 Ok(Expression::Function(Box::new(Function::new(
18180 "DATE_DIFF".to_string(),
18181 vec![
18182 Expression::string("DAY"),
18183 cast_fn(arg1),
18184 cast_fn(arg0),
18185 ],
18186 ))))
18187 } else {
18188 Ok(Expression::Function(Box::new(Function::new(
18189 "DATE_DIFF".to_string(),
18190 vec![Expression::string("DAY"), arg1, arg0],
18191 ))))
18192 }
18193 }
18194 DialectType::Redshift => {
18195 let unit = Expression::Identifier(Identifier::new("DAY"));
18196 Ok(Expression::Function(Box::new(Function::new(
18197 "DATEDIFF".to_string(),
18198 vec![unit, arg1, arg0],
18199 ))))
18200 }
18201 _ => Ok(Expression::Function(Box::new(Function::new(
18202 "DATEDIFF".to_string(),
18203 vec![arg0, arg1],
18204 )))),
18205 }
18206 }
18207 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
18208 "DATE_DIFF" if f.args.len() == 3 => {
18209 let mut args = f.args;
18210 let arg0 = args.remove(0);
18211 let arg1 = args.remove(0);
18212 let arg2 = args.remove(0);
18213 let unit_str = Self::get_unit_str_static(&arg0);
18214
18215 match target {
18216 DialectType::DuckDB => {
18217 // DuckDB: DATE_DIFF('UNIT', start, end)
18218 Ok(Expression::Function(Box::new(Function::new(
18219 "DATE_DIFF".to_string(),
18220 vec![Expression::string(&unit_str), arg1, arg2],
18221 ))))
18222 }
18223 DialectType::Presto
18224 | DialectType::Trino
18225 | DialectType::Athena => {
18226 Ok(Expression::Function(Box::new(Function::new(
18227 "DATE_DIFF".to_string(),
18228 vec![Expression::string(&unit_str), arg1, arg2],
18229 ))))
18230 }
18231 DialectType::ClickHouse => {
18232 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
18233 let unit =
18234 Expression::Identifier(Identifier::new(&unit_str));
18235 Ok(Expression::Function(Box::new(Function::new(
18236 "DATE_DIFF".to_string(),
18237 vec![unit, arg1, arg2],
18238 ))))
18239 }
18240 DialectType::Snowflake | DialectType::Redshift => {
18241 let unit =
18242 Expression::Identifier(Identifier::new(&unit_str));
18243 Ok(Expression::Function(Box::new(Function::new(
18244 "DATEDIFF".to_string(),
18245 vec![unit, arg1, arg2],
18246 ))))
18247 }
18248 _ => {
18249 let unit =
18250 Expression::Identifier(Identifier::new(&unit_str));
18251 Ok(Expression::Function(Box::new(Function::new(
18252 "DATEDIFF".to_string(),
18253 vec![unit, arg1, arg2],
18254 ))))
18255 }
18256 }
18257 }
18258 // DATEADD(unit, val, date) - 3-arg form
18259 "DATEADD" if f.args.len() == 3 => {
18260 let mut args = f.args;
18261 let arg0 = args.remove(0);
18262 let arg1 = args.remove(0);
18263 let arg2 = args.remove(0);
18264 let unit_str = Self::get_unit_str_static(&arg0);
18265
18266 // Normalize TSQL unit abbreviations to standard names
18267 let unit_str = match unit_str.as_str() {
18268 "YY" | "YYYY" => "YEAR".to_string(),
18269 "QQ" | "Q" => "QUARTER".to_string(),
18270 "MM" | "M" => "MONTH".to_string(),
18271 "WK" | "WW" => "WEEK".to_string(),
18272 "DD" | "D" | "DY" => "DAY".to_string(),
18273 "HH" => "HOUR".to_string(),
18274 "MI" | "N" => "MINUTE".to_string(),
18275 "SS" | "S" => "SECOND".to_string(),
18276 "MS" => "MILLISECOND".to_string(),
18277 "MCS" | "US" => "MICROSECOND".to_string(),
18278 _ => unit_str,
18279 };
18280 match target {
18281 DialectType::Snowflake => {
18282 let unit =
18283 Expression::Identifier(Identifier::new(&unit_str));
18284 // Cast string literal to TIMESTAMP, but not for Snowflake source
18285 // (Snowflake natively accepts string literals in DATEADD)
18286 let arg2 = if matches!(
18287 &arg2,
18288 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18289 ) && !matches!(source, DialectType::Snowflake)
18290 {
18291 Expression::Cast(Box::new(Cast {
18292 this: arg2,
18293 to: DataType::Timestamp {
18294 precision: None,
18295 timezone: false,
18296 },
18297 trailing_comments: Vec::new(),
18298 double_colon_syntax: false,
18299 format: None,
18300 default: None,
18301 inferred_type: None,
18302 }))
18303 } else {
18304 arg2
18305 };
18306 Ok(Expression::Function(Box::new(Function::new(
18307 "DATEADD".to_string(),
18308 vec![unit, arg1, arg2],
18309 ))))
18310 }
18311 DialectType::TSQL => {
18312 let unit =
18313 Expression::Identifier(Identifier::new(&unit_str));
18314 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
18315 let arg2 = if matches!(
18316 &arg2,
18317 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18318 ) && !matches!(
18319 source,
18320 DialectType::Spark
18321 | DialectType::Databricks
18322 | DialectType::Hive
18323 ) {
18324 Expression::Cast(Box::new(Cast {
18325 this: arg2,
18326 to: DataType::Custom {
18327 name: "DATETIME2".to_string(),
18328 },
18329 trailing_comments: Vec::new(),
18330 double_colon_syntax: false,
18331 format: None,
18332 default: None,
18333 inferred_type: None,
18334 }))
18335 } else {
18336 arg2
18337 };
18338 Ok(Expression::Function(Box::new(Function::new(
18339 "DATEADD".to_string(),
18340 vec![unit, arg1, arg2],
18341 ))))
18342 }
18343 DialectType::Redshift => {
18344 let unit =
18345 Expression::Identifier(Identifier::new(&unit_str));
18346 Ok(Expression::Function(Box::new(Function::new(
18347 "DATEADD".to_string(),
18348 vec![unit, arg1, arg2],
18349 ))))
18350 }
18351 DialectType::Databricks => {
18352 let unit =
18353 Expression::Identifier(Identifier::new(&unit_str));
18354 // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
18355 // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
18356 let func_name = if matches!(
18357 source,
18358 DialectType::TSQL
18359 | DialectType::Fabric
18360 | DialectType::Databricks
18361 | DialectType::Snowflake
18362 ) {
18363 "DATEADD"
18364 } else {
18365 "DATE_ADD"
18366 };
18367 Ok(Expression::Function(Box::new(Function::new(
18368 func_name.to_string(),
18369 vec![unit, arg1, arg2],
18370 ))))
18371 }
18372 DialectType::DuckDB => {
18373 // Special handling for NANOSECOND from Snowflake
18374 if unit_str == "NANOSECOND"
18375 && matches!(source, DialectType::Snowflake)
18376 {
18377 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
18378 let cast_ts = Expression::Cast(Box::new(Cast {
18379 this: arg2,
18380 to: DataType::Custom {
18381 name: "TIMESTAMP_NS".to_string(),
18382 },
18383 trailing_comments: vec![],
18384 double_colon_syntax: false,
18385 format: None,
18386 default: None,
18387 inferred_type: None,
18388 }));
18389 let epoch_ns =
18390 Expression::Function(Box::new(Function::new(
18391 "EPOCH_NS".to_string(),
18392 vec![cast_ts],
18393 )));
18394 let sum = Expression::Add(Box::new(BinaryOp::new(
18395 epoch_ns, arg1,
18396 )));
18397 Ok(Expression::Function(Box::new(Function::new(
18398 "MAKE_TIMESTAMP_NS".to_string(),
18399 vec![sum],
18400 ))))
18401 } else {
18402 // DuckDB: convert to date + INTERVAL syntax with CAST
18403 let iu = Self::parse_interval_unit_static(&unit_str);
18404 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
18405 this: Some(arg1),
18406 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
18407 }));
18408 // Cast string literal to TIMESTAMP
18409 let arg2 = if matches!(
18410 &arg2,
18411 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18412 ) {
18413 Expression::Cast(Box::new(Cast {
18414 this: arg2,
18415 to: DataType::Timestamp {
18416 precision: None,
18417 timezone: false,
18418 },
18419 trailing_comments: Vec::new(),
18420 double_colon_syntax: false,
18421 format: None,
18422 default: None,
18423 inferred_type: None,
18424 }))
18425 } else {
18426 arg2
18427 };
18428 Ok(Expression::Add(Box::new(
18429 crate::expressions::BinaryOp::new(arg2, interval),
18430 )))
18431 }
18432 }
18433 DialectType::Spark => {
18434 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
18435 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
18436 if matches!(source, DialectType::TSQL | DialectType::Fabric)
18437 {
18438 fn multiply_expr_spark(
18439 expr: Expression,
18440 factor: i64,
18441 ) -> Expression
18442 {
18443 if let Expression::Literal(lit) = &expr {
18444 if let crate::expressions::Literal::Number(n) =
18445 lit.as_ref()
18446 {
18447 if let Ok(val) = n.parse::<i64>() {
18448 return Expression::Literal(Box::new(
18449 crate::expressions::Literal::Number(
18450 (val * factor).to_string(),
18451 ),
18452 ));
18453 }
18454 }
18455 }
18456 Expression::Mul(Box::new(
18457 crate::expressions::BinaryOp::new(
18458 expr,
18459 Expression::Literal(Box::new(
18460 crate::expressions::Literal::Number(
18461 factor.to_string(),
18462 ),
18463 )),
18464 ),
18465 ))
18466 }
18467 let normalized_unit = match unit_str.as_str() {
18468 "YEAR" | "YY" | "YYYY" => "YEAR",
18469 "QUARTER" | "QQ" | "Q" => "QUARTER",
18470 "MONTH" | "MM" | "M" => "MONTH",
18471 "WEEK" | "WK" | "WW" => "WEEK",
18472 "DAY" | "DD" | "D" | "DY" => "DAY",
18473 _ => &unit_str,
18474 };
18475 match normalized_unit {
18476 "YEAR" => {
18477 let months = multiply_expr_spark(arg1, 12);
18478 Ok(Expression::Function(Box::new(
18479 Function::new(
18480 "ADD_MONTHS".to_string(),
18481 vec![arg2, months],
18482 ),
18483 )))
18484 }
18485 "QUARTER" => {
18486 let months = multiply_expr_spark(arg1, 3);
18487 Ok(Expression::Function(Box::new(
18488 Function::new(
18489 "ADD_MONTHS".to_string(),
18490 vec![arg2, months],
18491 ),
18492 )))
18493 }
18494 "MONTH" => Ok(Expression::Function(Box::new(
18495 Function::new(
18496 "ADD_MONTHS".to_string(),
18497 vec![arg2, arg1],
18498 ),
18499 ))),
18500 "WEEK" => {
18501 let days = multiply_expr_spark(arg1, 7);
18502 Ok(Expression::Function(Box::new(
18503 Function::new(
18504 "DATE_ADD".to_string(),
18505 vec![arg2, days],
18506 ),
18507 )))
18508 }
18509 "DAY" => Ok(Expression::Function(Box::new(
18510 Function::new(
18511 "DATE_ADD".to_string(),
18512 vec![arg2, arg1],
18513 ),
18514 ))),
18515 _ => {
18516 let unit = Expression::Identifier(
18517 Identifier::new(&unit_str),
18518 );
18519 Ok(Expression::Function(Box::new(
18520 Function::new(
18521 "DATE_ADD".to_string(),
18522 vec![unit, arg1, arg2],
18523 ),
18524 )))
18525 }
18526 }
18527 } else {
18528 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
18529 let unit =
18530 Expression::Identifier(Identifier::new(&unit_str));
18531 Ok(Expression::Function(Box::new(Function::new(
18532 "DATE_ADD".to_string(),
18533 vec![unit, arg1, arg2],
18534 ))))
18535 }
18536 }
18537 DialectType::Hive => match unit_str.as_str() {
18538 "MONTH" => {
18539 Ok(Expression::Function(Box::new(Function::new(
18540 "ADD_MONTHS".to_string(),
18541 vec![arg2, arg1],
18542 ))))
18543 }
18544 _ => Ok(Expression::Function(Box::new(Function::new(
18545 "DATE_ADD".to_string(),
18546 vec![arg2, arg1],
18547 )))),
18548 },
18549 DialectType::Presto
18550 | DialectType::Trino
18551 | DialectType::Athena => {
18552 // Cast string literal date to TIMESTAMP
18553 let arg2 = if matches!(
18554 &arg2,
18555 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18556 ) {
18557 Expression::Cast(Box::new(Cast {
18558 this: arg2,
18559 to: DataType::Timestamp {
18560 precision: None,
18561 timezone: false,
18562 },
18563 trailing_comments: Vec::new(),
18564 double_colon_syntax: false,
18565 format: None,
18566 default: None,
18567 inferred_type: None,
18568 }))
18569 } else {
18570 arg2
18571 };
18572 Ok(Expression::Function(Box::new(Function::new(
18573 "DATE_ADD".to_string(),
18574 vec![Expression::string(&unit_str), arg1, arg2],
18575 ))))
18576 }
18577 DialectType::MySQL => {
18578 let iu = Self::parse_interval_unit_static(&unit_str);
18579 Ok(Expression::DateAdd(Box::new(
18580 crate::expressions::DateAddFunc {
18581 this: arg2,
18582 interval: arg1,
18583 unit: iu,
18584 },
18585 )))
18586 }
18587 DialectType::PostgreSQL => {
18588 // Cast string literal date to TIMESTAMP
18589 let arg2 = if matches!(
18590 &arg2,
18591 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18592 ) {
18593 Expression::Cast(Box::new(Cast {
18594 this: arg2,
18595 to: DataType::Timestamp {
18596 precision: None,
18597 timezone: false,
18598 },
18599 trailing_comments: Vec::new(),
18600 double_colon_syntax: false,
18601 format: None,
18602 default: None,
18603 inferred_type: None,
18604 }))
18605 } else {
18606 arg2
18607 };
18608 let interval = Expression::Interval(Box::new(
18609 crate::expressions::Interval {
18610 this: Some(Expression::string(&format!(
18611 "{} {}",
18612 Self::expr_to_string_static(&arg1),
18613 unit_str
18614 ))),
18615 unit: None,
18616 },
18617 ));
18618 Ok(Expression::Add(Box::new(
18619 crate::expressions::BinaryOp::new(arg2, interval),
18620 )))
18621 }
18622 DialectType::BigQuery => {
18623 let iu = Self::parse_interval_unit_static(&unit_str);
18624 let interval = Expression::Interval(Box::new(
18625 crate::expressions::Interval {
18626 this: Some(arg1),
18627 unit: Some(
18628 crate::expressions::IntervalUnitSpec::Simple {
18629 unit: iu,
18630 use_plural: false,
18631 },
18632 ),
18633 },
18634 ));
18635 // Non-TSQL sources: CAST string literal to DATETIME
18636 let arg2 = if !matches!(
18637 source,
18638 DialectType::TSQL | DialectType::Fabric
18639 ) && matches!(
18640 &arg2,
18641 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18642 ) {
18643 Expression::Cast(Box::new(Cast {
18644 this: arg2,
18645 to: DataType::Custom {
18646 name: "DATETIME".to_string(),
18647 },
18648 trailing_comments: Vec::new(),
18649 double_colon_syntax: false,
18650 format: None,
18651 default: None,
18652 inferred_type: None,
18653 }))
18654 } else {
18655 arg2
18656 };
18657 Ok(Expression::Function(Box::new(Function::new(
18658 "DATE_ADD".to_string(),
18659 vec![arg2, interval],
18660 ))))
18661 }
18662 _ => {
18663 let unit =
18664 Expression::Identifier(Identifier::new(&unit_str));
18665 Ok(Expression::Function(Box::new(Function::new(
18666 "DATEADD".to_string(),
18667 vec![unit, arg1, arg2],
18668 ))))
18669 }
18670 }
18671 }
18672 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
18673 // or (date, val, 'UNIT') from Generic canonical form
18674 "DATE_ADD" if f.args.len() == 3 => {
18675 let mut args = f.args;
18676 let arg0 = args.remove(0);
18677 let arg1 = args.remove(0);
18678 let arg2 = args.remove(0);
18679 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
18680 // where arg2 is a string literal matching a unit name
18681 let arg2_unit = match &arg2 {
18682 Expression::Literal(lit)
18683 if matches!(lit.as_ref(), Literal::String(_)) =>
18684 {
18685 let Literal::String(s) = lit.as_ref() else {
18686 unreachable!()
18687 };
18688 let u = s.to_ascii_uppercase();
18689 if matches!(
18690 u.as_str(),
18691 "DAY"
18692 | "MONTH"
18693 | "YEAR"
18694 | "HOUR"
18695 | "MINUTE"
18696 | "SECOND"
18697 | "WEEK"
18698 | "QUARTER"
18699 | "MILLISECOND"
18700 | "MICROSECOND"
18701 ) {
18702 Some(u)
18703 } else {
18704 None
18705 }
18706 }
18707 _ => None,
18708 };
18709 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
18710 let (unit_str, val, date) = if let Some(u) = arg2_unit {
18711 (u, arg1, arg0)
18712 } else {
18713 (Self::get_unit_str_static(&arg0), arg1, arg2)
18714 };
18715 // Alias for backward compat with the rest of the match
18716 let arg1 = val;
18717 let arg2 = date;
18718
18719 match target {
18720 DialectType::Presto
18721 | DialectType::Trino
18722 | DialectType::Athena => {
18723 Ok(Expression::Function(Box::new(Function::new(
18724 "DATE_ADD".to_string(),
18725 vec![Expression::string(&unit_str), arg1, arg2],
18726 ))))
18727 }
18728 DialectType::DuckDB => {
18729 let iu = Self::parse_interval_unit_static(&unit_str);
18730 let interval = Expression::Interval(Box::new(
18731 crate::expressions::Interval {
18732 this: Some(arg1),
18733 unit: Some(
18734 crate::expressions::IntervalUnitSpec::Simple {
18735 unit: iu,
18736 use_plural: false,
18737 },
18738 ),
18739 },
18740 ));
18741 Ok(Expression::Add(Box::new(
18742 crate::expressions::BinaryOp::new(arg2, interval),
18743 )))
18744 }
18745 DialectType::PostgreSQL
18746 | DialectType::Materialize
18747 | DialectType::RisingWave => {
18748 // PostgreSQL: x + INTERVAL '1 DAY'
18749 let amount_str = Self::expr_to_string_static(&arg1);
18750 let interval = Expression::Interval(Box::new(
18751 crate::expressions::Interval {
18752 this: Some(Expression::string(&format!(
18753 "{} {}",
18754 amount_str, unit_str
18755 ))),
18756 unit: None,
18757 },
18758 ));
18759 Ok(Expression::Add(Box::new(
18760 crate::expressions::BinaryOp::new(arg2, interval),
18761 )))
18762 }
18763 DialectType::Snowflake
18764 | DialectType::TSQL
18765 | DialectType::Redshift => {
18766 let unit =
18767 Expression::Identifier(Identifier::new(&unit_str));
18768 Ok(Expression::Function(Box::new(Function::new(
18769 "DATEADD".to_string(),
18770 vec![unit, arg1, arg2],
18771 ))))
18772 }
18773 DialectType::BigQuery
18774 | DialectType::MySQL
18775 | DialectType::Doris
18776 | DialectType::StarRocks
18777 | DialectType::Drill => {
18778 // DATE_ADD(date, INTERVAL amount UNIT)
18779 let iu = Self::parse_interval_unit_static(&unit_str);
18780 let interval = Expression::Interval(Box::new(
18781 crate::expressions::Interval {
18782 this: Some(arg1),
18783 unit: Some(
18784 crate::expressions::IntervalUnitSpec::Simple {
18785 unit: iu,
18786 use_plural: false,
18787 },
18788 ),
18789 },
18790 ));
18791 Ok(Expression::Function(Box::new(Function::new(
18792 "DATE_ADD".to_string(),
18793 vec![arg2, interval],
18794 ))))
18795 }
18796 DialectType::SQLite => {
18797 // SQLite: DATE(x, '1 DAY')
18798 // Build the string '1 DAY' from amount and unit
18799 let amount_str = match &arg1 {
18800 Expression::Literal(lit)
18801 if matches!(lit.as_ref(), Literal::Number(_)) =>
18802 {
18803 let Literal::Number(n) = lit.as_ref() else {
18804 unreachable!()
18805 };
18806 n.clone()
18807 }
18808 _ => "1".to_string(),
18809 };
18810 Ok(Expression::Function(Box::new(Function::new(
18811 "DATE".to_string(),
18812 vec![
18813 arg2,
18814 Expression::string(format!(
18815 "{} {}",
18816 amount_str, unit_str
18817 )),
18818 ],
18819 ))))
18820 }
18821 DialectType::Dremio => {
18822 // Dremio: DATE_ADD(date, amount) - drops unit
18823 Ok(Expression::Function(Box::new(Function::new(
18824 "DATE_ADD".to_string(),
18825 vec![arg2, arg1],
18826 ))))
18827 }
18828 DialectType::Spark => {
18829 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
18830 if unit_str == "DAY" {
18831 Ok(Expression::Function(Box::new(Function::new(
18832 "DATE_ADD".to_string(),
18833 vec![arg2, arg1],
18834 ))))
18835 } else {
18836 let unit =
18837 Expression::Identifier(Identifier::new(&unit_str));
18838 Ok(Expression::Function(Box::new(Function::new(
18839 "DATE_ADD".to_string(),
18840 vec![unit, arg1, arg2],
18841 ))))
18842 }
18843 }
18844 DialectType::Databricks => {
18845 let unit =
18846 Expression::Identifier(Identifier::new(&unit_str));
18847 Ok(Expression::Function(Box::new(Function::new(
18848 "DATE_ADD".to_string(),
18849 vec![unit, arg1, arg2],
18850 ))))
18851 }
18852 DialectType::Hive => {
18853 // Hive: DATE_ADD(date, val) for DAY
18854 Ok(Expression::Function(Box::new(Function::new(
18855 "DATE_ADD".to_string(),
18856 vec![arg2, arg1],
18857 ))))
18858 }
18859 _ => {
18860 let unit =
18861 Expression::Identifier(Identifier::new(&unit_str));
18862 Ok(Expression::Function(Box::new(Function::new(
18863 "DATE_ADD".to_string(),
18864 vec![unit, arg1, arg2],
18865 ))))
18866 }
18867 }
18868 }
18869 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
18870 "DATE_ADD"
18871 if f.args.len() == 2
18872 && matches!(
18873 source,
18874 DialectType::Hive
18875 | DialectType::Spark
18876 | DialectType::Databricks
18877 | DialectType::Generic
18878 ) =>
18879 {
18880 let mut args = f.args;
18881 let date = args.remove(0);
18882 let days = args.remove(0);
18883 match target {
18884 DialectType::Hive | DialectType::Spark => {
18885 // Keep as DATE_ADD(date, days) for Hive/Spark
18886 Ok(Expression::Function(Box::new(Function::new(
18887 "DATE_ADD".to_string(),
18888 vec![date, days],
18889 ))))
18890 }
18891 DialectType::Databricks => Ok(Expression::Function(Box::new(
18892 Function::new("DATE_ADD".to_string(), vec![date, days]),
18893 ))),
18894 DialectType::DuckDB => {
18895 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
18896 let cast_date = Self::ensure_cast_date(date);
18897 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
18898 let interval_val = if matches!(
18899 days,
18900 Expression::Mul(_)
18901 | Expression::Sub(_)
18902 | Expression::Add(_)
18903 ) {
18904 Expression::Paren(Box::new(crate::expressions::Paren {
18905 this: days,
18906 trailing_comments: vec![],
18907 }))
18908 } else {
18909 days
18910 };
18911 let interval = Expression::Interval(Box::new(
18912 crate::expressions::Interval {
18913 this: Some(interval_val),
18914 unit: Some(
18915 crate::expressions::IntervalUnitSpec::Simple {
18916 unit: crate::expressions::IntervalUnit::Day,
18917 use_plural: false,
18918 },
18919 ),
18920 },
18921 ));
18922 Ok(Expression::Add(Box::new(
18923 crate::expressions::BinaryOp::new(cast_date, interval),
18924 )))
18925 }
18926 DialectType::Snowflake => {
18927 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
18928 let cast_date = if matches!(
18929 source,
18930 DialectType::Hive
18931 | DialectType::Spark
18932 | DialectType::Databricks
18933 ) {
18934 if matches!(
18935 date,
18936 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
18937 ) {
18938 Self::double_cast_timestamp_date(date)
18939 } else {
18940 date
18941 }
18942 } else {
18943 date
18944 };
18945 Ok(Expression::Function(Box::new(Function::new(
18946 "DATEADD".to_string(),
18947 vec![
18948 Expression::Identifier(Identifier::new("DAY")),
18949 days,
18950 cast_date,
18951 ],
18952 ))))
18953 }
18954 DialectType::Redshift => {
18955 Ok(Expression::Function(Box::new(Function::new(
18956 "DATEADD".to_string(),
18957 vec![
18958 Expression::Identifier(Identifier::new("DAY")),
18959 days,
18960 date,
18961 ],
18962 ))))
18963 }
18964 DialectType::TSQL | DialectType::Fabric => {
18965 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
18966 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
18967 let cast_date = if matches!(
18968 source,
18969 DialectType::Hive
18970 | DialectType::Spark
18971 | DialectType::Databricks
18972 ) {
18973 if matches!(
18974 date,
18975 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
18976 ) {
18977 Self::double_cast_datetime2_date(date)
18978 } else {
18979 date
18980 }
18981 } else {
18982 date
18983 };
18984 Ok(Expression::Function(Box::new(Function::new(
18985 "DATEADD".to_string(),
18986 vec![
18987 Expression::Identifier(Identifier::new("DAY")),
18988 days,
18989 cast_date,
18990 ],
18991 ))))
18992 }
18993 DialectType::Presto
18994 | DialectType::Trino
18995 | DialectType::Athena => {
18996 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
18997 let cast_date = if matches!(
18998 source,
18999 DialectType::Hive
19000 | DialectType::Spark
19001 | DialectType::Databricks
19002 ) {
19003 if matches!(
19004 date,
19005 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
19006 ) {
19007 Self::double_cast_timestamp_date(date)
19008 } else {
19009 date
19010 }
19011 } else {
19012 date
19013 };
19014 Ok(Expression::Function(Box::new(Function::new(
19015 "DATE_ADD".to_string(),
19016 vec![Expression::string("DAY"), days, cast_date],
19017 ))))
19018 }
19019 DialectType::BigQuery => {
19020 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
19021 let cast_date = if matches!(
19022 source,
19023 DialectType::Hive
19024 | DialectType::Spark
19025 | DialectType::Databricks
19026 ) {
19027 Self::double_cast_datetime_date(date)
19028 } else {
19029 date
19030 };
19031 // Wrap complex expressions in Paren for interval
19032 let interval_val = if matches!(
19033 days,
19034 Expression::Mul(_)
19035 | Expression::Sub(_)
19036 | Expression::Add(_)
19037 ) {
19038 Expression::Paren(Box::new(crate::expressions::Paren {
19039 this: days,
19040 trailing_comments: vec![],
19041 }))
19042 } else {
19043 days
19044 };
19045 let interval = Expression::Interval(Box::new(
19046 crate::expressions::Interval {
19047 this: Some(interval_val),
19048 unit: Some(
19049 crate::expressions::IntervalUnitSpec::Simple {
19050 unit: crate::expressions::IntervalUnit::Day,
19051 use_plural: false,
19052 },
19053 ),
19054 },
19055 ));
19056 Ok(Expression::Function(Box::new(Function::new(
19057 "DATE_ADD".to_string(),
19058 vec![cast_date, interval],
19059 ))))
19060 }
19061 DialectType::MySQL => {
19062 let iu = crate::expressions::IntervalUnit::Day;
19063 Ok(Expression::DateAdd(Box::new(
19064 crate::expressions::DateAddFunc {
19065 this: date,
19066 interval: days,
19067 unit: iu,
19068 },
19069 )))
19070 }
19071 DialectType::PostgreSQL => {
19072 let interval = Expression::Interval(Box::new(
19073 crate::expressions::Interval {
19074 this: Some(Expression::string(&format!(
19075 "{} DAY",
19076 Self::expr_to_string_static(&days)
19077 ))),
19078 unit: None,
19079 },
19080 ));
19081 Ok(Expression::Add(Box::new(
19082 crate::expressions::BinaryOp::new(date, interval),
19083 )))
19084 }
19085 DialectType::Doris
19086 | DialectType::StarRocks
19087 | DialectType::Drill => {
19088 // DATE_ADD(date, INTERVAL days DAY)
19089 let interval = Expression::Interval(Box::new(
19090 crate::expressions::Interval {
19091 this: Some(days),
19092 unit: Some(
19093 crate::expressions::IntervalUnitSpec::Simple {
19094 unit: crate::expressions::IntervalUnit::Day,
19095 use_plural: false,
19096 },
19097 ),
19098 },
19099 ));
19100 Ok(Expression::Function(Box::new(Function::new(
19101 "DATE_ADD".to_string(),
19102 vec![date, interval],
19103 ))))
19104 }
19105 _ => Ok(Expression::Function(Box::new(Function::new(
19106 "DATE_ADD".to_string(),
19107 vec![date, days],
19108 )))),
19109 }
19110 }
19111 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
19112 "DATE_ADD"
19113 if f.args.len() == 2
19114 && matches!(
19115 source,
19116 DialectType::MySQL | DialectType::SingleStore
19117 )
19118 && matches!(&f.args[1], Expression::Interval(_)) =>
19119 {
19120 let mut args = f.args;
19121 let date = args.remove(0);
19122 let interval_expr = args.remove(0);
19123 let (val, unit) = Self::extract_interval_parts(&interval_expr)
19124 .unwrap_or_else(|| {
19125 (
19126 interval_expr.clone(),
19127 crate::expressions::IntervalUnit::Day,
19128 )
19129 });
19130 let unit_str = Self::interval_unit_to_string(&unit);
19131 let is_literal = matches!(&val,
19132 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
19133 );
19134
19135 match target {
19136 DialectType::MySQL | DialectType::SingleStore => {
19137 // Keep as DATE_ADD(date, INTERVAL val UNIT)
19138 Ok(Expression::Function(Box::new(Function::new(
19139 "DATE_ADD".to_string(),
19140 vec![date, interval_expr],
19141 ))))
19142 }
19143 DialectType::PostgreSQL => {
19144 if is_literal {
19145 // Literal: date + INTERVAL 'val UNIT'
19146 let interval = Expression::Interval(Box::new(
19147 crate::expressions::Interval {
19148 this: Some(Expression::Literal(Box::new(
19149 Literal::String(format!(
19150 "{} {}",
19151 Self::expr_to_string(&val),
19152 unit_str
19153 )),
19154 ))),
19155 unit: None,
19156 },
19157 ));
19158 Ok(Expression::Add(Box::new(
19159 crate::expressions::BinaryOp::new(date, interval),
19160 )))
19161 } else {
19162 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
19163 let interval_one = Expression::Interval(Box::new(
19164 crate::expressions::Interval {
19165 this: Some(Expression::Literal(Box::new(
19166 Literal::String(format!("1 {}", unit_str)),
19167 ))),
19168 unit: None,
19169 },
19170 ));
19171 let mul = Expression::Mul(Box::new(
19172 crate::expressions::BinaryOp::new(
19173 interval_one,
19174 val,
19175 ),
19176 ));
19177 Ok(Expression::Add(Box::new(
19178 crate::expressions::BinaryOp::new(date, mul),
19179 )))
19180 }
19181 }
19182 _ => {
19183 // Default: keep as DATE_ADD(date, interval)
19184 Ok(Expression::Function(Box::new(Function::new(
19185 "DATE_ADD".to_string(),
19186 vec![date, interval_expr],
19187 ))))
19188 }
19189 }
19190 }
19191 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
19192 "DATE_SUB"
19193 if f.args.len() == 2
19194 && matches!(
19195 source,
19196 DialectType::Hive
19197 | DialectType::Spark
19198 | DialectType::Databricks
19199 ) =>
19200 {
19201 let mut args = f.args;
19202 let date = args.remove(0);
19203 let days = args.remove(0);
19204 // Helper to create days * -1
19205 let make_neg_days = |d: Expression| -> Expression {
19206 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
19207 d,
19208 Expression::Literal(Box::new(Literal::Number(
19209 "-1".to_string(),
19210 ))),
19211 )))
19212 };
19213 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
19214 match target {
19215 DialectType::Hive
19216 | DialectType::Spark
19217 | DialectType::Databricks => {
19218 // Keep as DATE_SUB(date, days) for Hive/Spark
19219 Ok(Expression::Function(Box::new(Function::new(
19220 "DATE_SUB".to_string(),
19221 vec![date, days],
19222 ))))
19223 }
19224 DialectType::DuckDB => {
19225 let cast_date = Self::ensure_cast_date(date);
19226 let neg = make_neg_days(days);
19227 let interval = Expression::Interval(Box::new(
19228 crate::expressions::Interval {
19229 this: Some(Expression::Paren(Box::new(
19230 crate::expressions::Paren {
19231 this: neg,
19232 trailing_comments: vec![],
19233 },
19234 ))),
19235 unit: Some(
19236 crate::expressions::IntervalUnitSpec::Simple {
19237 unit: crate::expressions::IntervalUnit::Day,
19238 use_plural: false,
19239 },
19240 ),
19241 },
19242 ));
19243 Ok(Expression::Add(Box::new(
19244 crate::expressions::BinaryOp::new(cast_date, interval),
19245 )))
19246 }
19247 DialectType::Snowflake => {
19248 let cast_date = if is_string_literal {
19249 Self::double_cast_timestamp_date(date)
19250 } else {
19251 date
19252 };
19253 let neg = make_neg_days(days);
19254 Ok(Expression::Function(Box::new(Function::new(
19255 "DATEADD".to_string(),
19256 vec![
19257 Expression::Identifier(Identifier::new("DAY")),
19258 neg,
19259 cast_date,
19260 ],
19261 ))))
19262 }
19263 DialectType::Redshift => {
19264 let neg = make_neg_days(days);
19265 Ok(Expression::Function(Box::new(Function::new(
19266 "DATEADD".to_string(),
19267 vec![
19268 Expression::Identifier(Identifier::new("DAY")),
19269 neg,
19270 date,
19271 ],
19272 ))))
19273 }
19274 DialectType::TSQL | DialectType::Fabric => {
19275 let cast_date = if is_string_literal {
19276 Self::double_cast_datetime2_date(date)
19277 } else {
19278 date
19279 };
19280 let neg = make_neg_days(days);
19281 Ok(Expression::Function(Box::new(Function::new(
19282 "DATEADD".to_string(),
19283 vec![
19284 Expression::Identifier(Identifier::new("DAY")),
19285 neg,
19286 cast_date,
19287 ],
19288 ))))
19289 }
19290 DialectType::Presto
19291 | DialectType::Trino
19292 | DialectType::Athena => {
19293 let cast_date = if is_string_literal {
19294 Self::double_cast_timestamp_date(date)
19295 } else {
19296 date
19297 };
19298 let neg = make_neg_days(days);
19299 Ok(Expression::Function(Box::new(Function::new(
19300 "DATE_ADD".to_string(),
19301 vec![Expression::string("DAY"), neg, cast_date],
19302 ))))
19303 }
19304 DialectType::BigQuery => {
19305 let cast_date = if is_string_literal {
19306 Self::double_cast_datetime_date(date)
19307 } else {
19308 date
19309 };
19310 let neg = make_neg_days(days);
19311 let interval = Expression::Interval(Box::new(
19312 crate::expressions::Interval {
19313 this: Some(Expression::Paren(Box::new(
19314 crate::expressions::Paren {
19315 this: neg,
19316 trailing_comments: vec![],
19317 },
19318 ))),
19319 unit: Some(
19320 crate::expressions::IntervalUnitSpec::Simple {
19321 unit: crate::expressions::IntervalUnit::Day,
19322 use_plural: false,
19323 },
19324 ),
19325 },
19326 ));
19327 Ok(Expression::Function(Box::new(Function::new(
19328 "DATE_ADD".to_string(),
19329 vec![cast_date, interval],
19330 ))))
19331 }
19332 _ => Ok(Expression::Function(Box::new(Function::new(
19333 "DATE_SUB".to_string(),
19334 vec![date, days],
19335 )))),
19336 }
19337 }
19338 // ADD_MONTHS(date, val) -> target-specific
19339 "ADD_MONTHS" if f.args.len() == 2 => {
19340 let mut args = f.args;
19341 let date = args.remove(0);
19342 let val = args.remove(0);
19343 match target {
19344 DialectType::TSQL => {
19345 let cast_date = Self::ensure_cast_datetime2(date);
19346 Ok(Expression::Function(Box::new(Function::new(
19347 "DATEADD".to_string(),
19348 vec![
19349 Expression::Identifier(Identifier::new("MONTH")),
19350 val,
19351 cast_date,
19352 ],
19353 ))))
19354 }
19355 DialectType::DuckDB => {
19356 let interval = Expression::Interval(Box::new(
19357 crate::expressions::Interval {
19358 this: Some(val),
19359 unit: Some(
19360 crate::expressions::IntervalUnitSpec::Simple {
19361 unit:
19362 crate::expressions::IntervalUnit::Month,
19363 use_plural: false,
19364 },
19365 ),
19366 },
19367 ));
19368 Ok(Expression::Add(Box::new(
19369 crate::expressions::BinaryOp::new(date, interval),
19370 )))
19371 }
19372 DialectType::Snowflake => {
19373 // Keep ADD_MONTHS when source is Snowflake
19374 if matches!(source, DialectType::Snowflake) {
19375 Ok(Expression::Function(Box::new(Function::new(
19376 "ADD_MONTHS".to_string(),
19377 vec![date, val],
19378 ))))
19379 } else {
19380 Ok(Expression::Function(Box::new(Function::new(
19381 "DATEADD".to_string(),
19382 vec![
19383 Expression::Identifier(Identifier::new(
19384 "MONTH",
19385 )),
19386 val,
19387 date,
19388 ],
19389 ))))
19390 }
19391 }
19392 DialectType::Redshift => {
19393 Ok(Expression::Function(Box::new(Function::new(
19394 "DATEADD".to_string(),
19395 vec![
19396 Expression::Identifier(Identifier::new("MONTH")),
19397 val,
19398 date,
19399 ],
19400 ))))
19401 }
19402 DialectType::Presto
19403 | DialectType::Trino
19404 | DialectType::Athena => {
19405 Ok(Expression::Function(Box::new(Function::new(
19406 "DATE_ADD".to_string(),
19407 vec![Expression::string("MONTH"), val, date],
19408 ))))
19409 }
19410 DialectType::BigQuery => {
19411 let interval = Expression::Interval(Box::new(
19412 crate::expressions::Interval {
19413 this: Some(val),
19414 unit: Some(
19415 crate::expressions::IntervalUnitSpec::Simple {
19416 unit:
19417 crate::expressions::IntervalUnit::Month,
19418 use_plural: false,
19419 },
19420 ),
19421 },
19422 ));
19423 Ok(Expression::Function(Box::new(Function::new(
19424 "DATE_ADD".to_string(),
19425 vec![date, interval],
19426 ))))
19427 }
19428 _ => Ok(Expression::Function(Box::new(Function::new(
19429 "ADD_MONTHS".to_string(),
19430 vec![date, val],
19431 )))),
19432 }
19433 }
19434 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
19435 "DATETRUNC" if f.args.len() == 2 => {
19436 let mut args = f.args;
19437 let arg0 = args.remove(0);
19438 let arg1 = args.remove(0);
19439 let unit_str = Self::get_unit_str_static(&arg0);
19440 match target {
19441 DialectType::TSQL | DialectType::Fabric => {
19442 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
19443 Ok(Expression::Function(Box::new(Function::new(
19444 "DATETRUNC".to_string(),
19445 vec![
19446 Expression::Identifier(Identifier::new(&unit_str)),
19447 arg1,
19448 ],
19449 ))))
19450 }
19451 DialectType::DuckDB => {
19452 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
19453 let date = Self::ensure_cast_timestamp(arg1);
19454 Ok(Expression::Function(Box::new(Function::new(
19455 "DATE_TRUNC".to_string(),
19456 vec![Expression::string(&unit_str), date],
19457 ))))
19458 }
19459 DialectType::ClickHouse => {
19460 // ClickHouse: dateTrunc('UNIT', expr)
19461 Ok(Expression::Function(Box::new(Function::new(
19462 "dateTrunc".to_string(),
19463 vec![Expression::string(&unit_str), arg1],
19464 ))))
19465 }
19466 _ => {
19467 // Standard: DATE_TRUNC('UNIT', expr)
19468 let unit = Expression::string(&unit_str);
19469 Ok(Expression::Function(Box::new(Function::new(
19470 "DATE_TRUNC".to_string(),
19471 vec![unit, arg1],
19472 ))))
19473 }
19474 }
19475 }
19476 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
19477 "GETDATE" if f.args.is_empty() => match target {
19478 DialectType::TSQL => Ok(Expression::Function(f)),
19479 DialectType::Redshift => Ok(Expression::Function(Box::new(
19480 Function::new("GETDATE".to_string(), vec![]),
19481 ))),
19482 _ => Ok(Expression::CurrentTimestamp(
19483 crate::expressions::CurrentTimestamp {
19484 precision: None,
19485 sysdate: false,
19486 },
19487 )),
19488 },
19489 // TO_HEX(x) / HEX(x) -> target-specific hex function
19490 "TO_HEX" | "HEX" if f.args.len() == 1 => {
19491 let name = match target {
19492 DialectType::Presto | DialectType::Trino => "TO_HEX",
19493 DialectType::Spark
19494 | DialectType::Databricks
19495 | DialectType::Hive => "HEX",
19496 DialectType::DuckDB
19497 | DialectType::PostgreSQL
19498 | DialectType::Redshift => "TO_HEX",
19499 _ => &f.name,
19500 };
19501 Ok(Expression::Function(Box::new(Function::new(
19502 name.to_string(),
19503 f.args,
19504 ))))
19505 }
19506 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
19507 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
19508 match target {
19509 DialectType::BigQuery => {
19510 // BigQuery: UNHEX(x) -> FROM_HEX(x)
19511 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
19512 // because BigQuery MD5 returns BYTES, not hex string
19513 let arg = &f.args[0];
19514 let wrapped_arg = match arg {
19515 Expression::Function(inner_f)
19516 if inner_f.name.eq_ignore_ascii_case("MD5")
19517 || inner_f
19518 .name
19519 .eq_ignore_ascii_case("SHA1")
19520 || inner_f
19521 .name
19522 .eq_ignore_ascii_case("SHA256")
19523 || inner_f
19524 .name
19525 .eq_ignore_ascii_case("SHA512") =>
19526 {
19527 // Wrap hash function in TO_HEX for BigQuery
19528 Expression::Function(Box::new(Function::new(
19529 "TO_HEX".to_string(),
19530 vec![arg.clone()],
19531 )))
19532 }
19533 _ => f.args.into_iter().next().unwrap(),
19534 };
19535 Ok(Expression::Function(Box::new(Function::new(
19536 "FROM_HEX".to_string(),
19537 vec![wrapped_arg],
19538 ))))
19539 }
19540 _ => {
19541 let name = match target {
19542 DialectType::Presto | DialectType::Trino => "FROM_HEX",
19543 DialectType::Spark
19544 | DialectType::Databricks
19545 | DialectType::Hive => "UNHEX",
19546 _ => &f.name,
19547 };
19548 Ok(Expression::Function(Box::new(Function::new(
19549 name.to_string(),
19550 f.args,
19551 ))))
19552 }
19553 }
19554 }
19555 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
19556 "TO_UTF8" if f.args.len() == 1 => match target {
19557 DialectType::Spark | DialectType::Databricks => {
19558 let mut args = f.args;
19559 args.push(Expression::string("utf-8"));
19560 Ok(Expression::Function(Box::new(Function::new(
19561 "ENCODE".to_string(),
19562 args,
19563 ))))
19564 }
19565 _ => Ok(Expression::Function(f)),
19566 },
19567 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
19568 "FROM_UTF8" if f.args.len() == 1 => match target {
19569 DialectType::Spark | DialectType::Databricks => {
19570 let mut args = f.args;
19571 args.push(Expression::string("utf-8"));
19572 Ok(Expression::Function(Box::new(Function::new(
19573 "DECODE".to_string(),
19574 args,
19575 ))))
19576 }
19577 _ => Ok(Expression::Function(f)),
19578 },
19579 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
19580 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
19581 let name = match target {
19582 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
19583 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
19584 DialectType::PostgreSQL | DialectType::Redshift => {
19585 "STARTS_WITH"
19586 }
19587 _ => &f.name,
19588 };
19589 Ok(Expression::Function(Box::new(Function::new(
19590 name.to_string(),
19591 f.args,
19592 ))))
19593 }
19594 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
19595 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
19596 let name = match target {
19597 DialectType::Presto
19598 | DialectType::Trino
19599 | DialectType::Athena => "APPROX_DISTINCT",
19600 _ => "APPROX_COUNT_DISTINCT",
19601 };
19602 Ok(Expression::Function(Box::new(Function::new(
19603 name.to_string(),
19604 f.args,
19605 ))))
19606 }
19607 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
19608 "JSON_EXTRACT"
19609 if f.args.len() == 2
19610 && !matches!(source, DialectType::BigQuery)
19611 && matches!(
19612 target,
19613 DialectType::Spark
19614 | DialectType::Databricks
19615 | DialectType::Hive
19616 ) =>
19617 {
19618 Ok(Expression::Function(Box::new(Function::new(
19619 "GET_JSON_OBJECT".to_string(),
19620 f.args,
19621 ))))
19622 }
19623 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
19624 "JSON_EXTRACT"
19625 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
19626 {
19627 let mut args = f.args;
19628 let path = args.remove(1);
19629 let this = args.remove(0);
19630 Ok(Expression::JsonExtract(Box::new(
19631 crate::expressions::JsonExtractFunc {
19632 this,
19633 path,
19634 returning: None,
19635 arrow_syntax: true,
19636 hash_arrow_syntax: false,
19637 wrapper_option: None,
19638 quotes_option: None,
19639 on_scalar_string: false,
19640 on_error: None,
19641 },
19642 )))
19643 }
19644 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
19645 "JSON_FORMAT" if f.args.len() == 1 => {
19646 match target {
19647 DialectType::Spark | DialectType::Databricks => {
19648 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
19649 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
19650 if matches!(
19651 source,
19652 DialectType::Presto
19653 | DialectType::Trino
19654 | DialectType::Athena
19655 ) {
19656 if let Some(Expression::ParseJson(pj)) = f.args.first()
19657 {
19658 if let Expression::Literal(lit) = &pj.this {
19659 if let Literal::String(s) = lit.as_ref() {
19660 let wrapped =
19661 Expression::Literal(Box::new(
19662 Literal::String(format!("[{}]", s)),
19663 ));
19664 let schema_of_json = Expression::Function(
19665 Box::new(Function::new(
19666 "SCHEMA_OF_JSON".to_string(),
19667 vec![wrapped.clone()],
19668 )),
19669 );
19670 let from_json = Expression::Function(
19671 Box::new(Function::new(
19672 "FROM_JSON".to_string(),
19673 vec![wrapped, schema_of_json],
19674 )),
19675 );
19676 let to_json = Expression::Function(
19677 Box::new(Function::new(
19678 "TO_JSON".to_string(),
19679 vec![from_json],
19680 )),
19681 );
19682 return Ok(Expression::Function(Box::new(
19683 Function::new(
19684 "REGEXP_EXTRACT".to_string(),
19685 vec![
19686 to_json,
19687 Expression::Literal(Box::new(
19688 Literal::String(
19689 "^.(.*).$".to_string(),
19690 ),
19691 )),
19692 Expression::Literal(Box::new(
19693 Literal::Number(
19694 "1".to_string(),
19695 ),
19696 )),
19697 ],
19698 ),
19699 )));
19700 }
19701 }
19702 }
19703 }
19704
19705 // Strip inner CAST(... AS JSON) or TO_JSON() if present
19706 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
19707 let mut args = f.args;
19708 if let Some(Expression::Cast(ref c)) = args.first() {
19709 if matches!(&c.to, DataType::Json | DataType::JsonB) {
19710 args = vec![c.this.clone()];
19711 }
19712 } else if let Some(Expression::Function(ref inner_f)) =
19713 args.first()
19714 {
19715 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
19716 && inner_f.args.len() == 1
19717 {
19718 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
19719 args = inner_f.args.clone();
19720 }
19721 }
19722 Ok(Expression::Function(Box::new(Function::new(
19723 "TO_JSON".to_string(),
19724 args,
19725 ))))
19726 }
19727 DialectType::BigQuery => Ok(Expression::Function(Box::new(
19728 Function::new("TO_JSON_STRING".to_string(), f.args),
19729 ))),
19730 DialectType::DuckDB => {
19731 // CAST(TO_JSON(x) AS TEXT)
19732 let to_json = Expression::Function(Box::new(
19733 Function::new("TO_JSON".to_string(), f.args),
19734 ));
19735 Ok(Expression::Cast(Box::new(Cast {
19736 this: to_json,
19737 to: DataType::Text,
19738 trailing_comments: Vec::new(),
19739 double_colon_syntax: false,
19740 format: None,
19741 default: None,
19742 inferred_type: None,
19743 })))
19744 }
19745 _ => Ok(Expression::Function(f)),
19746 }
19747 }
19748 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
19749 "SYSDATE" if f.args.is_empty() => {
19750 match target {
19751 DialectType::Oracle | DialectType::Redshift => {
19752 Ok(Expression::Function(f))
19753 }
19754 DialectType::Snowflake => {
19755 // Snowflake uses SYSDATE() with parens
19756 let mut f = *f;
19757 f.no_parens = false;
19758 Ok(Expression::Function(Box::new(f)))
19759 }
19760 DialectType::DuckDB => {
19761 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
19762 Ok(Expression::AtTimeZone(Box::new(
19763 crate::expressions::AtTimeZone {
19764 this: Expression::CurrentTimestamp(
19765 crate::expressions::CurrentTimestamp {
19766 precision: None,
19767 sysdate: false,
19768 },
19769 ),
19770 zone: Expression::Literal(Box::new(
19771 Literal::String("UTC".to_string()),
19772 )),
19773 },
19774 )))
19775 }
19776 _ => Ok(Expression::CurrentTimestamp(
19777 crate::expressions::CurrentTimestamp {
19778 precision: None,
19779 sysdate: true,
19780 },
19781 )),
19782 }
19783 }
19784 // LOGICAL_OR(x) -> BOOL_OR(x)
19785 "LOGICAL_OR" if f.args.len() == 1 => {
19786 let name = match target {
19787 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
19788 _ => &f.name,
19789 };
19790 Ok(Expression::Function(Box::new(Function::new(
19791 name.to_string(),
19792 f.args,
19793 ))))
19794 }
19795 // LOGICAL_AND(x) -> BOOL_AND(x)
19796 "LOGICAL_AND" if f.args.len() == 1 => {
19797 let name = match target {
19798 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
19799 _ => &f.name,
19800 };
19801 Ok(Expression::Function(Box::new(Function::new(
19802 name.to_string(),
19803 f.args,
19804 ))))
19805 }
19806 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
19807 "MONTHS_ADD" if f.args.len() == 2 => match target {
19808 DialectType::Oracle => Ok(Expression::Function(Box::new(
19809 Function::new("ADD_MONTHS".to_string(), f.args),
19810 ))),
19811 _ => Ok(Expression::Function(f)),
19812 },
19813 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
19814 "ARRAY_JOIN" if f.args.len() >= 2 => {
19815 match target {
19816 DialectType::Spark | DialectType::Databricks => {
19817 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
19818 Ok(Expression::Function(f))
19819 }
19820 DialectType::Hive => {
19821 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
19822 let mut args = f.args;
19823 let arr = args.remove(0);
19824 let sep = args.remove(0);
19825 // Drop any remaining args (null_replacement)
19826 Ok(Expression::Function(Box::new(Function::new(
19827 "CONCAT_WS".to_string(),
19828 vec![sep, arr],
19829 ))))
19830 }
19831 DialectType::Presto | DialectType::Trino => {
19832 Ok(Expression::Function(f))
19833 }
19834 _ => Ok(Expression::Function(f)),
19835 }
19836 }
19837 // LOCATE(substr, str, pos) 3-arg -> target-specific
19838 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
19839 "LOCATE"
19840 if f.args.len() == 3
19841 && matches!(
19842 target,
19843 DialectType::Presto
19844 | DialectType::Trino
19845 | DialectType::Athena
19846 | DialectType::DuckDB
19847 ) =>
19848 {
19849 let mut args = f.args;
19850 let substr = args.remove(0);
19851 let string = args.remove(0);
19852 let pos = args.remove(0);
19853 // STRPOS(SUBSTRING(string, pos), substr)
19854 let substring_call = Expression::Function(Box::new(Function::new(
19855 "SUBSTRING".to_string(),
19856 vec![string.clone(), pos.clone()],
19857 )));
19858 let strpos_call = Expression::Function(Box::new(Function::new(
19859 "STRPOS".to_string(),
19860 vec![substring_call, substr.clone()],
19861 )));
19862 // STRPOS(...) + pos - 1
19863 let pos_adjusted =
19864 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
19865 Expression::Add(Box::new(
19866 crate::expressions::BinaryOp::new(
19867 strpos_call.clone(),
19868 pos.clone(),
19869 ),
19870 )),
19871 Expression::number(1),
19872 )));
19873 // STRPOS(...) = 0
19874 let is_zero =
19875 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
19876 strpos_call.clone(),
19877 Expression::number(0),
19878 )));
19879
19880 match target {
19881 DialectType::Presto
19882 | DialectType::Trino
19883 | DialectType::Athena => {
19884 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
19885 Ok(Expression::Function(Box::new(Function::new(
19886 "IF".to_string(),
19887 vec![is_zero, Expression::number(0), pos_adjusted],
19888 ))))
19889 }
19890 DialectType::DuckDB => {
19891 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
19892 Ok(Expression::Case(Box::new(crate::expressions::Case {
19893 operand: None,
19894 whens: vec![(is_zero, Expression::number(0))],
19895 else_: Some(pos_adjusted),
19896 comments: Vec::new(),
19897 inferred_type: None,
19898 })))
19899 }
19900 _ => Ok(Expression::Function(Box::new(Function::new(
19901 "LOCATE".to_string(),
19902 vec![substr, string, pos],
19903 )))),
19904 }
19905 }
19906 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
19907 "STRPOS"
19908 if f.args.len() == 3
19909 && matches!(
19910 target,
19911 DialectType::BigQuery
19912 | DialectType::Oracle
19913 | DialectType::Teradata
19914 ) =>
19915 {
19916 let mut args = f.args;
19917 let haystack = args.remove(0);
19918 let needle = args.remove(0);
19919 let occurrence = args.remove(0);
19920 Ok(Expression::Function(Box::new(Function::new(
19921 "INSTR".to_string(),
19922 vec![haystack, needle, Expression::number(1), occurrence],
19923 ))))
19924 }
19925 // SCHEMA_NAME(id) -> target-specific
19926 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
19927 DialectType::MySQL | DialectType::SingleStore => {
19928 Ok(Expression::Function(Box::new(Function::new(
19929 "SCHEMA".to_string(),
19930 vec![],
19931 ))))
19932 }
19933 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19934 crate::expressions::CurrentSchema { this: None },
19935 ))),
19936 DialectType::SQLite => Ok(Expression::string("main")),
19937 _ => Ok(Expression::Function(f)),
19938 },
19939 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
19940 "STRTOL" if f.args.len() == 2 => match target {
19941 DialectType::Presto | DialectType::Trino => {
19942 Ok(Expression::Function(Box::new(Function::new(
19943 "FROM_BASE".to_string(),
19944 f.args,
19945 ))))
19946 }
19947 _ => Ok(Expression::Function(f)),
19948 },
19949 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
19950 "EDITDIST3" if f.args.len() == 2 => match target {
19951 DialectType::Spark | DialectType::Databricks => {
19952 Ok(Expression::Function(Box::new(Function::new(
19953 "LEVENSHTEIN".to_string(),
19954 f.args,
19955 ))))
19956 }
19957 _ => Ok(Expression::Function(f)),
19958 },
19959 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
19960 "FORMAT"
19961 if f.args.len() == 2
19962 && matches!(
19963 source,
19964 DialectType::MySQL | DialectType::SingleStore
19965 )
19966 && matches!(target, DialectType::DuckDB) =>
19967 {
19968 let mut args = f.args;
19969 let num_expr = args.remove(0);
19970 let decimals_expr = args.remove(0);
19971 // Extract decimal count
19972 let dec_count = match &decimals_expr {
19973 Expression::Literal(lit)
19974 if matches!(lit.as_ref(), Literal::Number(_)) =>
19975 {
19976 let Literal::Number(n) = lit.as_ref() else {
19977 unreachable!()
19978 };
19979 n.clone()
19980 }
19981 _ => "0".to_string(),
19982 };
19983 let fmt_str = format!("{{:,.{}f}}", dec_count);
19984 Ok(Expression::Function(Box::new(Function::new(
19985 "FORMAT".to_string(),
19986 vec![Expression::string(&fmt_str), num_expr],
19987 ))))
19988 }
19989 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
19990 "FORMAT"
19991 if f.args.len() == 2
19992 && matches!(
19993 source,
19994 DialectType::TSQL | DialectType::Fabric
19995 ) =>
19996 {
19997 let val_expr = f.args[0].clone();
19998 let fmt_expr = f.args[1].clone();
19999 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
20000 // Only expand shortcodes that are NOT also valid numeric format specifiers.
20001 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
20002 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
20003 let (expanded_fmt, is_shortcode) = match &fmt_expr {
20004 Expression::Literal(lit)
20005 if matches!(
20006 lit.as_ref(),
20007 crate::expressions::Literal::String(_)
20008 ) =>
20009 {
20010 let crate::expressions::Literal::String(s) = lit.as_ref()
20011 else {
20012 unreachable!()
20013 };
20014 match s.as_str() {
20015 "m" | "M" => (Expression::string("MMMM d"), true),
20016 "t" => (Expression::string("h:mm tt"), true),
20017 "T" => (Expression::string("h:mm:ss tt"), true),
20018 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
20019 _ => (fmt_expr.clone(), false),
20020 }
20021 }
20022 _ => (fmt_expr.clone(), false),
20023 };
20024 // Check if the format looks like a date format
20025 let is_date_format = is_shortcode
20026 || match &expanded_fmt {
20027 Expression::Literal(lit)
20028 if matches!(
20029 lit.as_ref(),
20030 crate::expressions::Literal::String(_)
20031 ) =>
20032 {
20033 let crate::expressions::Literal::String(s) =
20034 lit.as_ref()
20035 else {
20036 unreachable!()
20037 };
20038 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
20039 s.contains("yyyy")
20040 || s.contains("YYYY")
20041 || s.contains("MM")
20042 || s.contains("dd")
20043 || s.contains("MMMM")
20044 || s.contains("HH")
20045 || s.contains("hh")
20046 || s.contains("ss")
20047 }
20048 _ => false,
20049 };
20050 match target {
20051 DialectType::Spark | DialectType::Databricks => {
20052 let func_name = if is_date_format {
20053 "DATE_FORMAT"
20054 } else {
20055 "FORMAT_NUMBER"
20056 };
20057 Ok(Expression::Function(Box::new(Function::new(
20058 func_name.to_string(),
20059 vec![val_expr, expanded_fmt],
20060 ))))
20061 }
20062 _ => {
20063 // For TSQL and other targets, expand shortcodes but keep FORMAT
20064 if is_shortcode {
20065 Ok(Expression::Function(Box::new(Function::new(
20066 "FORMAT".to_string(),
20067 vec![val_expr, expanded_fmt],
20068 ))))
20069 } else {
20070 Ok(Expression::Function(f))
20071 }
20072 }
20073 }
20074 }
20075 // FORMAT('%s', x) from Trino/Presto -> target-specific
20076 "FORMAT"
20077 if f.args.len() >= 2
20078 && matches!(
20079 source,
20080 DialectType::Trino
20081 | DialectType::Presto
20082 | DialectType::Athena
20083 ) =>
20084 {
20085 let fmt_expr = f.args[0].clone();
20086 let value_args: Vec<Expression> = f.args[1..].to_vec();
20087 match target {
20088 // DuckDB: replace %s with {} in format string
20089 DialectType::DuckDB => {
20090 let new_fmt = match &fmt_expr {
20091 Expression::Literal(lit)
20092 if matches!(lit.as_ref(), Literal::String(_)) =>
20093 {
20094 let Literal::String(s) = lit.as_ref() else {
20095 unreachable!()
20096 };
20097 Expression::Literal(Box::new(Literal::String(
20098 s.replace("%s", "{}"),
20099 )))
20100 }
20101 _ => fmt_expr,
20102 };
20103 let mut args = vec![new_fmt];
20104 args.extend(value_args);
20105 Ok(Expression::Function(Box::new(Function::new(
20106 "FORMAT".to_string(),
20107 args,
20108 ))))
20109 }
20110 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
20111 DialectType::Snowflake => match &fmt_expr {
20112 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
20113 {
20114 let Literal::String(_) = lit.as_ref() else {
20115 unreachable!()
20116 };
20117 Ok(Expression::Function(Box::new(Function::new(
20118 "TO_CHAR".to_string(),
20119 value_args,
20120 ))))
20121 }
20122 _ => Ok(Expression::Function(f)),
20123 },
20124 // Default: keep FORMAT as-is
20125 _ => Ok(Expression::Function(f)),
20126 }
20127 }
20128 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
20129 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
20130 if f.args.len() == 2 =>
20131 {
20132 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
20133 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
20134 if matches!(target, DialectType::DuckDB)
20135 && matches!(source, DialectType::Snowflake)
20136 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
20137 {
20138 let value = f.args[0].clone();
20139 let array = f.args[1].clone();
20140
20141 // value IS NULL
20142 let value_is_null =
20143 Expression::IsNull(Box::new(crate::expressions::IsNull {
20144 this: value.clone(),
20145 not: false,
20146 postfix_form: false,
20147 }));
20148
20149 // ARRAY_LENGTH(array)
20150 let array_length =
20151 Expression::Function(Box::new(Function::new(
20152 "ARRAY_LENGTH".to_string(),
20153 vec![array.clone()],
20154 )));
20155 // LIST_COUNT(array)
20156 let list_count = Expression::Function(Box::new(Function::new(
20157 "LIST_COUNT".to_string(),
20158 vec![array.clone()],
20159 )));
20160 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
20161 let neq =
20162 Expression::Neq(Box::new(crate::expressions::BinaryOp {
20163 left: array_length,
20164 right: list_count,
20165 left_comments: vec![],
20166 operator_comments: vec![],
20167 trailing_comments: vec![],
20168 inferred_type: None,
20169 }));
20170 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
20171 let nullif =
20172 Expression::Nullif(Box::new(crate::expressions::Nullif {
20173 this: Box::new(neq),
20174 expression: Box::new(Expression::Boolean(
20175 crate::expressions::BooleanLiteral { value: false },
20176 )),
20177 }));
20178
20179 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
20180 let array_contains =
20181 Expression::Function(Box::new(Function::new(
20182 "ARRAY_CONTAINS".to_string(),
20183 vec![array, value],
20184 )));
20185
20186 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
20187 return Ok(Expression::Case(Box::new(Case {
20188 operand: None,
20189 whens: vec![(value_is_null, nullif)],
20190 else_: Some(array_contains),
20191 comments: Vec::new(),
20192 inferred_type: None,
20193 })));
20194 }
20195 match target {
20196 DialectType::PostgreSQL | DialectType::Redshift => {
20197 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
20198 let arr = f.args[0].clone();
20199 let needle = f.args[1].clone();
20200 // Convert [] to ARRAY[] for PostgreSQL
20201 let pg_arr = match arr {
20202 Expression::Array(a) => Expression::ArrayFunc(
20203 Box::new(crate::expressions::ArrayConstructor {
20204 expressions: a.expressions,
20205 bracket_notation: false,
20206 use_list_keyword: false,
20207 }),
20208 ),
20209 _ => arr,
20210 };
20211 // needle = ANY(arr) using the Any quantified expression
20212 let any_expr = Expression::Any(Box::new(
20213 crate::expressions::QuantifiedExpr {
20214 this: needle.clone(),
20215 subquery: pg_arr,
20216 op: Some(crate::expressions::QuantifiedOp::Eq),
20217 },
20218 ));
20219 let coalesce = Expression::Coalesce(Box::new(
20220 crate::expressions::VarArgFunc {
20221 expressions: vec![
20222 any_expr,
20223 Expression::Boolean(
20224 crate::expressions::BooleanLiteral {
20225 value: false,
20226 },
20227 ),
20228 ],
20229 original_name: None,
20230 inferred_type: None,
20231 },
20232 ));
20233 let is_null_check = Expression::IsNull(Box::new(
20234 crate::expressions::IsNull {
20235 this: needle,
20236 not: false,
20237 postfix_form: false,
20238 },
20239 ));
20240 Ok(Expression::Case(Box::new(Case {
20241 operand: None,
20242 whens: vec![(
20243 is_null_check,
20244 Expression::Null(crate::expressions::Null),
20245 )],
20246 else_: Some(coalesce),
20247 comments: Vec::new(),
20248 inferred_type: None,
20249 })))
20250 }
20251 _ => Ok(Expression::Function(Box::new(Function::new(
20252 "ARRAY_CONTAINS".to_string(),
20253 f.args,
20254 )))),
20255 }
20256 }
20257 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
20258 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
20259 match target {
20260 DialectType::PostgreSQL | DialectType::Redshift => {
20261 // arr1 && arr2 with ARRAY[] syntax
20262 let mut args = f.args;
20263 let arr1 = args.remove(0);
20264 let arr2 = args.remove(0);
20265 let pg_arr1 = match arr1 {
20266 Expression::Array(a) => Expression::ArrayFunc(
20267 Box::new(crate::expressions::ArrayConstructor {
20268 expressions: a.expressions,
20269 bracket_notation: false,
20270 use_list_keyword: false,
20271 }),
20272 ),
20273 _ => arr1,
20274 };
20275 let pg_arr2 = match arr2 {
20276 Expression::Array(a) => Expression::ArrayFunc(
20277 Box::new(crate::expressions::ArrayConstructor {
20278 expressions: a.expressions,
20279 bracket_notation: false,
20280 use_list_keyword: false,
20281 }),
20282 ),
20283 _ => arr2,
20284 };
20285 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
20286 pg_arr1, pg_arr2,
20287 ))))
20288 }
20289 DialectType::DuckDB => {
20290 // DuckDB: arr1 && arr2 (native support)
20291 let mut args = f.args;
20292 let arr1 = args.remove(0);
20293 let arr2 = args.remove(0);
20294 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
20295 arr1, arr2,
20296 ))))
20297 }
20298 _ => Ok(Expression::Function(Box::new(Function::new(
20299 "LIST_HAS_ANY".to_string(),
20300 f.args,
20301 )))),
20302 }
20303 }
20304 // APPROX_QUANTILE(x, q) -> target-specific
20305 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
20306 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20307 Function::new("APPROX_PERCENTILE".to_string(), f.args),
20308 ))),
20309 DialectType::DuckDB => Ok(Expression::Function(f)),
20310 _ => Ok(Expression::Function(f)),
20311 },
20312 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
20313 "MAKE_DATE" if f.args.len() == 3 => match target {
20314 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20315 Function::new("DATE".to_string(), f.args),
20316 ))),
20317 _ => Ok(Expression::Function(f)),
20318 },
20319 // RANGE(start, end[, step]) -> target-specific
20320 "RANGE"
20321 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
20322 {
20323 let start = f.args[0].clone();
20324 let end = f.args[1].clone();
20325 let step = f.args.get(2).cloned();
20326 match target {
20327 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
20328 // so just rename without adjusting the end argument.
20329 DialectType::Snowflake => {
20330 let mut args = vec![start, end];
20331 if let Some(s) = step {
20332 args.push(s);
20333 }
20334 Ok(Expression::Function(Box::new(Function::new(
20335 "ARRAY_GENERATE_RANGE".to_string(),
20336 args,
20337 ))))
20338 }
20339 DialectType::Spark | DialectType::Databricks => {
20340 // RANGE(start, end) -> SEQUENCE(start, end-1)
20341 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
20342 // RANGE(start, start) -> ARRAY() (empty)
20343 // RANGE(start, end, 0) -> ARRAY() (empty)
20344 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
20345
20346 // Check for constant args
20347 fn extract_i64(e: &Expression) -> Option<i64> {
20348 match e {
20349 Expression::Literal(lit)
20350 if matches!(
20351 lit.as_ref(),
20352 Literal::Number(_)
20353 ) =>
20354 {
20355 let Literal::Number(n) = lit.as_ref() else {
20356 unreachable!()
20357 };
20358 n.parse::<i64>().ok()
20359 }
20360 Expression::Neg(u) => {
20361 if let Expression::Literal(lit) = &u.this {
20362 if let Literal::Number(n) = lit.as_ref() {
20363 n.parse::<i64>().ok().map(|v| -v)
20364 } else {
20365 None
20366 }
20367 } else {
20368 None
20369 }
20370 }
20371 _ => None,
20372 }
20373 }
20374 let start_val = extract_i64(&start);
20375 let end_val = extract_i64(&end);
20376 let step_val = step.as_ref().and_then(|s| extract_i64(s));
20377
20378 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
20379 if step_val == Some(0) {
20380 return Ok(Expression::Function(Box::new(
20381 Function::new("ARRAY".to_string(), vec![]),
20382 )));
20383 }
20384 if let (Some(s), Some(e_val)) = (start_val, end_val) {
20385 if s == e_val {
20386 return Ok(Expression::Function(Box::new(
20387 Function::new("ARRAY".to_string(), vec![]),
20388 )));
20389 }
20390 }
20391
20392 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
20393 // All constants - compute new end = end - step (if step provided) or end - 1
20394 match step_val {
20395 Some(st) if st < 0 => {
20396 // Negative step: SEQUENCE(start, end - step, step)
20397 let new_end = e_val - st; // end - step (= end + |step|)
20398 let mut args =
20399 vec![start, Expression::number(new_end)];
20400 if let Some(s) = step {
20401 args.push(s);
20402 }
20403 Ok(Expression::Function(Box::new(
20404 Function::new("SEQUENCE".to_string(), args),
20405 )))
20406 }
20407 Some(st) => {
20408 let new_end = e_val - st;
20409 let mut args =
20410 vec![start, Expression::number(new_end)];
20411 if let Some(s) = step {
20412 args.push(s);
20413 }
20414 Ok(Expression::Function(Box::new(
20415 Function::new("SEQUENCE".to_string(), args),
20416 )))
20417 }
20418 None => {
20419 // No step: SEQUENCE(start, end - 1)
20420 let new_end = e_val - 1;
20421 Ok(Expression::Function(Box::new(
20422 Function::new(
20423 "SEQUENCE".to_string(),
20424 vec![
20425 start,
20426 Expression::number(new_end),
20427 ],
20428 ),
20429 )))
20430 }
20431 }
20432 } else {
20433 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
20434 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
20435 end.clone(),
20436 Expression::number(1),
20437 )));
20438 let cond = Expression::Lt(Box::new(BinaryOp::new(
20439 Expression::Paren(Box::new(Paren {
20440 this: end_m1.clone(),
20441 trailing_comments: Vec::new(),
20442 })),
20443 start.clone(),
20444 )));
20445 let empty = Expression::Function(Box::new(
20446 Function::new("ARRAY".to_string(), vec![]),
20447 ));
20448 let mut seq_args = vec![
20449 start,
20450 Expression::Paren(Box::new(Paren {
20451 this: end_m1,
20452 trailing_comments: Vec::new(),
20453 })),
20454 ];
20455 if let Some(s) = step {
20456 seq_args.push(s);
20457 }
20458 let seq = Expression::Function(Box::new(
20459 Function::new("SEQUENCE".to_string(), seq_args),
20460 ));
20461 Ok(Expression::IfFunc(Box::new(
20462 crate::expressions::IfFunc {
20463 condition: cond,
20464 true_value: empty,
20465 false_value: Some(seq),
20466 original_name: None,
20467 inferred_type: None,
20468 },
20469 )))
20470 }
20471 }
20472 DialectType::SQLite => {
20473 // RANGE(start, end) -> GENERATE_SERIES(start, end)
20474 // The subquery wrapping is handled at the Alias level
20475 let mut args = vec![start, end];
20476 if let Some(s) = step {
20477 args.push(s);
20478 }
20479 Ok(Expression::Function(Box::new(Function::new(
20480 "GENERATE_SERIES".to_string(),
20481 args,
20482 ))))
20483 }
20484 _ => Ok(Expression::Function(f)),
20485 }
20486 }
20487 // ARRAY_REVERSE_SORT -> target-specific
20488 // (handled above as well, but also need DuckDB self-normalization)
20489 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20490 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
20491 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20492 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
20493 ))),
20494 DialectType::Spark | DialectType::Databricks => {
20495 Ok(Expression::Function(Box::new(Function::new(
20496 "MAP_FROM_ARRAYS".to_string(),
20497 f.args,
20498 ))))
20499 }
20500 _ => Ok(Expression::Function(Box::new(Function::new(
20501 "MAP".to_string(),
20502 f.args,
20503 )))),
20504 },
20505 // VARIANCE(x) -> varSamp(x) for ClickHouse
20506 "VARIANCE" if f.args.len() == 1 => match target {
20507 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
20508 Function::new("varSamp".to_string(), f.args),
20509 ))),
20510 _ => Ok(Expression::Function(f)),
20511 },
20512 // STDDEV(x) -> stddevSamp(x) for ClickHouse
20513 "STDDEV" if f.args.len() == 1 => match target {
20514 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
20515 Function::new("stddevSamp".to_string(), f.args),
20516 ))),
20517 _ => Ok(Expression::Function(f)),
20518 },
20519 // ISINF(x) -> IS_INF(x) for BigQuery
20520 "ISINF" if f.args.len() == 1 => match target {
20521 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20522 Function::new("IS_INF".to_string(), f.args),
20523 ))),
20524 _ => Ok(Expression::Function(f)),
20525 },
20526 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
20527 "CONTAINS" if f.args.len() == 2 => match target {
20528 DialectType::Spark
20529 | DialectType::Databricks
20530 | DialectType::Hive => Ok(Expression::Function(Box::new(
20531 Function::new("ARRAY_CONTAINS".to_string(), f.args),
20532 ))),
20533 _ => Ok(Expression::Function(f)),
20534 },
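// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto/Trino/Athena
// NOTE(review): the earlier `"LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"` arm has the
// same two-argument guard and appears to match first, which would make this arm
// unreachable — confirm.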
20536 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
20537 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20538 Ok(Expression::Function(Box::new(Function::new(
20539 "CONTAINS".to_string(),
20540 f.args,
20541 ))))
20542 }
20543 DialectType::DuckDB => Ok(Expression::Function(Box::new(
20544 Function::new("ARRAY_CONTAINS".to_string(), f.args),
20545 ))),
20546 _ => Ok(Expression::Function(f)),
20547 },
20548 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
20549 "TO_UNIXTIME" if f.args.len() == 1 => match target {
20550 DialectType::Hive
20551 | DialectType::Spark
20552 | DialectType::Databricks => Ok(Expression::Function(Box::new(
20553 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
20554 ))),
20555 _ => Ok(Expression::Function(f)),
20556 },
20557 // FROM_UNIXTIME(x) -> target-specific
20558 "FROM_UNIXTIME" if f.args.len() == 1 => {
20559 match target {
20560 DialectType::Hive
20561 | DialectType::Spark
20562 | DialectType::Databricks
20563 | DialectType::Presto
20564 | DialectType::Trino => Ok(Expression::Function(f)),
20565 DialectType::DuckDB => {
20566 // DuckDB: TO_TIMESTAMP(x)
20567 let arg = f.args.into_iter().next().unwrap();
20568 Ok(Expression::Function(Box::new(Function::new(
20569 "TO_TIMESTAMP".to_string(),
20570 vec![arg],
20571 ))))
20572 }
20573 DialectType::PostgreSQL => {
20574 // PG: TO_TIMESTAMP(col)
20575 let arg = f.args.into_iter().next().unwrap();
20576 Ok(Expression::Function(Box::new(Function::new(
20577 "TO_TIMESTAMP".to_string(),
20578 vec![arg],
20579 ))))
20580 }
20581 DialectType::Redshift => {
20582 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
20583 let arg = f.args.into_iter().next().unwrap();
20584 let epoch_ts = Expression::Literal(Box::new(
20585 Literal::Timestamp("epoch".to_string()),
20586 ));
20587 let interval = Expression::Interval(Box::new(
20588 crate::expressions::Interval {
20589 this: Some(Expression::string("1 SECOND")),
20590 unit: None,
20591 },
20592 ));
20593 let mul =
20594 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
20595 let add =
20596 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
20597 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
20598 this: add,
20599 trailing_comments: Vec::new(),
20600 })))
20601 }
20602 _ => Ok(Expression::Function(f)),
20603 }
20604 }
20605 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
20606 "FROM_UNIXTIME"
20607 if f.args.len() == 2
20608 && matches!(
20609 source,
20610 DialectType::Hive
20611 | DialectType::Spark
20612 | DialectType::Databricks
20613 ) =>
20614 {
20615 let mut args = f.args;
20616 let unix_ts = args.remove(0);
20617 let fmt_expr = args.remove(0);
20618 match target {
20619 DialectType::DuckDB => {
20620 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
20621 let to_ts = Expression::Function(Box::new(Function::new(
20622 "TO_TIMESTAMP".to_string(),
20623 vec![unix_ts],
20624 )));
20625 if let Expression::Literal(lit) = &fmt_expr {
20626 if let crate::expressions::Literal::String(s) =
20627 lit.as_ref()
20628 {
20629 let c_fmt = Self::hive_format_to_c_format(s);
20630 Ok(Expression::Function(Box::new(Function::new(
20631 "STRFTIME".to_string(),
20632 vec![to_ts, Expression::string(&c_fmt)],
20633 ))))
20634 } else {
20635 Ok(Expression::Function(Box::new(Function::new(
20636 "STRFTIME".to_string(),
20637 vec![to_ts, fmt_expr],
20638 ))))
20639 }
20640 } else {
20641 Ok(Expression::Function(Box::new(Function::new(
20642 "STRFTIME".to_string(),
20643 vec![to_ts, fmt_expr],
20644 ))))
20645 }
20646 }
20647 DialectType::Presto
20648 | DialectType::Trino
20649 | DialectType::Athena => {
20650 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
20651 let from_unix =
20652 Expression::Function(Box::new(Function::new(
20653 "FROM_UNIXTIME".to_string(),
20654 vec![unix_ts],
20655 )));
20656 if let Expression::Literal(lit) = &fmt_expr {
20657 if let crate::expressions::Literal::String(s) =
20658 lit.as_ref()
20659 {
20660 let p_fmt = Self::hive_format_to_presto_format(s);
20661 Ok(Expression::Function(Box::new(Function::new(
20662 "DATE_FORMAT".to_string(),
20663 vec![from_unix, Expression::string(&p_fmt)],
20664 ))))
20665 } else {
20666 Ok(Expression::Function(Box::new(Function::new(
20667 "DATE_FORMAT".to_string(),
20668 vec![from_unix, fmt_expr],
20669 ))))
20670 }
20671 } else {
20672 Ok(Expression::Function(Box::new(Function::new(
20673 "DATE_FORMAT".to_string(),
20674 vec![from_unix, fmt_expr],
20675 ))))
20676 }
20677 }
20678 _ => {
20679 // Keep as FROM_UNIXTIME(x, fmt) for other targets
20680 Ok(Expression::Function(Box::new(Function::new(
20681 "FROM_UNIXTIME".to_string(),
20682 vec![unix_ts, fmt_expr],
20683 ))))
20684 }
20685 }
20686 }
20687 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
20688 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
20689 let unit_str = Self::get_unit_str_static(&f.args[0]);
20690 // Get the raw unit text preserving original case
20691 let raw_unit = match &f.args[0] {
20692 Expression::Identifier(id) => id.name.clone(),
20693 Expression::Var(v) => v.this.clone(),
20694 Expression::Literal(lit)
20695 if matches!(
20696 lit.as_ref(),
20697 crate::expressions::Literal::String(_)
20698 ) =>
20699 {
20700 let crate::expressions::Literal::String(s) = lit.as_ref()
20701 else {
20702 unreachable!()
20703 };
20704 s.clone()
20705 }
20706 Expression::Column(col) => col.name.name.clone(),
20707 _ => unit_str.clone(),
20708 };
20709 match target {
20710 DialectType::TSQL | DialectType::Fabric => {
20711 // Preserve original case of unit for TSQL
20712 let unit_name = match unit_str.as_str() {
20713 "YY" | "YYYY" => "YEAR".to_string(),
20714 "QQ" | "Q" => "QUARTER".to_string(),
20715 "MM" | "M" => "MONTH".to_string(),
20716 "WK" | "WW" => "WEEK".to_string(),
20717 "DD" | "D" | "DY" => "DAY".to_string(),
20718 "HH" => "HOUR".to_string(),
20719 "MI" | "N" => "MINUTE".to_string(),
20720 "SS" | "S" => "SECOND".to_string(),
20721 _ => raw_unit.clone(), // preserve original case
20722 };
20723 let mut args = f.args;
20724 args[0] =
20725 Expression::Identifier(Identifier::new(&unit_name));
20726 Ok(Expression::Function(Box::new(Function::new(
20727 "DATEPART".to_string(),
20728 args,
20729 ))))
20730 }
20731 DialectType::Spark | DialectType::Databricks => {
20732 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
20733 // Preserve original case for non-abbreviation units
20734 let unit = match unit_str.as_str() {
20735 "YY" | "YYYY" => "YEAR".to_string(),
20736 "QQ" | "Q" => "QUARTER".to_string(),
20737 "MM" | "M" => "MONTH".to_string(),
20738 "WK" | "WW" => "WEEK".to_string(),
20739 "DD" | "D" | "DY" => "DAY".to_string(),
20740 "HH" => "HOUR".to_string(),
20741 "MI" | "N" => "MINUTE".to_string(),
20742 "SS" | "S" => "SECOND".to_string(),
20743 _ => raw_unit, // preserve original case
20744 };
20745 Ok(Expression::Extract(Box::new(
20746 crate::expressions::ExtractFunc {
20747 this: f.args[1].clone(),
20748 field: crate::expressions::DateTimeField::Custom(
20749 unit,
20750 ),
20751 },
20752 )))
20753 }
20754 _ => Ok(Expression::Function(Box::new(Function::new(
20755 "DATE_PART".to_string(),
20756 f.args,
20757 )))),
20758 }
20759 }
20760 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
20761 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
20762 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
20763 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
20764 "DATENAME" if f.args.len() == 2 => {
20765 let unit_str = Self::get_unit_str_static(&f.args[0]);
20766 let date_expr = f.args[1].clone();
20767 match unit_str.as_str() {
20768 "MM" | "M" | "MONTH" => match target {
20769 DialectType::TSQL => {
20770 let cast_date = Expression::Cast(Box::new(
20771 crate::expressions::Cast {
20772 this: date_expr,
20773 to: DataType::Custom {
20774 name: "DATETIME2".to_string(),
20775 },
20776 trailing_comments: Vec::new(),
20777 double_colon_syntax: false,
20778 format: None,
20779 default: None,
20780 inferred_type: None,
20781 },
20782 ));
20783 Ok(Expression::Function(Box::new(Function::new(
20784 "FORMAT".to_string(),
20785 vec![cast_date, Expression::string("MMMM")],
20786 ))))
20787 }
20788 DialectType::Spark | DialectType::Databricks => {
20789 let cast_date = Expression::Cast(Box::new(
20790 crate::expressions::Cast {
20791 this: date_expr,
20792 to: DataType::Timestamp {
20793 timezone: false,
20794 precision: None,
20795 },
20796 trailing_comments: Vec::new(),
20797 double_colon_syntax: false,
20798 format: None,
20799 default: None,
20800 inferred_type: None,
20801 },
20802 ));
20803 Ok(Expression::Function(Box::new(Function::new(
20804 "DATE_FORMAT".to_string(),
20805 vec![cast_date, Expression::string("MMMM")],
20806 ))))
20807 }
20808 _ => Ok(Expression::Function(f)),
20809 },
20810 "DW" | "WEEKDAY" => match target {
20811 DialectType::TSQL => {
20812 let cast_date = Expression::Cast(Box::new(
20813 crate::expressions::Cast {
20814 this: date_expr,
20815 to: DataType::Custom {
20816 name: "DATETIME2".to_string(),
20817 },
20818 trailing_comments: Vec::new(),
20819 double_colon_syntax: false,
20820 format: None,
20821 default: None,
20822 inferred_type: None,
20823 },
20824 ));
20825 Ok(Expression::Function(Box::new(Function::new(
20826 "FORMAT".to_string(),
20827 vec![cast_date, Expression::string("dddd")],
20828 ))))
20829 }
20830 DialectType::Spark | DialectType::Databricks => {
20831 let cast_date = Expression::Cast(Box::new(
20832 crate::expressions::Cast {
20833 this: date_expr,
20834 to: DataType::Timestamp {
20835 timezone: false,
20836 precision: None,
20837 },
20838 trailing_comments: Vec::new(),
20839 double_colon_syntax: false,
20840 format: None,
20841 default: None,
20842 inferred_type: None,
20843 },
20844 ));
20845 Ok(Expression::Function(Box::new(Function::new(
20846 "DATE_FORMAT".to_string(),
20847 vec![cast_date, Expression::string("EEEE")],
20848 ))))
20849 }
20850 _ => Ok(Expression::Function(f)),
20851 },
20852 _ => Ok(Expression::Function(f)),
20853 }
20854 }
20855 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
20856 "STRING_AGG" if f.args.len() >= 2 => {
20857 let x = f.args[0].clone();
20858 let sep = f.args[1].clone();
20859 match target {
20860 DialectType::MySQL
20861 | DialectType::SingleStore
20862 | DialectType::Doris
20863 | DialectType::StarRocks => Ok(Expression::GroupConcat(
20864 Box::new(crate::expressions::GroupConcatFunc {
20865 this: x,
20866 separator: Some(sep),
20867 order_by: None,
20868 distinct: false,
20869 filter: None,
20870 limit: None,
20871 inferred_type: None,
20872 }),
20873 )),
20874 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
20875 crate::expressions::GroupConcatFunc {
20876 this: x,
20877 separator: Some(sep),
20878 order_by: None,
20879 distinct: false,
20880 filter: None,
20881 limit: None,
20882 inferred_type: None,
20883 },
20884 ))),
20885 DialectType::PostgreSQL | DialectType::Redshift => {
20886 Ok(Expression::StringAgg(Box::new(
20887 crate::expressions::StringAggFunc {
20888 this: x,
20889 separator: Some(sep),
20890 order_by: None,
20891 distinct: false,
20892 filter: None,
20893 limit: None,
20894 inferred_type: None,
20895 },
20896 )))
20897 }
20898 _ => Ok(Expression::Function(f)),
20899 }
20900 }
20901 "TRY_DIVIDE" if f.args.len() == 2 => {
20902 let mut args = f.args;
20903 let x = args.remove(0);
20904 let y = args.remove(0);
20905 match target {
20906 DialectType::Spark | DialectType::Databricks => {
20907 Ok(Expression::Function(Box::new(Function::new(
20908 "TRY_DIVIDE".to_string(),
20909 vec![x, y],
20910 ))))
20911 }
20912 DialectType::Snowflake => {
20913 let y_ref = match &y {
20914 Expression::Column(_)
20915 | Expression::Literal(_)
20916 | Expression::Identifier(_) => y.clone(),
20917 _ => Expression::Paren(Box::new(Paren {
20918 this: y.clone(),
20919 trailing_comments: vec![],
20920 })),
20921 };
20922 let x_ref = match &x {
20923 Expression::Column(_)
20924 | Expression::Literal(_)
20925 | Expression::Identifier(_) => x.clone(),
20926 _ => Expression::Paren(Box::new(Paren {
20927 this: x.clone(),
20928 trailing_comments: vec![],
20929 })),
20930 };
20931 let condition = Expression::Neq(Box::new(
20932 crate::expressions::BinaryOp::new(
20933 y_ref.clone(),
20934 Expression::number(0),
20935 ),
20936 ));
20937 let div_expr = Expression::Div(Box::new(
20938 crate::expressions::BinaryOp::new(x_ref, y_ref),
20939 ));
20940 Ok(Expression::IfFunc(Box::new(
20941 crate::expressions::IfFunc {
20942 condition,
20943 true_value: div_expr,
20944 false_value: Some(Expression::Null(Null)),
20945 original_name: Some("IFF".to_string()),
20946 inferred_type: None,
20947 },
20948 )))
20949 }
20950 DialectType::DuckDB => {
20951 let y_ref = match &y {
20952 Expression::Column(_)
20953 | Expression::Literal(_)
20954 | Expression::Identifier(_) => y.clone(),
20955 _ => Expression::Paren(Box::new(Paren {
20956 this: y.clone(),
20957 trailing_comments: vec![],
20958 })),
20959 };
20960 let x_ref = match &x {
20961 Expression::Column(_)
20962 | Expression::Literal(_)
20963 | Expression::Identifier(_) => x.clone(),
20964 _ => Expression::Paren(Box::new(Paren {
20965 this: x.clone(),
20966 trailing_comments: vec![],
20967 })),
20968 };
20969 let condition = Expression::Neq(Box::new(
20970 crate::expressions::BinaryOp::new(
20971 y_ref.clone(),
20972 Expression::number(0),
20973 ),
20974 ));
20975 let div_expr = Expression::Div(Box::new(
20976 crate::expressions::BinaryOp::new(x_ref, y_ref),
20977 ));
20978 Ok(Expression::Case(Box::new(Case {
20979 operand: None,
20980 whens: vec![(condition, div_expr)],
20981 else_: Some(Expression::Null(Null)),
20982 comments: Vec::new(),
20983 inferred_type: None,
20984 })))
20985 }
20986 _ => Ok(Expression::Function(Box::new(Function::new(
20987 "TRY_DIVIDE".to_string(),
20988 vec![x, y],
20989 )))),
20990 }
20991 }
20992 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
20993 "JSON_ARRAYAGG" => match target {
20994 DialectType::PostgreSQL => {
20995 Ok(Expression::Function(Box::new(Function {
20996 name: "JSON_AGG".to_string(),
20997 ..(*f)
20998 })))
20999 }
21000 _ => Ok(Expression::Function(f)),
21001 },
21002 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
21003 "SCHEMA_NAME" => match target {
21004 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
21005 crate::expressions::CurrentSchema { this: None },
21006 ))),
21007 DialectType::SQLite => Ok(Expression::string("main")),
21008 _ => Ok(Expression::Function(f)),
21009 },
21010 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
21011 "TO_TIMESTAMP"
21012 if f.args.len() == 2
21013 && matches!(
21014 source,
21015 DialectType::Spark
21016 | DialectType::Databricks
21017 | DialectType::Hive
21018 )
21019 && matches!(target, DialectType::DuckDB) =>
21020 {
21021 let mut args = f.args;
21022 let val = args.remove(0);
21023 let fmt_expr = args.remove(0);
21024 if let Expression::Literal(ref lit) = fmt_expr {
21025 if let Literal::String(ref s) = lit.as_ref() {
21026 // Convert Java/Spark format to C strptime format
/// Converts a Java/Spark datetime pattern into a C `strptime`-style
/// pattern suitable for DuckDB's `STRPTIME`.
///
/// The conversion runs in two stages:
/// 1. Multi-letter tokens are rewritten by ordered substring replacement
///    (`yyyy` is handled before `yy` so the four-digit year wins).
/// 2. A single pass maps the one-letter zone tokens (`z` -> `%Z`,
///    `Z` -> `%z`). The character immediately after a `%` is copied
///    verbatim so specifiers emitted by stage 1 are never rewritten.
///
/// Tokens that are not listed below are passed through unchanged.
fn java_to_c_fmt(fmt: &str) -> String {
    let replaced = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // `EEEE` is the full weekday name, which is `%A` in strptime;
        // `%W` would mean "week number of the year".
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(replaced.len());
    let mut chars = replaced.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                // Keep the specifier intact: copy `%` plus the next char
                // (if any) without inspecting it.
                out.push('%');
                if let Some(spec) = chars.next() {
                    out.push(spec);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
21059 let c_fmt = java_to_c_fmt(s);
21060 Ok(Expression::Function(Box::new(Function::new(
21061 "STRPTIME".to_string(),
21062 vec![val, Expression::string(&c_fmt)],
21063 ))))
21064 } else {
21065 Ok(Expression::Function(Box::new(Function::new(
21066 "STRPTIME".to_string(),
21067 vec![val, fmt_expr],
21068 ))))
21069 }
21070 } else {
21071 Ok(Expression::Function(Box::new(Function::new(
21072 "STRPTIME".to_string(),
21073 vec![val, fmt_expr],
21074 ))))
21075 }
21076 }
21077 // TO_DATE(x) 1-arg from Doris: date conversion
21078 "TO_DATE"
21079 if f.args.len() == 1
21080 && matches!(
21081 source,
21082 DialectType::Doris | DialectType::StarRocks
21083 ) =>
21084 {
21085 let arg = f.args.into_iter().next().unwrap();
21086 match target {
21087 DialectType::Oracle
21088 | DialectType::DuckDB
21089 | DialectType::TSQL => {
21090 // CAST(x AS DATE)
21091 Ok(Expression::Cast(Box::new(Cast {
21092 this: arg,
21093 to: DataType::Date,
21094 double_colon_syntax: false,
21095 trailing_comments: vec![],
21096 format: None,
21097 default: None,
21098 inferred_type: None,
21099 })))
21100 }
21101 DialectType::MySQL | DialectType::SingleStore => {
21102 // DATE(x)
21103 Ok(Expression::Function(Box::new(Function::new(
21104 "DATE".to_string(),
21105 vec![arg],
21106 ))))
21107 }
21108 _ => {
21109 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
21110 Ok(Expression::Function(Box::new(Function::new(
21111 "TO_DATE".to_string(),
21112 vec![arg],
21113 ))))
21114 }
21115 }
21116 }
21117 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
21118 "TO_DATE"
21119 if f.args.len() == 1
21120 && matches!(
21121 source,
21122 DialectType::Spark
21123 | DialectType::Databricks
21124 | DialectType::Hive
21125 ) =>
21126 {
21127 let arg = f.args.into_iter().next().unwrap();
21128 match target {
21129 DialectType::DuckDB => {
21130 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
21131 Ok(Expression::TryCast(Box::new(Cast {
21132 this: arg,
21133 to: DataType::Date,
21134 double_colon_syntax: false,
21135 trailing_comments: vec![],
21136 format: None,
21137 default: None,
21138 inferred_type: None,
21139 })))
21140 }
21141 DialectType::Presto
21142 | DialectType::Trino
21143 | DialectType::Athena => {
21144 // CAST(CAST(x AS TIMESTAMP) AS DATE)
21145 Ok(Self::double_cast_timestamp_date(arg))
21146 }
21147 DialectType::Snowflake => {
21148 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
21149 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
21150 Ok(Expression::Function(Box::new(Function::new(
21151 "TRY_TO_DATE".to_string(),
21152 vec![arg, Expression::string("yyyy-mm-DD")],
21153 ))))
21154 }
21155 _ => {
21156 // Default: keep as TO_DATE(x)
21157 Ok(Expression::Function(Box::new(Function::new(
21158 "TO_DATE".to_string(),
21159 vec![arg],
21160 ))))
21161 }
21162 }
21163 }
21164 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
21165 "TO_DATE"
21166 if f.args.len() == 2
21167 && matches!(
21168 source,
21169 DialectType::Spark
21170 | DialectType::Databricks
21171 | DialectType::Hive
21172 ) =>
21173 {
21174 let mut args = f.args;
21175 let val = args.remove(0);
21176 let fmt_expr = args.remove(0);
21177 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
21178
21179 if is_default_format {
21180 // Default format: same as 1-arg form
21181 match target {
21182 DialectType::DuckDB => {
21183 Ok(Expression::TryCast(Box::new(Cast {
21184 this: val,
21185 to: DataType::Date,
21186 double_colon_syntax: false,
21187 trailing_comments: vec![],
21188 format: None,
21189 default: None,
21190 inferred_type: None,
21191 })))
21192 }
21193 DialectType::Presto
21194 | DialectType::Trino
21195 | DialectType::Athena => {
21196 Ok(Self::double_cast_timestamp_date(val))
21197 }
21198 DialectType::Snowflake => {
21199 // TRY_TO_DATE(x, format) with Snowflake format mapping
21200 let sf_fmt = "yyyy-MM-dd"
21201 .replace("yyyy", "yyyy")
21202 .replace("MM", "mm")
21203 .replace("dd", "DD");
21204 Ok(Expression::Function(Box::new(Function::new(
21205 "TRY_TO_DATE".to_string(),
21206 vec![val, Expression::string(&sf_fmt)],
21207 ))))
21208 }
21209 _ => Ok(Expression::Function(Box::new(Function::new(
21210 "TO_DATE".to_string(),
21211 vec![val],
21212 )))),
21213 }
21214 } else {
21215 // Non-default format: use format-based parsing
21216 if let Expression::Literal(ref lit) = fmt_expr {
21217 if let Literal::String(ref s) = lit.as_ref() {
21218 match target {
21219 DialectType::DuckDB => {
21220 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Rewrites a Java/Spark datetime pattern as a C `strptime`-style
/// pattern for use with DuckDB's `TRY_STRPTIME`.
///
/// First, the multi-letter tokens are substituted in a fixed order
/// (`yyyy` ahead of `yy`, so a four-digit year is not split in two).
/// Then the result is scanned once: a `%` and the character following it
/// are emitted untouched, while a bare `z` becomes `%Z` and a bare `Z`
/// becomes `%z`. Everything else is copied through as-is.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    let replaced = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Full weekday name: strptime spells this `%A` (`%W` is the
        // week-of-year number and would parse the wrong field).
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(replaced.len());
    let mut chars = replaced.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                // Already a specifier: emit it and its letter verbatim so
                // e.g. `%Z` is not expanded a second time.
                out.push('%');
                if let Some(spec) = chars.next() {
                    out.push(spec);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
21256 let c_fmt = java_to_c_fmt_todate(s);
21257 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
21258 let try_strptime = Expression::Function(
21259 Box::new(Function::new(
21260 "TRY_STRPTIME".to_string(),
21261 vec![val, Expression::string(&c_fmt)],
21262 )),
21263 );
21264 let cast_ts =
21265 Expression::Cast(Box::new(Cast {
21266 this: try_strptime,
21267 to: DataType::Timestamp {
21268 precision: None,
21269 timezone: false,
21270 },
21271 double_colon_syntax: false,
21272 trailing_comments: vec![],
21273 format: None,
21274 default: None,
21275 inferred_type: None,
21276 }));
21277 Ok(Expression::Cast(Box::new(Cast {
21278 this: cast_ts,
21279 to: DataType::Date,
21280 double_colon_syntax: false,
21281 trailing_comments: vec![],
21282 format: None,
21283 default: None,
21284 inferred_type: None,
21285 })))
21286 }
21287 DialectType::Presto
21288 | DialectType::Trino
21289 | DialectType::Athena => {
21290 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
21291 let p_fmt = s
21292 .replace("yyyy", "%Y")
21293 .replace("SSSSSS", "%f")
21294 .replace("MM", "%m")
21295 .replace("dd", "%d")
21296 .replace("HH", "%H")
21297 .replace("mm", "%M")
21298 .replace("ss", "%S")
21299 .replace("yy", "%y");
21300 let date_parse = Expression::Function(
21301 Box::new(Function::new(
21302 "DATE_PARSE".to_string(),
21303 vec![val, Expression::string(&p_fmt)],
21304 )),
21305 );
21306 Ok(Expression::Cast(Box::new(Cast {
21307 this: date_parse,
21308 to: DataType::Date,
21309 double_colon_syntax: false,
21310 trailing_comments: vec![],
21311 format: None,
21312 default: None,
21313 inferred_type: None,
21314 })))
21315 }
21316 DialectType::Snowflake => {
21317 // TRY_TO_DATE(x, snowflake_fmt)
21318 Ok(Expression::Function(Box::new(
21319 Function::new(
21320 "TRY_TO_DATE".to_string(),
21321 vec![val, Expression::string(s)],
21322 ),
21323 )))
21324 }
21325 _ => Ok(Expression::Function(Box::new(
21326 Function::new(
21327 "TO_DATE".to_string(),
21328 vec![val, fmt_expr],
21329 ),
21330 ))),
21331 }
21332 } else {
21333 Ok(Expression::Function(Box::new(Function::new(
21334 "TO_DATE".to_string(),
21335 vec![val, fmt_expr],
21336 ))))
21337 }
21338 } else {
21339 Ok(Expression::Function(Box::new(Function::new(
21340 "TO_DATE".to_string(),
21341 vec![val, fmt_expr],
21342 ))))
21343 }
21344 }
21345 }
21346 // TO_TIMESTAMP(x) 1-arg: epoch conversion
21347 "TO_TIMESTAMP"
21348 if f.args.len() == 1
21349 && matches!(source, DialectType::DuckDB)
21350 && matches!(
21351 target,
21352 DialectType::BigQuery
21353 | DialectType::Presto
21354 | DialectType::Trino
21355 | DialectType::Hive
21356 | DialectType::Spark
21357 | DialectType::Databricks
21358 | DialectType::Athena
21359 ) =>
21360 {
21361 let arg = f.args.into_iter().next().unwrap();
21362 let func_name = match target {
21363 DialectType::BigQuery => "TIMESTAMP_SECONDS",
21364 DialectType::Presto
21365 | DialectType::Trino
21366 | DialectType::Athena
21367 | DialectType::Hive
21368 | DialectType::Spark
21369 | DialectType::Databricks => "FROM_UNIXTIME",
21370 _ => "TO_TIMESTAMP",
21371 };
21372 Ok(Expression::Function(Box::new(Function::new(
21373 func_name.to_string(),
21374 vec![arg],
21375 ))))
21376 }
21377 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
21378 "CONCAT" if f.args.len() == 1 => {
21379 let arg = f.args.into_iter().next().unwrap();
21380 match target {
21381 DialectType::Presto
21382 | DialectType::Trino
21383 | DialectType::Athena => {
21384 // CONCAT(a) -> CAST(a AS VARCHAR)
21385 Ok(Expression::Cast(Box::new(Cast {
21386 this: arg,
21387 to: DataType::VarChar {
21388 length: None,
21389 parenthesized_length: false,
21390 },
21391 trailing_comments: vec![],
21392 double_colon_syntax: false,
21393 format: None,
21394 default: None,
21395 inferred_type: None,
21396 })))
21397 }
21398 DialectType::TSQL => {
21399 // CONCAT(a) -> a
21400 Ok(arg)
21401 }
21402 DialectType::DuckDB => {
21403 // Keep CONCAT(a) for DuckDB (native support)
21404 Ok(Expression::Function(Box::new(Function::new(
21405 "CONCAT".to_string(),
21406 vec![arg],
21407 ))))
21408 }
21409 DialectType::Spark | DialectType::Databricks => {
21410 let coalesced = Expression::Coalesce(Box::new(
21411 crate::expressions::VarArgFunc {
21412 expressions: vec![arg, Expression::string("")],
21413 original_name: None,
21414 inferred_type: None,
21415 },
21416 ));
21417 Ok(Expression::Function(Box::new(Function::new(
21418 "CONCAT".to_string(),
21419 vec![coalesced],
21420 ))))
21421 }
21422 _ => Ok(Expression::Function(Box::new(Function::new(
21423 "CONCAT".to_string(),
21424 vec![arg],
21425 )))),
21426 }
21427 }
// REGEXP_EXTRACT(a, p, group) 3-arg for BigQuery: drop the group argument when it is the literal 0 (the 2-arg form is enough)
21429 "REGEXP_EXTRACT"
21430 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
21431 {
21432 // If group_index is 0, drop it
21433 let drop_group = match &f.args[2] {
21434 Expression::Literal(lit)
21435 if matches!(lit.as_ref(), Literal::Number(_)) =>
21436 {
21437 let Literal::Number(n) = lit.as_ref() else {
21438 unreachable!()
21439 };
21440 n == "0"
21441 }
21442 _ => false,
21443 };
21444 if drop_group {
21445 let mut args = f.args;
21446 args.truncate(2);
21447 Ok(Expression::Function(Box::new(Function::new(
21448 "REGEXP_EXTRACT".to_string(),
21449 args,
21450 ))))
21451 } else {
21452 Ok(Expression::Function(f))
21453 }
21454 }
21455 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
21456 "REGEXP_EXTRACT"
21457 if f.args.len() == 4
21458 && matches!(target, DialectType::Snowflake) =>
21459 {
21460 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
21461 let mut args = f.args;
21462 let this = args.remove(0);
21463 let pattern = args.remove(0);
21464 let group = args.remove(0);
21465 let flags = args.remove(0);
21466 Ok(Expression::Function(Box::new(Function::new(
21467 "REGEXP_SUBSTR".to_string(),
21468 vec![
21469 this,
21470 pattern,
21471 Expression::number(1),
21472 Expression::number(1),
21473 flags,
21474 group,
21475 ],
21476 ))))
21477 }
21478 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
21479 "REGEXP_SUBSTR"
21480 if f.args.len() == 3
21481 && matches!(
21482 target,
21483 DialectType::DuckDB
21484 | DialectType::Presto
21485 | DialectType::Trino
21486 | DialectType::Spark
21487 | DialectType::Databricks
21488 ) =>
21489 {
21490 let mut args = f.args;
21491 let this = args.remove(0);
21492 let pattern = args.remove(0);
21493 let position = args.remove(0);
21494 // Wrap subject in SUBSTRING(this, position) to apply the offset
21495 let substring_expr = Expression::Function(Box::new(Function::new(
21496 "SUBSTRING".to_string(),
21497 vec![this, position],
21498 )));
21499 let target_name = match target {
21500 DialectType::DuckDB => "REGEXP_EXTRACT",
21501 _ => "REGEXP_EXTRACT",
21502 };
21503 Ok(Expression::Function(Box::new(Function::new(
21504 target_name.to_string(),
21505 vec![substring_expr, pattern],
21506 ))))
21507 }
21508 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
21509 "TO_DAYS" if f.args.len() == 1 => {
21510 let x = f.args.into_iter().next().unwrap();
21511 let epoch = Expression::string("0000-01-01");
21512 // Build the final target-specific expression directly
21513 let datediff_expr = match target {
21514 DialectType::MySQL | DialectType::SingleStore => {
21515 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
21516 Expression::Function(Box::new(Function::new(
21517 "DATEDIFF".to_string(),
21518 vec![x, epoch],
21519 )))
21520 }
21521 DialectType::DuckDB => {
21522 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
21523 let cast_epoch = Expression::Cast(Box::new(Cast {
21524 this: epoch,
21525 to: DataType::Date,
21526 trailing_comments: Vec::new(),
21527 double_colon_syntax: false,
21528 format: None,
21529 default: None,
21530 inferred_type: None,
21531 }));
21532 let cast_x = Expression::Cast(Box::new(Cast {
21533 this: x,
21534 to: DataType::Date,
21535 trailing_comments: Vec::new(),
21536 double_colon_syntax: false,
21537 format: None,
21538 default: None,
21539 inferred_type: None,
21540 }));
21541 Expression::Function(Box::new(Function::new(
21542 "DATE_DIFF".to_string(),
21543 vec![Expression::string("DAY"), cast_epoch, cast_x],
21544 )))
21545 }
21546 DialectType::Presto
21547 | DialectType::Trino
21548 | DialectType::Athena => {
21549 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
21550 let cast_epoch = Self::double_cast_timestamp_date(epoch);
21551 let cast_x = Self::double_cast_timestamp_date(x);
21552 Expression::Function(Box::new(Function::new(
21553 "DATE_DIFF".to_string(),
21554 vec![Expression::string("DAY"), cast_epoch, cast_x],
21555 )))
21556 }
21557 _ => {
21558 // Default: (DATEDIFF(x, '0000-01-01') + 1)
21559 Expression::Function(Box::new(Function::new(
21560 "DATEDIFF".to_string(),
21561 vec![x, epoch],
21562 )))
21563 }
21564 };
21565 let add_one = Expression::Add(Box::new(BinaryOp::new(
21566 datediff_expr,
21567 Expression::number(1),
21568 )));
21569 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
21570 this: add_one,
21571 trailing_comments: Vec::new(),
21572 })))
21573 }
21574 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
21575 "STR_TO_DATE"
21576 if f.args.len() == 2
21577 && matches!(
21578 target,
21579 DialectType::Presto | DialectType::Trino
21580 ) =>
21581 {
21582 let mut args = f.args;
21583 let x = args.remove(0);
21584 let format_expr = args.remove(0);
21585 // Check if the format contains time components
21586 let has_time = if let Expression::Literal(ref lit) = format_expr {
21587 if let Literal::String(ref fmt) = lit.as_ref() {
21588 fmt.contains("%H")
21589 || fmt.contains("%T")
21590 || fmt.contains("%M")
21591 || fmt.contains("%S")
21592 || fmt.contains("%I")
21593 || fmt.contains("%p")
21594 } else {
21595 false
21596 }
21597 } else {
21598 false
21599 };
21600 let date_parse = Expression::Function(Box::new(Function::new(
21601 "DATE_PARSE".to_string(),
21602 vec![x, format_expr],
21603 )));
21604 if has_time {
21605 // Has time components: just DATE_PARSE
21606 Ok(date_parse)
21607 } else {
21608 // Date-only: CAST(DATE_PARSE(...) AS DATE)
21609 Ok(Expression::Cast(Box::new(Cast {
21610 this: date_parse,
21611 to: DataType::Date,
21612 trailing_comments: Vec::new(),
21613 double_colon_syntax: false,
21614 format: None,
21615 default: None,
21616 inferred_type: None,
21617 })))
21618 }
21619 }
21620 "STR_TO_DATE"
21621 if f.args.len() == 2
21622 && matches!(
21623 target,
21624 DialectType::PostgreSQL | DialectType::Redshift
21625 ) =>
21626 {
21627 let mut args = f.args;
21628 let x = args.remove(0);
21629 let fmt = args.remove(0);
21630 let pg_fmt = match fmt {
21631 Expression::Literal(lit)
21632 if matches!(lit.as_ref(), Literal::String(_)) =>
21633 {
21634 let Literal::String(s) = lit.as_ref() else {
21635 unreachable!()
21636 };
21637 Expression::string(
21638 &s.replace("%Y", "YYYY")
21639 .replace("%m", "MM")
21640 .replace("%d", "DD")
21641 .replace("%H", "HH24")
21642 .replace("%M", "MI")
21643 .replace("%S", "SS"),
21644 )
21645 }
21646 other => other,
21647 };
21648 let to_date = Expression::Function(Box::new(Function::new(
21649 "TO_DATE".to_string(),
21650 vec![x, pg_fmt],
21651 )));
21652 Ok(Expression::Cast(Box::new(Cast {
21653 this: to_date,
21654 to: DataType::Timestamp {
21655 timezone: false,
21656 precision: None,
21657 },
21658 trailing_comments: Vec::new(),
21659 double_colon_syntax: false,
21660 format: None,
21661 default: None,
21662 inferred_type: None,
21663 })))
21664 }
21665 // RANGE(start, end) -> GENERATE_SERIES for SQLite
21666 "RANGE"
21667 if (f.args.len() == 1 || f.args.len() == 2)
21668 && matches!(target, DialectType::SQLite) =>
21669 {
21670 if f.args.len() == 2 {
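// RANGE(start, end) -> GENERATE_SERIES(start, end); this arm emits only the bare call,
// so any (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) wrapping must happen elsewhere.
// NOTE(review): RANGE is exclusive on end while SQLite's GENERATE_SERIES is inclusive,
// yet `end` is passed through unchanged here - confirm the bound is adjusted elsewhere.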
21673 let mut args = f.args;
21674 let start = args.remove(0);
21675 let end = args.remove(0);
21676 Ok(Expression::Function(Box::new(Function::new(
21677 "GENERATE_SERIES".to_string(),
21678 vec![start, end],
21679 ))))
21680 } else {
21681 Ok(Expression::Function(f))
21682 }
21683 }
21684 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
21685 // When source is Snowflake, keep as-is (args already in correct form)
21686 "UNIFORM"
21687 if matches!(target, DialectType::Snowflake)
21688 && (f.args.len() == 2 || f.args.len() == 3) =>
21689 {
21690 if matches!(source, DialectType::Snowflake) {
21691 // Snowflake -> Snowflake: keep as-is
21692 Ok(Expression::Function(f))
21693 } else {
21694 let mut args = f.args;
21695 let low = args.remove(0);
21696 let high = args.remove(0);
21697 let random = if !args.is_empty() {
21698 let seed = args.remove(0);
21699 Expression::Function(Box::new(Function::new(
21700 "RANDOM".to_string(),
21701 vec![seed],
21702 )))
21703 } else {
21704 Expression::Function(Box::new(Function::new(
21705 "RANDOM".to_string(),
21706 vec![],
21707 )))
21708 };
21709 Ok(Expression::Function(Box::new(Function::new(
21710 "UNIFORM".to_string(),
21711 vec![low, high, random],
21712 ))))
21713 }
21714 }
21715 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
21716 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
21717 let mut args = f.args;
21718 let ts_arg = args.remove(0);
21719 let tz_arg = args.remove(0);
21720 // Cast string literal to TIMESTAMP for all targets
21721 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
21722 {
21723 Expression::Cast(Box::new(Cast {
21724 this: ts_arg,
21725 to: DataType::Timestamp {
21726 timezone: false,
21727 precision: None,
21728 },
21729 trailing_comments: vec![],
21730 double_colon_syntax: false,
21731 format: None,
21732 default: None,
21733 inferred_type: None,
21734 }))
21735 } else {
21736 ts_arg
21737 };
21738 match target {
21739 DialectType::Spark | DialectType::Databricks => {
21740 Ok(Expression::Function(Box::new(Function::new(
21741 "TO_UTC_TIMESTAMP".to_string(),
21742 vec![ts_cast, tz_arg],
21743 ))))
21744 }
21745 DialectType::Snowflake => {
21746 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
21747 Ok(Expression::Function(Box::new(Function::new(
21748 "CONVERT_TIMEZONE".to_string(),
21749 vec![tz_arg, Expression::string("UTC"), ts_cast],
21750 ))))
21751 }
21752 DialectType::Presto
21753 | DialectType::Trino
21754 | DialectType::Athena => {
21755 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
21756 let wtz = Expression::Function(Box::new(Function::new(
21757 "WITH_TIMEZONE".to_string(),
21758 vec![ts_cast, tz_arg],
21759 )));
21760 Ok(Expression::AtTimeZone(Box::new(
21761 crate::expressions::AtTimeZone {
21762 this: wtz,
21763 zone: Expression::string("UTC"),
21764 },
21765 )))
21766 }
21767 DialectType::BigQuery => {
21768 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
21769 let cast_dt = Expression::Cast(Box::new(Cast {
21770 this: if let Expression::Cast(c) = ts_cast {
21771 c.this
21772 } else {
21773 ts_cast.clone()
21774 },
21775 to: DataType::Custom {
21776 name: "DATETIME".to_string(),
21777 },
21778 trailing_comments: vec![],
21779 double_colon_syntax: false,
21780 format: None,
21781 default: None,
21782 inferred_type: None,
21783 }));
21784 let ts_func =
21785 Expression::Function(Box::new(Function::new(
21786 "TIMESTAMP".to_string(),
21787 vec![cast_dt, tz_arg],
21788 )));
21789 Ok(Expression::Function(Box::new(Function::new(
21790 "DATETIME".to_string(),
21791 vec![ts_func, Expression::string("UTC")],
21792 ))))
21793 }
21794 _ => {
21795 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
21796 let atz1 = Expression::AtTimeZone(Box::new(
21797 crate::expressions::AtTimeZone {
21798 this: ts_cast,
21799 zone: tz_arg,
21800 },
21801 ));
21802 Ok(Expression::AtTimeZone(Box::new(
21803 crate::expressions::AtTimeZone {
21804 this: atz1,
21805 zone: Expression::string("UTC"),
21806 },
21807 )))
21808 }
21809 }
21810 }
21811 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
21812 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
21813 let mut args = f.args;
21814 let ts_arg = args.remove(0);
21815 let tz_arg = args.remove(0);
21816 // Cast string literal to TIMESTAMP
21817 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
21818 {
21819 Expression::Cast(Box::new(Cast {
21820 this: ts_arg,
21821 to: DataType::Timestamp {
21822 timezone: false,
21823 precision: None,
21824 },
21825 trailing_comments: vec![],
21826 double_colon_syntax: false,
21827 format: None,
21828 default: None,
21829 inferred_type: None,
21830 }))
21831 } else {
21832 ts_arg
21833 };
21834 match target {
21835 DialectType::Spark | DialectType::Databricks => {
21836 Ok(Expression::Function(Box::new(Function::new(
21837 "FROM_UTC_TIMESTAMP".to_string(),
21838 vec![ts_cast, tz_arg],
21839 ))))
21840 }
21841 DialectType::Presto
21842 | DialectType::Trino
21843 | DialectType::Athena => {
21844 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
21845 Ok(Expression::Function(Box::new(Function::new(
21846 "AT_TIMEZONE".to_string(),
21847 vec![ts_cast, tz_arg],
21848 ))))
21849 }
21850 DialectType::Snowflake => {
21851 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
21852 Ok(Expression::Function(Box::new(Function::new(
21853 "CONVERT_TIMEZONE".to_string(),
21854 vec![Expression::string("UTC"), tz_arg, ts_cast],
21855 ))))
21856 }
21857 _ => {
21858 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
21859 Ok(Expression::AtTimeZone(Box::new(
21860 crate::expressions::AtTimeZone {
21861 this: ts_cast,
21862 zone: tz_arg,
21863 },
21864 )))
21865 }
21866 }
21867 }
21868 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
21869 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
21870 let name = match target {
21871 DialectType::Snowflake => "OBJECT_CONSTRUCT",
21872 _ => "MAP",
21873 };
21874 Ok(Expression::Function(Box::new(Function::new(
21875 name.to_string(),
21876 f.args,
21877 ))))
21878 }
21879 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
21880 "STR_TO_MAP" if f.args.len() >= 1 => match target {
21881 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21882 Ok(Expression::Function(Box::new(Function::new(
21883 "SPLIT_TO_MAP".to_string(),
21884 f.args,
21885 ))))
21886 }
21887 _ => Ok(Expression::Function(f)),
21888 },
21889 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
21890 "TIME_TO_STR" if f.args.len() == 2 => {
21891 let mut args = f.args;
21892 let this = args.remove(0);
21893 let fmt_expr = args.remove(0);
21894 let format = if let Expression::Literal(lit) = fmt_expr {
21895 if let Literal::String(s) = lit.as_ref() {
21896 s.clone()
21897 } else {
21898 String::new()
21899 }
21900 } else {
21901 "%Y-%m-%d %H:%M:%S".to_string()
21902 };
21903 Ok(Expression::TimeToStr(Box::new(
21904 crate::expressions::TimeToStr {
21905 this: Box::new(this),
21906 format,
21907 culture: None,
21908 zone: None,
21909 },
21910 )))
21911 }
21912 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
21913 "STR_TO_TIME" if f.args.len() == 2 => {
21914 let mut args = f.args;
21915 let this = args.remove(0);
21916 let fmt_expr = args.remove(0);
21917 let format = if let Expression::Literal(lit) = fmt_expr {
21918 if let Literal::String(s) = lit.as_ref() {
21919 s.clone()
21920 } else {
21921 String::new()
21922 }
21923 } else {
21924 "%Y-%m-%d %H:%M:%S".to_string()
21925 };
21926 Ok(Expression::StrToTime(Box::new(
21927 crate::expressions::StrToTime {
21928 this: Box::new(this),
21929 format,
21930 zone: None,
21931 safe: None,
21932 target_type: None,
21933 },
21934 )))
21935 }
21936 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
21937 "STR_TO_UNIX" if f.args.len() >= 1 => {
21938 let mut args = f.args;
21939 let this = args.remove(0);
21940 let format = if !args.is_empty() {
21941 if let Expression::Literal(lit) = args.remove(0) {
21942 if let Literal::String(s) = lit.as_ref() {
21943 Some(s.clone())
21944 } else {
21945 None
21946 }
21947 } else {
21948 None
21949 }
21950 } else {
21951 None
21952 };
21953 Ok(Expression::StrToUnix(Box::new(
21954 crate::expressions::StrToUnix {
21955 this: Some(Box::new(this)),
21956 format,
21957 },
21958 )))
21959 }
21960 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
21961 "TIME_TO_UNIX" if f.args.len() == 1 => {
21962 let mut args = f.args;
21963 let this = args.remove(0);
21964 Ok(Expression::TimeToUnix(Box::new(
21965 crate::expressions::UnaryFunc {
21966 this,
21967 original_name: None,
21968 inferred_type: None,
21969 },
21970 )))
21971 }
21972 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
21973 "UNIX_TO_STR" if f.args.len() >= 1 => {
21974 let mut args = f.args;
21975 let this = args.remove(0);
21976 let format = if !args.is_empty() {
21977 if let Expression::Literal(lit) = args.remove(0) {
21978 if let Literal::String(s) = lit.as_ref() {
21979 Some(s.clone())
21980 } else {
21981 None
21982 }
21983 } else {
21984 None
21985 }
21986 } else {
21987 None
21988 };
21989 Ok(Expression::UnixToStr(Box::new(
21990 crate::expressions::UnixToStr {
21991 this: Box::new(this),
21992 format,
21993 },
21994 )))
21995 }
21996 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
21997 "UNIX_TO_TIME" if f.args.len() == 1 => {
21998 let mut args = f.args;
21999 let this = args.remove(0);
22000 Ok(Expression::UnixToTime(Box::new(
22001 crate::expressions::UnixToTime {
22002 this: Box::new(this),
22003 scale: None,
22004 zone: None,
22005 hours: None,
22006 minutes: None,
22007 format: None,
22008 target_type: None,
22009 },
22010 )))
22011 }
22012 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
22013 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
22014 let mut args = f.args;
22015 let this = args.remove(0);
22016 Ok(Expression::TimeStrToDate(Box::new(
22017 crate::expressions::UnaryFunc {
22018 this,
22019 original_name: None,
22020 inferred_type: None,
22021 },
22022 )))
22023 }
22024 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
22025 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
22026 let mut args = f.args;
22027 let this = args.remove(0);
22028 Ok(Expression::TimeStrToTime(Box::new(
22029 crate::expressions::TimeStrToTime {
22030 this: Box::new(this),
22031 zone: None,
22032 },
22033 )))
22034 }
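// MONTHS_BETWEEN(end, start) -> DATE_DIFF-based expansion with a fractional-day CASE for DuckDB,
// DATEDIFF(MONTH, start, end) for Snowflake/Redshift, DATE_DIFF('MONTH', start, end) for
// Presto/Trino/Athena; other targets keep the call unchanged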
22036 "MONTHS_BETWEEN" if f.args.len() == 2 => {
22037 match target {
22038 DialectType::DuckDB => {
22039 let mut args = f.args;
22040 let end_date = args.remove(0);
22041 let start_date = args.remove(0);
22042 let cast_end = Self::ensure_cast_date(end_date);
22043 let cast_start = Self::ensure_cast_date(start_date);
22044 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
22045 let dd = Expression::Function(Box::new(Function::new(
22046 "DATE_DIFF".to_string(),
22047 vec![
22048 Expression::string("MONTH"),
22049 cast_start.clone(),
22050 cast_end.clone(),
22051 ],
22052 )));
22053 let day_end =
22054 Expression::Function(Box::new(Function::new(
22055 "DAY".to_string(),
22056 vec![cast_end.clone()],
22057 )));
22058 let day_start =
22059 Expression::Function(Box::new(Function::new(
22060 "DAY".to_string(),
22061 vec![cast_start.clone()],
22062 )));
22063 let last_day_end =
22064 Expression::Function(Box::new(Function::new(
22065 "LAST_DAY".to_string(),
22066 vec![cast_end.clone()],
22067 )));
22068 let last_day_start =
22069 Expression::Function(Box::new(Function::new(
22070 "LAST_DAY".to_string(),
22071 vec![cast_start.clone()],
22072 )));
22073 let day_last_end = Expression::Function(Box::new(
22074 Function::new("DAY".to_string(), vec![last_day_end]),
22075 ));
22076 let day_last_start = Expression::Function(Box::new(
22077 Function::new("DAY".to_string(), vec![last_day_start]),
22078 ));
22079 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
22080 day_end.clone(),
22081 day_last_end,
22082 )));
22083 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
22084 day_start.clone(),
22085 day_last_start,
22086 )));
22087 let both_cond =
22088 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
22089 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
22090 day_end, day_start,
22091 )));
22092 let day_diff_paren = Expression::Paren(Box::new(
22093 crate::expressions::Paren {
22094 this: day_diff,
22095 trailing_comments: Vec::new(),
22096 },
22097 ));
22098 let frac = Expression::Div(Box::new(BinaryOp::new(
22099 day_diff_paren,
22100 Expression::Literal(Box::new(Literal::Number(
22101 "31.0".to_string(),
22102 ))),
22103 )));
22104 let case_expr = Expression::Case(Box::new(Case {
22105 operand: None,
22106 whens: vec![(both_cond, Expression::number(0))],
22107 else_: Some(frac),
22108 comments: Vec::new(),
22109 inferred_type: None,
22110 }));
22111 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
22112 }
22113 DialectType::Snowflake | DialectType::Redshift => {
22114 let mut args = f.args;
22115 let end_date = args.remove(0);
22116 let start_date = args.remove(0);
22117 let unit = Expression::Identifier(Identifier::new("MONTH"));
22118 Ok(Expression::Function(Box::new(Function::new(
22119 "DATEDIFF".to_string(),
22120 vec![unit, start_date, end_date],
22121 ))))
22122 }
22123 DialectType::Presto
22124 | DialectType::Trino
22125 | DialectType::Athena => {
22126 let mut args = f.args;
22127 let end_date = args.remove(0);
22128 let start_date = args.remove(0);
22129 Ok(Expression::Function(Box::new(Function::new(
22130 "DATE_DIFF".to_string(),
22131 vec![Expression::string("MONTH"), start_date, end_date],
22132 ))))
22133 }
22134 _ => Ok(Expression::Function(f)),
22135 }
22136 }
22137 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
22138 // Drop the roundOff arg for non-Spark targets, keep it for Spark
22139 "MONTHS_BETWEEN" if f.args.len() == 3 => {
22140 match target {
22141 DialectType::Spark | DialectType::Databricks => {
22142 Ok(Expression::Function(f))
22143 }
22144 _ => {
22145 // Drop the 3rd arg and delegate to the 2-arg logic
22146 let mut args = f.args;
22147 let end_date = args.remove(0);
22148 let start_date = args.remove(0);
22149 // Re-create as 2-arg and process
22150 let f2 = Function::new(
22151 "MONTHS_BETWEEN".to_string(),
22152 vec![end_date, start_date],
22153 );
22154 let e2 = Expression::Function(Box::new(f2));
22155 Self::cross_dialect_normalize(e2, source, target)
22156 }
22157 }
22158 }
// TO_TIMESTAMP(x) with 1 arg from Spark/Databricks/Hive -> CAST(x AS TIMESTAMP) (for every target)
22160 "TO_TIMESTAMP"
22161 if f.args.len() == 1
22162 && matches!(
22163 source,
22164 DialectType::Spark
22165 | DialectType::Databricks
22166 | DialectType::Hive
22167 ) =>
22168 {
22169 let arg = f.args.into_iter().next().unwrap();
22170 Ok(Expression::Cast(Box::new(Cast {
22171 this: arg,
22172 to: DataType::Timestamp {
22173 timezone: false,
22174 precision: None,
22175 },
22176 trailing_comments: vec![],
22177 double_colon_syntax: false,
22178 format: None,
22179 default: None,
22180 inferred_type: None,
22181 })))
22182 }
// STRING(x) from Spark/Databricks -> CAST(x AS STRING) for Spark/Databricks/Hive targets,
// CAST(x AS TEXT) for every other target
22184 "STRING"
22185 if f.args.len() == 1
22186 && matches!(
22187 source,
22188 DialectType::Spark | DialectType::Databricks
22189 ) =>
22190 {
22191 let arg = f.args.into_iter().next().unwrap();
22192 let dt = match target {
22193 DialectType::Spark
22194 | DialectType::Databricks
22195 | DialectType::Hive => DataType::Custom {
22196 name: "STRING".to_string(),
22197 },
22198 _ => DataType::Text,
22199 };
22200 Ok(Expression::Cast(Box::new(Cast {
22201 this: arg,
22202 to: dt,
22203 trailing_comments: vec![],
22204 double_colon_syntax: false,
22205 format: None,
22206 default: None,
22207 inferred_type: None,
22208 })))
22209 }
22210 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
22211 "LOGICAL_OR" if f.args.len() == 1 => {
22212 let name = match target {
22213 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
22214 _ => "LOGICAL_OR",
22215 };
22216 Ok(Expression::Function(Box::new(Function::new(
22217 name.to_string(),
22218 f.args,
22219 ))))
22220 }
22221 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
22222 "SPLIT"
22223 if f.args.len() == 2
22224 && matches!(
22225 source,
22226 DialectType::Spark
22227 | DialectType::Databricks
22228 | DialectType::Hive
22229 ) =>
22230 {
22231 let name = match target {
22232 DialectType::DuckDB => "STR_SPLIT_REGEX",
22233 DialectType::Presto
22234 | DialectType::Trino
22235 | DialectType::Athena => "REGEXP_SPLIT",
22236 DialectType::Spark
22237 | DialectType::Databricks
22238 | DialectType::Hive => "SPLIT",
22239 _ => "SPLIT",
22240 };
22241 Ok(Expression::Function(Box::new(Function::new(
22242 name.to_string(),
22243 f.args,
22244 ))))
22245 }
22246 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
22247 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
22248 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22249 Ok(Expression::Function(Box::new(Function::new(
22250 "ELEMENT_AT".to_string(),
22251 f.args,
22252 ))))
22253 }
22254 DialectType::DuckDB => {
22255 let mut args = f.args;
22256 let arr = args.remove(0);
22257 let idx = args.remove(0);
22258 Ok(Expression::Subscript(Box::new(
22259 crate::expressions::Subscript {
22260 this: arr,
22261 index: idx,
22262 },
22263 )))
22264 }
22265 _ => Ok(Expression::Function(f)),
22266 },
22267 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
22268 "ARRAY_FILTER" if f.args.len() == 2 => {
22269 let name = match target {
22270 DialectType::DuckDB => "LIST_FILTER",
22271 DialectType::StarRocks => "ARRAY_FILTER",
22272 _ => "FILTER",
22273 };
22274 Ok(Expression::Function(Box::new(Function::new(
22275 name.to_string(),
22276 f.args,
22277 ))))
22278 }
22279 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
22280 "FILTER" if f.args.len() == 2 => {
22281 let name = match target {
22282 DialectType::DuckDB => "LIST_FILTER",
22283 DialectType::StarRocks => "ARRAY_FILTER",
22284 _ => "FILTER",
22285 };
22286 Ok(Expression::Function(Box::new(Function::new(
22287 name.to_string(),
22288 f.args,
22289 ))))
22290 }
22291 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
22292 "REDUCE" if f.args.len() >= 3 => {
22293 let name = match target {
22294 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
22295 _ => "REDUCE",
22296 };
22297 Ok(Expression::Function(Box::new(Function::new(
22298 name.to_string(),
22299 f.args,
22300 ))))
22301 }
22302 // CURRENT_SCHEMA() -> dialect-specific
22303 "CURRENT_SCHEMA" => {
22304 match target {
22305 DialectType::PostgreSQL => {
22306 // PostgreSQL: CURRENT_SCHEMA (no parens)
22307 Ok(Expression::Function(Box::new(Function {
22308 name: "CURRENT_SCHEMA".to_string(),
22309 args: vec![],
22310 distinct: false,
22311 trailing_comments: vec![],
22312 use_bracket_syntax: false,
22313 no_parens: true,
22314 quoted: false,
22315 span: None,
22316 inferred_type: None,
22317 })))
22318 }
22319 DialectType::MySQL
22320 | DialectType::Doris
22321 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
22322 Function::new("SCHEMA".to_string(), vec![]),
22323 ))),
22324 DialectType::TSQL => Ok(Expression::Function(Box::new(
22325 Function::new("SCHEMA_NAME".to_string(), vec![]),
22326 ))),
22327 DialectType::SQLite => Ok(Expression::Literal(Box::new(
22328 Literal::String("main".to_string()),
22329 ))),
22330 _ => Ok(Expression::Function(f)),
22331 }
22332 }
22333 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
22334 "LTRIM" if f.args.len() == 2 => match target {
22335 DialectType::Spark
22336 | DialectType::Hive
22337 | DialectType::Databricks
22338 | DialectType::ClickHouse => {
22339 let mut args = f.args;
22340 let str_expr = args.remove(0);
22341 let chars = args.remove(0);
22342 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
22343 this: str_expr,
22344 characters: Some(chars),
22345 position: crate::expressions::TrimPosition::Leading,
22346 sql_standard_syntax: true,
22347 position_explicit: true,
22348 })))
22349 }
22350 _ => Ok(Expression::Function(f)),
22351 },
22352 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
22353 "RTRIM" if f.args.len() == 2 => match target {
22354 DialectType::Spark
22355 | DialectType::Hive
22356 | DialectType::Databricks
22357 | DialectType::ClickHouse => {
22358 let mut args = f.args;
22359 let str_expr = args.remove(0);
22360 let chars = args.remove(0);
22361 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
22362 this: str_expr,
22363 characters: Some(chars),
22364 position: crate::expressions::TrimPosition::Trailing,
22365 sql_standard_syntax: true,
22366 position_explicit: true,
22367 })))
22368 }
22369 _ => Ok(Expression::Function(f)),
22370 },
22371 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
22372 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
22373 DialectType::ClickHouse => {
22374 let mut new_f = *f;
22375 new_f.name = "arrayReverse".to_string();
22376 Ok(Expression::Function(Box::new(new_f)))
22377 }
22378 _ => Ok(Expression::Function(f)),
22379 },
22380 // UUID() -> NEWID() for TSQL
22381 "UUID" if f.args.is_empty() => match target {
22382 DialectType::TSQL | DialectType::Fabric => {
22383 Ok(Expression::Function(Box::new(Function::new(
22384 "NEWID".to_string(),
22385 vec![],
22386 ))))
22387 }
22388 _ => Ok(Expression::Function(f)),
22389 },
22390 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
22391 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
22392 DialectType::ClickHouse => {
22393 let mut new_f = *f;
22394 new_f.name = "farmFingerprint64".to_string();
22395 Ok(Expression::Function(Box::new(new_f)))
22396 }
22397 DialectType::Redshift => {
22398 let mut new_f = *f;
22399 new_f.name = "FARMFINGERPRINT64".to_string();
22400 Ok(Expression::Function(Box::new(new_f)))
22401 }
22402 _ => Ok(Expression::Function(f)),
22403 },
22404 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
22405 "JSON_KEYS" => match target {
22406 DialectType::Databricks | DialectType::Spark => {
22407 let mut new_f = *f;
22408 new_f.name = "JSON_OBJECT_KEYS".to_string();
22409 Ok(Expression::Function(Box::new(new_f)))
22410 }
22411 DialectType::Snowflake => {
22412 let mut new_f = *f;
22413 new_f.name = "OBJECT_KEYS".to_string();
22414 Ok(Expression::Function(Box::new(new_f)))
22415 }
22416 _ => Ok(Expression::Function(f)),
22417 },
22418 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
22419 "WEEKOFYEAR" => match target {
22420 DialectType::Snowflake => {
22421 let mut new_f = *f;
22422 new_f.name = "WEEKISO".to_string();
22423 Ok(Expression::Function(Box::new(new_f)))
22424 }
22425 _ => Ok(Expression::Function(f)),
22426 },
// FORMAT(fmt, args...) from Generic -> FORMAT_STRING(fmt, args...) for Databricks/Spark
22428 "FORMAT"
22429 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
22430 {
22431 match target {
22432 DialectType::Databricks | DialectType::Spark => {
22433 let mut new_f = *f;
22434 new_f.name = "FORMAT_STRING".to_string();
22435 Ok(Expression::Function(Box::new(new_f)))
22436 }
22437 _ => Ok(Expression::Function(f)),
22438 }
22439 }
22440 // CONCAT_WS from Generic is null-propagating in SQLGlot fixtures.
22441 // Trino also requires non-separator arguments cast to VARCHAR.
22442 "CONCAT_WS" if f.args.len() >= 2 => {
22443 fn concat_ws_null_case(
22444 args: Vec<Expression>,
22445 else_expr: Expression,
22446 ) -> Expression {
22447 let mut null_checks = args.iter().cloned().map(|arg| {
22448 Expression::IsNull(Box::new(crate::expressions::IsNull {
22449 this: arg,
22450 not: false,
22451 postfix_form: false,
22452 }))
22453 });
22454 let first_null_check = null_checks
22455 .next()
22456 .expect("CONCAT_WS with >= 2 args must yield a null check");
22457 let null_check =
22458 null_checks.fold(first_null_check, |left, right| {
22459 Expression::Or(Box::new(BinaryOp {
22460 left,
22461 right,
22462 left_comments: Vec::new(),
22463 operator_comments: Vec::new(),
22464 trailing_comments: Vec::new(),
22465 inferred_type: None,
22466 }))
22467 });
22468 Expression::Case(Box::new(Case {
22469 operand: None,
22470 whens: vec![(null_check, Expression::Null(Null))],
22471 else_: Some(else_expr),
22472 comments: vec![],
22473 inferred_type: None,
22474 }))
22475 }
22476
22477 match target {
22478 DialectType::Trino
22479 if matches!(source, DialectType::Generic) =>
22480 {
22481 let original_args = f.args.clone();
22482 let mut args = f.args;
22483 let sep = args.remove(0);
22484 let cast_args: Vec<Expression> = args
22485 .into_iter()
22486 .map(|a| {
22487 Expression::Cast(Box::new(Cast {
22488 this: a,
22489 to: DataType::VarChar {
22490 length: None,
22491 parenthesized_length: false,
22492 },
22493 double_colon_syntax: false,
22494 trailing_comments: Vec::new(),
22495 format: None,
22496 default: None,
22497 inferred_type: None,
22498 }))
22499 })
22500 .collect();
22501 let mut new_args = vec![sep];
22502 new_args.extend(cast_args);
22503 let else_expr = Expression::Function(Box::new(
22504 Function::new("CONCAT_WS".to_string(), new_args),
22505 ));
22506 Ok(concat_ws_null_case(original_args, else_expr))
22507 }
22508 DialectType::Presto
22509 | DialectType::Trino
22510 | DialectType::Athena => {
22511 let mut args = f.args;
22512 let sep = args.remove(0);
22513 let cast_args: Vec<Expression> = args
22514 .into_iter()
22515 .map(|a| {
22516 Expression::Cast(Box::new(Cast {
22517 this: a,
22518 to: DataType::VarChar {
22519 length: None,
22520 parenthesized_length: false,
22521 },
22522 double_colon_syntax: false,
22523 trailing_comments: Vec::new(),
22524 format: None,
22525 default: None,
22526 inferred_type: None,
22527 }))
22528 })
22529 .collect();
22530 let mut new_args = vec![sep];
22531 new_args.extend(cast_args);
22532 Ok(Expression::Function(Box::new(Function::new(
22533 "CONCAT_WS".to_string(),
22534 new_args,
22535 ))))
22536 }
22537 DialectType::Spark
22538 | DialectType::Hive
22539 | DialectType::DuckDB
22540 if matches!(source, DialectType::Generic) =>
22541 {
22542 let args = f.args;
22543 let else_expr = Expression::Function(Box::new(
22544 Function::new("CONCAT_WS".to_string(), args.clone()),
22545 ));
22546 Ok(concat_ws_null_case(args, else_expr))
22547 }
22548 _ => Ok(Expression::Function(f)),
22549 }
22550 }
22551 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
22552 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
22553 DialectType::DuckDB
22554 if f.args.len() == 3
22555 && matches!(source, DialectType::Snowflake) =>
22556 {
22557 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
22558 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
22559 let mut args = f.args;
22560 let arr = args.remove(0);
22561 let start = args.remove(0);
22562 let end = args.remove(0);
22563
22564 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
22565 let adjusted_start = Expression::Case(Box::new(Case {
22566 operand: None,
22567 whens: vec![(
22568 Expression::Gte(Box::new(BinaryOp {
22569 left: start.clone(),
22570 right: Expression::number(0),
22571 left_comments: vec![],
22572 operator_comments: vec![],
22573 trailing_comments: vec![],
22574 inferred_type: None,
22575 })),
22576 Expression::Add(Box::new(BinaryOp {
22577 left: start.clone(),
22578 right: Expression::number(1),
22579 left_comments: vec![],
22580 operator_comments: vec![],
22581 trailing_comments: vec![],
22582 inferred_type: None,
22583 })),
22584 )],
22585 else_: Some(start),
22586 comments: vec![],
22587 inferred_type: None,
22588 }));
22589
22590 // CASE WHEN end < 0 THEN end - 1 ELSE end END
22591 let adjusted_end = Expression::Case(Box::new(Case {
22592 operand: None,
22593 whens: vec![(
22594 Expression::Lt(Box::new(BinaryOp {
22595 left: end.clone(),
22596 right: Expression::number(0),
22597 left_comments: vec![],
22598 operator_comments: vec![],
22599 trailing_comments: vec![],
22600 inferred_type: None,
22601 })),
22602 Expression::Sub(Box::new(BinaryOp {
22603 left: end.clone(),
22604 right: Expression::number(1),
22605 left_comments: vec![],
22606 operator_comments: vec![],
22607 trailing_comments: vec![],
22608 inferred_type: None,
22609 })),
22610 )],
22611 else_: Some(end),
22612 comments: vec![],
22613 inferred_type: None,
22614 }));
22615
22616 Ok(Expression::Function(Box::new(Function::new(
22617 "ARRAY_SLICE".to_string(),
22618 vec![arr, adjusted_start, adjusted_end],
22619 ))))
22620 }
22621 DialectType::Presto
22622 | DialectType::Trino
22623 | DialectType::Athena
22624 | DialectType::Databricks
22625 | DialectType::Spark => {
22626 let mut new_f = *f;
22627 new_f.name = "SLICE".to_string();
22628 Ok(Expression::Function(Box::new(new_f)))
22629 }
22630 DialectType::ClickHouse => {
22631 let mut new_f = *f;
22632 new_f.name = "arraySlice".to_string();
22633 Ok(Expression::Function(Box::new(new_f)))
22634 }
22635 _ => Ok(Expression::Function(f)),
22636 },
22637 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
22638 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
22639 DialectType::DuckDB => {
22640 let mut args = f.args;
22641 let arr = args.remove(0);
22642 let val = args.remove(0);
22643 Ok(Expression::Function(Box::new(Function::new(
22644 "LIST_PREPEND".to_string(),
22645 vec![val, arr],
22646 ))))
22647 }
22648 _ => Ok(Expression::Function(f)),
22649 },
22650 // ARRAY_REMOVE(arr, target) -> dialect-specific
22651 "ARRAY_REMOVE" if f.args.len() == 2 => {
22652 match target {
22653 DialectType::DuckDB => {
22654 let mut args = f.args;
22655 let arr = args.remove(0);
22656 let target_val = args.remove(0);
22657 let u_id = crate::expressions::Identifier::new("_u");
22658 // LIST_FILTER(arr, _u -> _u <> target)
22659 let lambda = Expression::Lambda(Box::new(
22660 crate::expressions::LambdaExpr {
22661 parameters: vec![u_id.clone()],
22662 body: Expression::Neq(Box::new(BinaryOp {
22663 left: Expression::Identifier(u_id),
22664 right: target_val,
22665 left_comments: Vec::new(),
22666 operator_comments: Vec::new(),
22667 trailing_comments: Vec::new(),
22668 inferred_type: None,
22669 })),
22670 colon: false,
22671 parameter_types: Vec::new(),
22672 },
22673 ));
22674 Ok(Expression::Function(Box::new(Function::new(
22675 "LIST_FILTER".to_string(),
22676 vec![arr, lambda],
22677 ))))
22678 }
22679 DialectType::ClickHouse => {
22680 let mut args = f.args;
22681 let arr = args.remove(0);
22682 let target_val = args.remove(0);
22683 let u_id = crate::expressions::Identifier::new("_u");
22684 // arrayFilter(_u -> _u <> target, arr)
22685 let lambda = Expression::Lambda(Box::new(
22686 crate::expressions::LambdaExpr {
22687 parameters: vec![u_id.clone()],
22688 body: Expression::Neq(Box::new(BinaryOp {
22689 left: Expression::Identifier(u_id),
22690 right: target_val,
22691 left_comments: Vec::new(),
22692 operator_comments: Vec::new(),
22693 trailing_comments: Vec::new(),
22694 inferred_type: None,
22695 })),
22696 colon: false,
22697 parameter_types: Vec::new(),
22698 },
22699 ));
22700 Ok(Expression::Function(Box::new(Function::new(
22701 "arrayFilter".to_string(),
22702 vec![lambda, arr],
22703 ))))
22704 }
22705 DialectType::BigQuery => {
22706 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
22707 let mut args = f.args;
22708 let arr = args.remove(0);
22709 let target_val = args.remove(0);
22710 let u_id = crate::expressions::Identifier::new("_u");
22711 let u_col = Expression::Column(Box::new(
22712 crate::expressions::Column {
22713 name: u_id.clone(),
22714 table: None,
22715 join_mark: false,
22716 trailing_comments: Vec::new(),
22717 span: None,
22718 inferred_type: None,
22719 },
22720 ));
22721 // UNNEST(the_array) AS _u
22722 let unnest_expr = Expression::Unnest(Box::new(
22723 crate::expressions::UnnestFunc {
22724 this: arr,
22725 expressions: Vec::new(),
22726 with_ordinality: false,
22727 alias: None,
22728 offset_alias: None,
22729 },
22730 ));
22731 let aliased_unnest = Expression::Alias(Box::new(
22732 crate::expressions::Alias {
22733 this: unnest_expr,
22734 alias: u_id.clone(),
22735 column_aliases: Vec::new(),
22736 alias_explicit_as: false,
22737 alias_keyword: None,
22738 pre_alias_comments: Vec::new(),
22739 trailing_comments: Vec::new(),
22740 inferred_type: None,
22741 },
22742 ));
22743 // _u <> target
22744 let where_cond = Expression::Neq(Box::new(BinaryOp {
22745 left: u_col.clone(),
22746 right: target_val,
22747 left_comments: Vec::new(),
22748 operator_comments: Vec::new(),
22749 trailing_comments: Vec::new(),
22750 inferred_type: None,
22751 }));
22752 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
22753 let subquery = Expression::Select(Box::new(
22754 crate::expressions::Select::new()
22755 .column(u_col)
22756 .from(aliased_unnest)
22757 .where_(where_cond),
22758 ));
22759 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
22760 Ok(Expression::ArrayFunc(Box::new(
22761 crate::expressions::ArrayConstructor {
22762 expressions: vec![subquery],
22763 bracket_notation: false,
22764 use_list_keyword: false,
22765 },
22766 )))
22767 }
22768 _ => Ok(Expression::Function(f)),
22769 }
22770 }
22771 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
22772 "PARSE_JSON" if f.args.len() == 1 => {
22773 match target {
22774 DialectType::SQLite
22775 | DialectType::Doris
22776 | DialectType::MySQL
22777 | DialectType::StarRocks => {
22778 // Strip PARSE_JSON, return the inner argument
22779 Ok(f.args.into_iter().next().unwrap())
22780 }
22781 _ => Ok(Expression::Function(f)),
22782 }
22783 }
22784 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
22785 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
22786 "JSON_REMOVE" => Ok(Expression::Function(f)),
22787 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
22788 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
22789 "JSON_SET" => Ok(Expression::Function(f)),
22790 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
22791 // Behavior per search value type:
22792 // NULL literal -> CASE WHEN x IS NULL THEN result
22793 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
22794 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
22795 "DECODE" if f.args.len() >= 3 => {
22796 // Keep as DECODE for targets that support it natively
22797 let keep_as_decode = matches!(
22798 target,
22799 DialectType::Oracle
22800 | DialectType::Snowflake
22801 | DialectType::Redshift
22802 | DialectType::Teradata
22803 | DialectType::Spark
22804 | DialectType::Databricks
22805 );
22806 if keep_as_decode {
22807 return Ok(Expression::Function(f));
22808 }
22809
22810 let mut args = f.args;
22811 let this_expr = args.remove(0);
22812 let mut pairs = Vec::new();
22813 let mut default = None;
22814 let mut i = 0;
22815 while i + 1 < args.len() {
22816 pairs.push((args[i].clone(), args[i + 1].clone()));
22817 i += 2;
22818 }
22819 if i < args.len() {
22820 default = Some(args[i].clone());
22821 }
22822 // Helper: check if expression is a literal value
22823 fn is_literal(e: &Expression) -> bool {
22824 matches!(
22825 e,
22826 Expression::Literal(_)
22827 | Expression::Boolean(_)
22828 | Expression::Neg(_)
22829 )
22830 }
22831 let whens: Vec<(Expression, Expression)> = pairs
22832 .into_iter()
22833 .map(|(search, result)| {
22834 if matches!(&search, Expression::Null(_)) {
22835 // NULL search -> IS NULL
22836 let condition = Expression::Is(Box::new(BinaryOp {
22837 left: this_expr.clone(),
22838 right: Expression::Null(crate::expressions::Null),
22839 left_comments: Vec::new(),
22840 operator_comments: Vec::new(),
22841 trailing_comments: Vec::new(),
22842 inferred_type: None,
22843 }));
22844 (condition, result)
22845 } else if is_literal(&search) {
22846 // Literal search -> simple equality
22847 let eq = Expression::Eq(Box::new(BinaryOp {
22848 left: this_expr.clone(),
22849 right: search,
22850 left_comments: Vec::new(),
22851 operator_comments: Vec::new(),
22852 trailing_comments: Vec::new(),
22853 inferred_type: None,
22854 }));
22855 (eq, result)
22856 } else {
22857 // Non-literal (column ref, expression) -> null-safe comparison
22858 let needs_paren = matches!(
22859 &search,
22860 Expression::Eq(_)
22861 | Expression::Neq(_)
22862 | Expression::Gt(_)
22863 | Expression::Gte(_)
22864 | Expression::Lt(_)
22865 | Expression::Lte(_)
22866 );
22867 let search_for_eq = if needs_paren {
22868 Expression::Paren(Box::new(
22869 crate::expressions::Paren {
22870 this: search.clone(),
22871 trailing_comments: Vec::new(),
22872 },
22873 ))
22874 } else {
22875 search.clone()
22876 };
22877 let eq = Expression::Eq(Box::new(BinaryOp {
22878 left: this_expr.clone(),
22879 right: search_for_eq,
22880 left_comments: Vec::new(),
22881 operator_comments: Vec::new(),
22882 trailing_comments: Vec::new(),
22883 inferred_type: None,
22884 }));
22885 let search_for_null = if needs_paren {
22886 Expression::Paren(Box::new(
22887 crate::expressions::Paren {
22888 this: search.clone(),
22889 trailing_comments: Vec::new(),
22890 },
22891 ))
22892 } else {
22893 search.clone()
22894 };
22895 let x_is_null = Expression::Is(Box::new(BinaryOp {
22896 left: this_expr.clone(),
22897 right: Expression::Null(crate::expressions::Null),
22898 left_comments: Vec::new(),
22899 operator_comments: Vec::new(),
22900 trailing_comments: Vec::new(),
22901 inferred_type: None,
22902 }));
22903 let s_is_null = Expression::Is(Box::new(BinaryOp {
22904 left: search_for_null,
22905 right: Expression::Null(crate::expressions::Null),
22906 left_comments: Vec::new(),
22907 operator_comments: Vec::new(),
22908 trailing_comments: Vec::new(),
22909 inferred_type: None,
22910 }));
22911 let both_null = Expression::And(Box::new(BinaryOp {
22912 left: x_is_null,
22913 right: s_is_null,
22914 left_comments: Vec::new(),
22915 operator_comments: Vec::new(),
22916 trailing_comments: Vec::new(),
22917 inferred_type: None,
22918 }));
22919 let condition = Expression::Or(Box::new(BinaryOp {
22920 left: eq,
22921 right: Expression::Paren(Box::new(
22922 crate::expressions::Paren {
22923 this: both_null,
22924 trailing_comments: Vec::new(),
22925 },
22926 )),
22927 left_comments: Vec::new(),
22928 operator_comments: Vec::new(),
22929 trailing_comments: Vec::new(),
22930 inferred_type: None,
22931 }));
22932 (condition, result)
22933 }
22934 })
22935 .collect();
22936 Ok(Expression::Case(Box::new(Case {
22937 operand: None,
22938 whens,
22939 else_: default,
22940 comments: Vec::new(),
22941 inferred_type: None,
22942 })))
22943 }
22944 // LEVENSHTEIN(a, b, ...) -> dialect-specific
22945 "LEVENSHTEIN" => {
22946 match target {
22947 DialectType::BigQuery => {
22948 let mut new_f = *f;
22949 new_f.name = "EDIT_DISTANCE".to_string();
22950 Ok(Expression::Function(Box::new(new_f)))
22951 }
22952 DialectType::Drill => {
22953 let mut new_f = *f;
22954 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
22955 Ok(Expression::Function(Box::new(new_f)))
22956 }
22957 DialectType::PostgreSQL if f.args.len() == 6 => {
22958 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
22959 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
22960 let mut new_f = *f;
22961 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
22962 Ok(Expression::Function(Box::new(new_f)))
22963 }
22964 _ => Ok(Expression::Function(f)),
22965 }
22966 }
22967 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
22968 "ARRAY_MAX" => {
22969 let name = match target {
22970 DialectType::ClickHouse => "arrayMax",
22971 DialectType::DuckDB => "LIST_MAX",
22972 _ => "ARRAY_MAX",
22973 };
22974 let mut new_f = *f;
22975 new_f.name = name.to_string();
22976 Ok(Expression::Function(Box::new(new_f)))
22977 }
22978 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
22979 "ARRAY_MIN" => {
22980 let name = match target {
22981 DialectType::ClickHouse => "arrayMin",
22982 DialectType::DuckDB => "LIST_MIN",
22983 _ => "ARRAY_MIN",
22984 };
22985 let mut new_f = *f;
22986 new_f.name = name.to_string();
22987 Ok(Expression::Function(Box::new(new_f)))
22988 }
22989 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
22990 // -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
22991 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
22992 let mut args = f.args;
22993 let b = args.pop().unwrap();
22994 let a = args.pop().unwrap();
22995 match target {
22996 DialectType::ClickHouse => {
22997 let upper_a = Expression::Upper(Box::new(
22998 crate::expressions::UnaryFunc::new(a),
22999 ));
23000 let upper_b = Expression::Upper(Box::new(
23001 crate::expressions::UnaryFunc::new(b),
23002 ));
23003 Ok(Expression::Function(Box::new(Function::new(
23004 "jaroWinklerSimilarity".to_string(),
23005 vec![upper_a, upper_b],
23006 ))))
23007 }
23008 DialectType::DuckDB => {
23009 let upper_a = Expression::Upper(Box::new(
23010 crate::expressions::UnaryFunc::new(a),
23011 ));
23012 let upper_b = Expression::Upper(Box::new(
23013 crate::expressions::UnaryFunc::new(b),
23014 ));
23015 let score = Expression::Function(Box::new(Function::new(
23016 "JARO_WINKLER_SIMILARITY".to_string(),
23017 vec![upper_a, upper_b],
23018 )));
23019 let scaled = Expression::Mul(Box::new(BinaryOp {
23020 left: score,
23021 right: Expression::number(100),
23022 left_comments: Vec::new(),
23023 operator_comments: Vec::new(),
23024 trailing_comments: Vec::new(),
23025 inferred_type: None,
23026 }));
23027 Ok(Expression::Cast(Box::new(Cast {
23028 this: scaled,
23029 to: DataType::Int {
23030 length: None,
23031 integer_spelling: false,
23032 },
23033 trailing_comments: Vec::new(),
23034 double_colon_syntax: false,
23035 format: None,
23036 default: None,
23037 inferred_type: None,
23038 })))
23039 }
23040 _ => Ok(Expression::Function(Box::new(Function::new(
23041 "JAROWINKLER_SIMILARITY".to_string(),
23042 vec![a, b],
23043 )))),
23044 }
23045 }
23046 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
23047 "CURRENT_SCHEMAS" => match target {
23048 DialectType::Snowflake => Ok(Expression::Function(Box::new(
23049 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
23050 ))),
23051 _ => Ok(Expression::Function(f)),
23052 },
23053 // TRUNC/TRUNCATE (numeric) -> dialect-specific
23054 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
23055 match target {
23056 DialectType::TSQL | DialectType::Fabric => {
23057 // ROUND(x, decimals, 1) - the 1 flag means truncation
23058 let mut args = f.args;
23059 let this = if args.is_empty() {
23060 return Ok(Expression::Function(Box::new(
23061 Function::new("TRUNC".to_string(), args),
23062 )));
23063 } else {
23064 args.remove(0)
23065 };
23066 let decimals = if args.is_empty() {
23067 Expression::Literal(Box::new(Literal::Number(
23068 "0".to_string(),
23069 )))
23070 } else {
23071 args.remove(0)
23072 };
23073 Ok(Expression::Function(Box::new(Function::new(
23074 "ROUND".to_string(),
23075 vec![
23076 this,
23077 decimals,
23078 Expression::Literal(Box::new(Literal::Number(
23079 "1".to_string(),
23080 ))),
23081 ],
23082 ))))
23083 }
23084 DialectType::Presto
23085 | DialectType::Trino
23086 | DialectType::Athena => {
23087 // TRUNCATE(x, decimals)
23088 let mut new_f = *f;
23089 new_f.name = "TRUNCATE".to_string();
23090 Ok(Expression::Function(Box::new(new_f)))
23091 }
23092 DialectType::MySQL
23093 | DialectType::SingleStore
23094 | DialectType::TiDB => {
23095 // TRUNCATE(x, decimals)
23096 let mut new_f = *f;
23097 new_f.name = "TRUNCATE".to_string();
23098 Ok(Expression::Function(Box::new(new_f)))
23099 }
23100 DialectType::DuckDB => {
23101 // DuckDB supports TRUNC(x, decimals) — preserve both args
23102 let mut args = f.args;
23103 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
23104 if args.len() == 2
23105 && matches!(source, DialectType::Snowflake)
23106 {
23107 let decimals = args.remove(1);
23108 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
23109 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
23110 let wrapped = if !is_int {
23111 Expression::Cast(Box::new(
23112 crate::expressions::Cast {
23113 this: decimals,
23114 to: DataType::Int {
23115 length: None,
23116 integer_spelling: false,
23117 },
23118 double_colon_syntax: false,
23119 trailing_comments: Vec::new(),
23120 format: None,
23121 default: None,
23122 inferred_type: None,
23123 },
23124 ))
23125 } else {
23126 decimals
23127 };
23128 args.push(wrapped);
23129 }
23130 Ok(Expression::Function(Box::new(Function::new(
23131 "TRUNC".to_string(),
23132 args,
23133 ))))
23134 }
23135 DialectType::ClickHouse => {
23136 // trunc(x, decimals) - lowercase
23137 let mut new_f = *f;
23138 new_f.name = "trunc".to_string();
23139 Ok(Expression::Function(Box::new(new_f)))
23140 }
23141 DialectType::Spark | DialectType::Databricks => {
23142 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
23143 let this = f.args.into_iter().next().unwrap_or(
23144 Expression::Literal(Box::new(Literal::Number(
23145 "0".to_string(),
23146 ))),
23147 );
23148 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
23149 this,
23150 to: crate::expressions::DataType::BigInt {
23151 length: None,
23152 },
23153 double_colon_syntax: false,
23154 trailing_comments: Vec::new(),
23155 format: None,
23156 default: None,
23157 inferred_type: None,
23158 })))
23159 }
23160 _ => {
23161 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
23162 let mut new_f = *f;
23163 new_f.name = "TRUNC".to_string();
23164 Ok(Expression::Function(Box::new(new_f)))
23165 }
23166 }
23167 }
23168 // CURRENT_VERSION() -> VERSION() for most dialects
23169 "CURRENT_VERSION" => match target {
23170 DialectType::Snowflake
23171 | DialectType::Databricks
23172 | DialectType::StarRocks => Ok(Expression::Function(f)),
23173 DialectType::SQLite => {
23174 let mut new_f = *f;
23175 new_f.name = "SQLITE_VERSION".to_string();
23176 Ok(Expression::Function(Box::new(new_f)))
23177 }
23178 _ => {
23179 let mut new_f = *f;
23180 new_f.name = "VERSION".to_string();
23181 Ok(Expression::Function(Box::new(new_f)))
23182 }
23183 },
23184 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
23185 "ARRAY_REVERSE" => match target {
23186 DialectType::ClickHouse => {
23187 let mut new_f = *f;
23188 new_f.name = "arrayReverse".to_string();
23189 Ok(Expression::Function(Box::new(new_f)))
23190 }
23191 _ => Ok(Expression::Function(f)),
23192 },
23193 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
23194 "GENERATE_DATE_ARRAY" => {
23195 let mut args = f.args;
23196 if matches!(target, DialectType::BigQuery) {
23197 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
23198 if args.len() == 2 {
23199 let default_interval = Expression::Interval(Box::new(
23200 crate::expressions::Interval {
23201 this: Some(Expression::Literal(Box::new(
23202 Literal::String("1".to_string()),
23203 ))),
23204 unit: Some(
23205 crate::expressions::IntervalUnitSpec::Simple {
23206 unit: crate::expressions::IntervalUnit::Day,
23207 use_plural: false,
23208 },
23209 ),
23210 },
23211 ));
23212 args.push(default_interval);
23213 }
23214 Ok(Expression::Function(Box::new(Function::new(
23215 "GENERATE_DATE_ARRAY".to_string(),
23216 args,
23217 ))))
23218 } else if matches!(target, DialectType::DuckDB) {
23219 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
23220 let start = args.get(0).cloned();
23221 let end = args.get(1).cloned();
23222 let step = args.get(2).cloned().or_else(|| {
23223 Some(Expression::Interval(Box::new(
23224 crate::expressions::Interval {
23225 this: Some(Expression::Literal(Box::new(
23226 Literal::String("1".to_string()),
23227 ))),
23228 unit: Some(
23229 crate::expressions::IntervalUnitSpec::Simple {
23230 unit: crate::expressions::IntervalUnit::Day,
23231 use_plural: false,
23232 },
23233 ),
23234 },
23235 )))
23236 });
23237 let gen_series = Expression::GenerateSeries(Box::new(
23238 crate::expressions::GenerateSeries {
23239 start: start.map(Box::new),
23240 end: end.map(Box::new),
23241 step: step.map(Box::new),
23242 is_end_exclusive: None,
23243 },
23244 ));
23245 Ok(Expression::Cast(Box::new(Cast {
23246 this: gen_series,
23247 to: DataType::Array {
23248 element_type: Box::new(DataType::Date),
23249 dimension: None,
23250 },
23251 trailing_comments: vec![],
23252 double_colon_syntax: false,
23253 format: None,
23254 default: None,
23255 inferred_type: None,
23256 })))
23257 } else if matches!(
23258 target,
23259 DialectType::Presto | DialectType::Trino | DialectType::Athena
23260 ) {
23261 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
23262 let start = args.get(0).cloned();
23263 let end = args.get(1).cloned();
23264 let step = args.get(2).cloned().or_else(|| {
23265 Some(Expression::Interval(Box::new(
23266 crate::expressions::Interval {
23267 this: Some(Expression::Literal(Box::new(
23268 Literal::String("1".to_string()),
23269 ))),
23270 unit: Some(
23271 crate::expressions::IntervalUnitSpec::Simple {
23272 unit: crate::expressions::IntervalUnit::Day,
23273 use_plural: false,
23274 },
23275 ),
23276 },
23277 )))
23278 });
23279 let gen_series = Expression::GenerateSeries(Box::new(
23280 crate::expressions::GenerateSeries {
23281 start: start.map(Box::new),
23282 end: end.map(Box::new),
23283 step: step.map(Box::new),
23284 is_end_exclusive: None,
23285 },
23286 ));
23287 Ok(gen_series)
23288 } else if matches!(
23289 target,
23290 DialectType::Spark | DialectType::Databricks
23291 ) {
23292 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
23293 let start = args.get(0).cloned();
23294 let end = args.get(1).cloned();
23295 let step = args.get(2).cloned().or_else(|| {
23296 Some(Expression::Interval(Box::new(
23297 crate::expressions::Interval {
23298 this: Some(Expression::Literal(Box::new(
23299 Literal::String("1".to_string()),
23300 ))),
23301 unit: Some(
23302 crate::expressions::IntervalUnitSpec::Simple {
23303 unit: crate::expressions::IntervalUnit::Day,
23304 use_plural: false,
23305 },
23306 ),
23307 },
23308 )))
23309 });
23310 let gen_series = Expression::GenerateSeries(Box::new(
23311 crate::expressions::GenerateSeries {
23312 start: start.map(Box::new),
23313 end: end.map(Box::new),
23314 step: step.map(Box::new),
23315 is_end_exclusive: None,
23316 },
23317 ));
23318 Ok(gen_series)
23319 } else if matches!(target, DialectType::Snowflake) {
23320 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
23321 if args.len() == 2 {
23322 let default_interval = Expression::Interval(Box::new(
23323 crate::expressions::Interval {
23324 this: Some(Expression::Literal(Box::new(
23325 Literal::String("1".to_string()),
23326 ))),
23327 unit: Some(
23328 crate::expressions::IntervalUnitSpec::Simple {
23329 unit: crate::expressions::IntervalUnit::Day,
23330 use_plural: false,
23331 },
23332 ),
23333 },
23334 ));
23335 args.push(default_interval);
23336 }
23337 Ok(Expression::Function(Box::new(Function::new(
23338 "GENERATE_DATE_ARRAY".to_string(),
23339 args,
23340 ))))
23341 } else if matches!(
23342 target,
23343 DialectType::MySQL
23344 | DialectType::TSQL
23345 | DialectType::Fabric
23346 | DialectType::Redshift
23347 ) {
23348 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
23349 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
23350 Ok(Expression::Function(Box::new(Function::new(
23351 "GENERATE_DATE_ARRAY".to_string(),
23352 args,
23353 ))))
23354 } else {
23355 // PostgreSQL/others: convert to GenerateSeries
23356 let start = args.get(0).cloned();
23357 let end = args.get(1).cloned();
23358 let step = args.get(2).cloned().or_else(|| {
23359 Some(Expression::Interval(Box::new(
23360 crate::expressions::Interval {
23361 this: Some(Expression::Literal(Box::new(
23362 Literal::String("1".to_string()),
23363 ))),
23364 unit: Some(
23365 crate::expressions::IntervalUnitSpec::Simple {
23366 unit: crate::expressions::IntervalUnit::Day,
23367 use_plural: false,
23368 },
23369 ),
23370 },
23371 )))
23372 });
23373 Ok(Expression::GenerateSeries(Box::new(
23374 crate::expressions::GenerateSeries {
23375 start: start.map(Box::new),
23376 end: end.map(Box::new),
23377 step: step.map(Box::new),
23378 is_end_exclusive: None,
23379 },
23380 )))
23381 }
23382 }
23383 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
23384 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
23385 "ARRAYS_OVERLAP"
23386 if f.args.len() == 2
23387 && matches!(source, DialectType::Snowflake)
23388 && matches!(target, DialectType::DuckDB) =>
23389 {
23390 let mut args = f.args;
23391 let arr1 = args.remove(0);
23392 let arr2 = args.remove(0);
23393
23394 // (arr1 && arr2)
23395 let overlap = Expression::Paren(Box::new(Paren {
23396 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
23397 left: arr1.clone(),
23398 right: arr2.clone(),
23399 left_comments: vec![],
23400 operator_comments: vec![],
23401 trailing_comments: vec![],
23402 inferred_type: None,
23403 })),
23404 trailing_comments: vec![],
23405 }));
23406
23407 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
23408 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
23409 left: Expression::Function(Box::new(Function::new(
23410 "ARRAY_LENGTH".to_string(),
23411 vec![arr1.clone()],
23412 ))),
23413 right: Expression::Function(Box::new(Function::new(
23414 "LIST_COUNT".to_string(),
23415 vec![arr1],
23416 ))),
23417 left_comments: vec![],
23418 operator_comments: vec![],
23419 trailing_comments: vec![],
23420 inferred_type: None,
23421 }));
23422
23423 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
23424 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
23425 left: Expression::Function(Box::new(Function::new(
23426 "ARRAY_LENGTH".to_string(),
23427 vec![arr2.clone()],
23428 ))),
23429 right: Expression::Function(Box::new(Function::new(
23430 "LIST_COUNT".to_string(),
23431 vec![arr2],
23432 ))),
23433 left_comments: vec![],
23434 operator_comments: vec![],
23435 trailing_comments: vec![],
23436 inferred_type: None,
23437 }));
23438
23439 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
23440 let null_check = Expression::Paren(Box::new(Paren {
23441 this: Expression::And(Box::new(BinaryOp {
23442 left: arr1_has_null,
23443 right: arr2_has_null,
23444 left_comments: vec![],
23445 operator_comments: vec![],
23446 trailing_comments: vec![],
23447 inferred_type: None,
23448 })),
23449 trailing_comments: vec![],
23450 }));
23451
23452 // (arr1 && arr2) OR (null_check)
23453 Ok(Expression::Or(Box::new(BinaryOp {
23454 left: overlap,
23455 right: null_check,
23456 left_comments: vec![],
23457 operator_comments: vec![],
23458 trailing_comments: vec![],
23459 inferred_type: None,
23460 })))
23461 }
23462 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
23463 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
23464 "ARRAY_INTERSECTION"
23465 if f.args.len() == 2
23466 && matches!(source, DialectType::Snowflake)
23467 && matches!(target, DialectType::DuckDB) =>
23468 {
23469 let mut args = f.args;
23470 let arr1 = args.remove(0);
23471 let arr2 = args.remove(0);
23472
23473 // Build: arr1 IS NULL
23474 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
23475 this: arr1.clone(),
23476 not: false,
23477 postfix_form: false,
23478 }));
23479 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
23480 this: arr2.clone(),
23481 not: false,
23482 postfix_form: false,
23483 }));
23484 let null_check = Expression::Or(Box::new(BinaryOp {
23485 left: arr1_is_null,
23486 right: arr2_is_null,
23487 left_comments: vec![],
23488 operator_comments: vec![],
23489 trailing_comments: vec![],
23490 inferred_type: None,
23491 }));
23492
23493 // GENERATE_SERIES(1, LENGTH(arr1))
23494 let gen_series = Expression::Function(Box::new(Function::new(
23495 "GENERATE_SERIES".to_string(),
23496 vec![
23497 Expression::number(1),
23498 Expression::Function(Box::new(Function::new(
23499 "LENGTH".to_string(),
23500 vec![arr1.clone()],
23501 ))),
23502 ],
23503 )));
23504
23505 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
23506 let list_zip = Expression::Function(Box::new(Function::new(
23507 "LIST_ZIP".to_string(),
23508 vec![arr1.clone(), gen_series],
23509 )));
23510
23511 // pair[1] and pair[2]
23512 let pair_col = Expression::column("pair");
23513 let pair_1 = Expression::Subscript(Box::new(
23514 crate::expressions::Subscript {
23515 this: pair_col.clone(),
23516 index: Expression::number(1),
23517 },
23518 ));
23519 let pair_2 = Expression::Subscript(Box::new(
23520 crate::expressions::Subscript {
23521 this: pair_col.clone(),
23522 index: Expression::number(2),
23523 },
23524 ));
23525
23526 // arr1[1:pair[2]]
23527 let arr1_slice = Expression::ArraySlice(Box::new(
23528 crate::expressions::ArraySlice {
23529 this: arr1.clone(),
23530 start: Some(Expression::number(1)),
23531 end: Some(pair_2),
23532 },
23533 ));
23534
23535 // e IS NOT DISTINCT FROM pair[1]
23536 let e_col = Expression::column("e");
23537 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
23538 left: e_col.clone(),
23539 right: pair_1.clone(),
23540 left_comments: vec![],
23541 operator_comments: vec![],
23542 trailing_comments: vec![],
23543 inferred_type: None,
23544 }));
23545
23546 // e -> e IS NOT DISTINCT FROM pair[1]
23547 let inner_lambda1 =
23548 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23549 parameters: vec![crate::expressions::Identifier::new("e")],
23550 body: is_not_distinct,
23551 colon: false,
23552 parameter_types: vec![],
23553 }));
23554
23555 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
23556 let inner_filter1 = Expression::Function(Box::new(Function::new(
23557 "LIST_FILTER".to_string(),
23558 vec![arr1_slice, inner_lambda1],
23559 )));
23560
23561 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
23562 let len1 = Expression::Function(Box::new(Function::new(
23563 "LENGTH".to_string(),
23564 vec![inner_filter1],
23565 )));
23566
23567 // e -> e IS NOT DISTINCT FROM pair[1]
23568 let inner_lambda2 =
23569 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23570 parameters: vec![crate::expressions::Identifier::new("e")],
23571 body: Expression::NullSafeEq(Box::new(BinaryOp {
23572 left: e_col,
23573 right: pair_1.clone(),
23574 left_comments: vec![],
23575 operator_comments: vec![],
23576 trailing_comments: vec![],
23577 inferred_type: None,
23578 })),
23579 colon: false,
23580 parameter_types: vec![],
23581 }));
23582
23583 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
23584 let inner_filter2 = Expression::Function(Box::new(Function::new(
23585 "LIST_FILTER".to_string(),
23586 vec![arr2.clone(), inner_lambda2],
23587 )));
23588
23589 // LENGTH(LIST_FILTER(arr2, ...))
23590 let len2 = Expression::Function(Box::new(Function::new(
23591 "LENGTH".to_string(),
23592 vec![inner_filter2],
23593 )));
23594
23595 // LENGTH(...) <= LENGTH(...)
23596 let cond = Expression::Paren(Box::new(Paren {
23597 this: Expression::Lte(Box::new(BinaryOp {
23598 left: len1,
23599 right: len2,
23600 left_comments: vec![],
23601 operator_comments: vec![],
23602 trailing_comments: vec![],
23603 inferred_type: None,
23604 })),
23605 trailing_comments: vec![],
23606 }));
23607
23608 // pair -> (condition)
23609 let filter_lambda =
23610 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23611 parameters: vec![crate::expressions::Identifier::new(
23612 "pair",
23613 )],
23614 body: cond,
23615 colon: false,
23616 parameter_types: vec![],
23617 }));
23618
23619 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
23620 let outer_filter = Expression::Function(Box::new(Function::new(
23621 "LIST_FILTER".to_string(),
23622 vec![list_zip, filter_lambda],
23623 )));
23624
23625 // pair -> pair[1]
23626 let transform_lambda =
23627 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23628 parameters: vec![crate::expressions::Identifier::new(
23629 "pair",
23630 )],
23631 body: pair_1,
23632 colon: false,
23633 parameter_types: vec![],
23634 }));
23635
23636 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
23637 let list_transform = Expression::Function(Box::new(Function::new(
23638 "LIST_TRANSFORM".to_string(),
23639 vec![outer_filter, transform_lambda],
23640 )));
23641
23642 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
23643 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
23644 // END
23645 Ok(Expression::Case(Box::new(Case {
23646 operand: None,
23647 whens: vec![(null_check, Expression::Null(Null))],
23648 else_: Some(list_transform),
23649 comments: vec![],
23650 inferred_type: None,
23651 })))
23652 }
23653 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
23654 "ARRAY_CONSTRUCT" => {
23655 if matches!(target, DialectType::Snowflake) {
23656 Ok(Expression::Function(f))
23657 } else {
23658 Ok(Expression::Array(Box::new(crate::expressions::Array {
23659 expressions: f.args,
23660 })))
23661 }
23662 }
23663 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
23664 "ARRAY"
23665 if !f.args.iter().any(|a| {
23666 matches!(a, Expression::Select(_) | Expression::Subquery(_))
23667 }) =>
23668 {
23669 match target {
23670 DialectType::DuckDB
23671 | DialectType::Snowflake
23672 | DialectType::Presto
23673 | DialectType::Trino
23674 | DialectType::Athena => {
23675 Ok(Expression::Array(Box::new(crate::expressions::Array {
23676 expressions: f.args,
23677 })))
23678 }
23679 _ => Ok(Expression::Function(f)),
23680 }
23681 }
23682 _ => Ok(Expression::Function(f)),
23683 }
23684 } else if let Expression::AggregateFunction(mut af) = e {
23685 let name = af.name.to_ascii_uppercase();
23686 match name.as_str() {
23687 "ARBITRARY" if af.args.len() == 1 => {
23688 let arg = af.args.into_iter().next().unwrap();
23689 Ok(convert_arbitrary(arg, target))
23690 }
23691 "JSON_ARRAYAGG" => {
23692 match target {
23693 DialectType::PostgreSQL => {
23694 af.name = "JSON_AGG".to_string();
23695 // Add NULLS FIRST to ORDER BY items for PostgreSQL
23696 for ordered in af.order_by.iter_mut() {
23697 if ordered.nulls_first.is_none() {
23698 ordered.nulls_first = Some(true);
23699 }
23700 }
23701 Ok(Expression::AggregateFunction(af))
23702 }
23703 _ => Ok(Expression::AggregateFunction(af)),
23704 }
23705 }
23706 _ => Ok(Expression::AggregateFunction(af)),
23707 }
23708 } else if let Expression::JSONArrayAgg(ja) = e {
23709 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
23710 match target {
23711 DialectType::PostgreSQL => {
23712 let mut order_by = Vec::new();
23713 if let Some(order_expr) = ja.order {
23714 if let Expression::OrderBy(ob) = *order_expr {
23715 for mut ordered in ob.expressions {
23716 if ordered.nulls_first.is_none() {
23717 ordered.nulls_first = Some(true);
23718 }
23719 order_by.push(ordered);
23720 }
23721 }
23722 }
23723 Ok(Expression::AggregateFunction(Box::new(
23724 crate::expressions::AggregateFunction {
23725 name: "JSON_AGG".to_string(),
23726 args: vec![*ja.this],
23727 distinct: false,
23728 filter: None,
23729 order_by,
23730 limit: None,
23731 ignore_nulls: None,
23732 inferred_type: None,
23733 },
23734 )))
23735 }
23736 _ => Ok(Expression::JSONArrayAgg(ja)),
23737 }
23738 } else if let Expression::JSONArray(ja) = e {
23739 match target {
23740 DialectType::Snowflake
23741 if ja.null_handling.is_none()
23742 && ja.return_type.is_none()
23743 && ja.strict.is_none() =>
23744 {
23745 let array_construct = Expression::ArrayFunc(Box::new(
23746 crate::expressions::ArrayConstructor {
23747 expressions: ja.expressions,
23748 bracket_notation: false,
23749 use_list_keyword: false,
23750 },
23751 ));
23752 Ok(Expression::Function(Box::new(Function::new(
23753 "TO_VARIANT".to_string(),
23754 vec![array_construct],
23755 ))))
23756 }
23757 _ => Ok(Expression::JSONArray(ja)),
23758 }
23759 } else if let Expression::JsonArray(f) = e {
23760 match target {
23761 DialectType::Snowflake => {
23762 let array_construct = Expression::ArrayFunc(Box::new(
23763 crate::expressions::ArrayConstructor {
23764 expressions: f.expressions,
23765 bracket_notation: false,
23766 use_list_keyword: false,
23767 },
23768 ));
23769 Ok(Expression::Function(Box::new(Function::new(
23770 "TO_VARIANT".to_string(),
23771 vec![array_construct],
23772 ))))
23773 }
23774 _ => Ok(Expression::JsonArray(f)),
23775 }
23776 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
23777 let function_name = match cpa.this.as_ref() {
23778 Expression::Identifier(ident) => Some(ident.name.as_str()),
23779 _ => None,
23780 };
23781 match function_name {
23782 Some(name)
23783 if name.eq_ignore_ascii_case("groupConcat")
23784 && cpa.expressions.len() == 1 =>
23785 {
23786 match target {
23787 DialectType::MySQL | DialectType::SingleStore => {
23788 let this = cpa.expressions[0].clone();
23789 let separator = cpa.params.first().cloned();
23790 Ok(Expression::GroupConcat(Box::new(
23791 crate::expressions::GroupConcatFunc {
23792 this,
23793 separator,
23794 order_by: None,
23795 distinct: false,
23796 filter: None,
23797 limit: None,
23798 inferred_type: None,
23799 },
23800 )))
23801 }
23802 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
23803 let this = cpa.expressions[0].clone();
23804 let separator = cpa.params.first().cloned();
23805 crate::expressions::ListAggFunc {
23806 this,
23807 separator,
23808 on_overflow: None,
23809 order_by: None,
23810 distinct: false,
23811 filter: None,
23812 inferred_type: None,
23813 }
23814 }))),
23815 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
23816 }
23817 }
23818 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
23819 }
23820 } else if let Expression::ToNumber(tn) = e {
23821 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
23822 let arg = *tn.this;
23823 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
23824 this: arg,
23825 to: crate::expressions::DataType::Double {
23826 precision: None,
23827 scale: None,
23828 },
23829 double_colon_syntax: false,
23830 trailing_comments: Vec::new(),
23831 format: None,
23832 default: None,
23833 inferred_type: None,
23834 })))
23835 } else {
23836 Ok(e)
23837 }
23838 }
23839
23840 Action::RegexpLikeToDuckDB => {
23841 if let Expression::RegexpLike(f) = e {
23842 let mut args = vec![f.this, f.pattern];
23843 if let Some(flags) = f.flags {
23844 args.push(flags);
23845 }
23846 Ok(Expression::Function(Box::new(Function::new(
23847 "REGEXP_MATCHES".to_string(),
23848 args,
23849 ))))
23850 } else {
23851 Ok(e)
23852 }
23853 }
23854 Action::EpochConvert => {
23855 if let Expression::Epoch(f) = e {
23856 let arg = f.this;
23857 let name = match target {
23858 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23859 "UNIX_TIMESTAMP"
23860 }
23861 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
23862 DialectType::BigQuery => "TIME_TO_UNIX",
23863 _ => "EPOCH",
23864 };
23865 Ok(Expression::Function(Box::new(Function::new(
23866 name.to_string(),
23867 vec![arg],
23868 ))))
23869 } else {
23870 Ok(e)
23871 }
23872 }
23873 Action::EpochMsConvert => {
23874 use crate::expressions::{BinaryOp, Cast};
23875 if let Expression::EpochMs(f) = e {
23876 let arg = f.this;
23877 match target {
23878 DialectType::Spark | DialectType::Databricks => {
23879 Ok(Expression::Function(Box::new(Function::new(
23880 "TIMESTAMP_MILLIS".to_string(),
23881 vec![arg],
23882 ))))
23883 }
23884 DialectType::BigQuery => Ok(Expression::Function(Box::new(
23885 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
23886 ))),
23887 DialectType::Presto | DialectType::Trino => {
23888 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
23889 let cast_arg = Expression::Cast(Box::new(Cast {
23890 this: arg,
23891 to: DataType::Double {
23892 precision: None,
23893 scale: None,
23894 },
23895 trailing_comments: Vec::new(),
23896 double_colon_syntax: false,
23897 format: None,
23898 default: None,
23899 inferred_type: None,
23900 }));
23901 let div = Expression::Div(Box::new(BinaryOp::new(
23902 cast_arg,
23903 Expression::Function(Box::new(Function::new(
23904 "POW".to_string(),
23905 vec![Expression::number(10), Expression::number(3)],
23906 ))),
23907 )));
23908 Ok(Expression::Function(Box::new(Function::new(
23909 "FROM_UNIXTIME".to_string(),
23910 vec![div],
23911 ))))
23912 }
23913 DialectType::MySQL => {
23914 // FROM_UNIXTIME(x / POWER(10, 3))
23915 let div = Expression::Div(Box::new(BinaryOp::new(
23916 arg,
23917 Expression::Function(Box::new(Function::new(
23918 "POWER".to_string(),
23919 vec![Expression::number(10), Expression::number(3)],
23920 ))),
23921 )));
23922 Ok(Expression::Function(Box::new(Function::new(
23923 "FROM_UNIXTIME".to_string(),
23924 vec![div],
23925 ))))
23926 }
23927 DialectType::PostgreSQL | DialectType::Redshift => {
23928 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
23929 let cast_arg = Expression::Cast(Box::new(Cast {
23930 this: arg,
23931 to: DataType::Custom {
23932 name: "DOUBLE PRECISION".to_string(),
23933 },
23934 trailing_comments: Vec::new(),
23935 double_colon_syntax: false,
23936 format: None,
23937 default: None,
23938 inferred_type: None,
23939 }));
23940 let div = Expression::Div(Box::new(BinaryOp::new(
23941 cast_arg,
23942 Expression::Function(Box::new(Function::new(
23943 "POWER".to_string(),
23944 vec![Expression::number(10), Expression::number(3)],
23945 ))),
23946 )));
23947 Ok(Expression::Function(Box::new(Function::new(
23948 "TO_TIMESTAMP".to_string(),
23949 vec![div],
23950 ))))
23951 }
23952 DialectType::ClickHouse => {
23953 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
23954 let cast_arg = Expression::Cast(Box::new(Cast {
23955 this: arg,
23956 to: DataType::Nullable {
23957 inner: Box::new(DataType::BigInt { length: None }),
23958 },
23959 trailing_comments: Vec::new(),
23960 double_colon_syntax: false,
23961 format: None,
23962 default: None,
23963 inferred_type: None,
23964 }));
23965 Ok(Expression::Function(Box::new(Function::new(
23966 "fromUnixTimestamp64Milli".to_string(),
23967 vec![cast_arg],
23968 ))))
23969 }
23970 _ => Ok(Expression::Function(Box::new(Function::new(
23971 "EPOCH_MS".to_string(),
23972 vec![arg],
23973 )))),
23974 }
23975 } else {
23976 Ok(e)
23977 }
23978 }
23979 Action::TSQLTypeNormalize => {
23980 if let Expression::DataType(dt) = e {
23981 let new_dt = match &dt {
23982 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
23983 DataType::Decimal {
23984 precision: Some(15),
23985 scale: Some(4),
23986 }
23987 }
23988 DataType::Custom { name }
23989 if name.eq_ignore_ascii_case("SMALLMONEY") =>
23990 {
23991 DataType::Decimal {
23992 precision: Some(6),
23993 scale: Some(4),
23994 }
23995 }
23996 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
23997 DataType::Timestamp {
23998 timezone: false,
23999 precision: None,
24000 }
24001 }
24002 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
24003 DataType::Float {
24004 precision: None,
24005 scale: None,
24006 real_spelling: false,
24007 }
24008 }
24009 DataType::Float {
24010 real_spelling: true,
24011 ..
24012 } => DataType::Float {
24013 precision: None,
24014 scale: None,
24015 real_spelling: false,
24016 },
24017 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
24018 DataType::Custom {
24019 name: "BLOB".to_string(),
24020 }
24021 }
24022 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
24023 DataType::Boolean
24024 }
24025 DataType::Custom { name }
24026 if name.eq_ignore_ascii_case("ROWVERSION") =>
24027 {
24028 DataType::Custom {
24029 name: "BINARY".to_string(),
24030 }
24031 }
24032 DataType::Custom { name }
24033 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
24034 {
24035 match target {
24036 DialectType::Spark
24037 | DialectType::Databricks
24038 | DialectType::Hive => DataType::Custom {
24039 name: "STRING".to_string(),
24040 },
24041 _ => DataType::VarChar {
24042 length: Some(36),
24043 parenthesized_length: true,
24044 },
24045 }
24046 }
24047 DataType::Custom { name }
24048 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
24049 {
24050 match target {
24051 DialectType::Spark
24052 | DialectType::Databricks
24053 | DialectType::Hive => DataType::Timestamp {
24054 timezone: false,
24055 precision: None,
24056 },
24057 _ => DataType::Timestamp {
24058 timezone: true,
24059 precision: None,
24060 },
24061 }
24062 }
24063 DataType::Custom { ref name }
24064 if name.len() >= 10
24065 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
24066 {
24067 // DATETIME2(n) -> TIMESTAMP
24068 DataType::Timestamp {
24069 timezone: false,
24070 precision: None,
24071 }
24072 }
24073 DataType::Custom { ref name }
24074 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
24075 {
24076 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
24077 match target {
24078 DialectType::Spark
24079 | DialectType::Databricks
24080 | DialectType::Hive => DataType::Timestamp {
24081 timezone: false,
24082 precision: None,
24083 },
24084 _ => return Ok(Expression::DataType(dt)),
24085 }
24086 }
24087 DataType::Custom { ref name }
24088 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
24089 {
24090 // Parse NUMERIC(p,s) back to Decimal(p,s)
24091 let upper = name.to_ascii_uppercase();
24092 if let Some(inner) = upper
24093 .strip_prefix("NUMERIC(")
24094 .and_then(|s| s.strip_suffix(')'))
24095 {
24096 let parts: Vec<&str> = inner.split(',').collect();
24097 let precision =
24098 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
24099 let scale =
24100 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
24101 DataType::Decimal { precision, scale }
24102 } else if upper == "NUMERIC" {
24103 DataType::Decimal {
24104 precision: None,
24105 scale: None,
24106 }
24107 } else {
24108 return Ok(Expression::DataType(dt));
24109 }
24110 }
24111 DataType::Float {
24112 precision: Some(p), ..
24113 } => {
24114 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
24115 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
24116 let boundary = match target {
24117 DialectType::Hive
24118 | DialectType::Spark
24119 | DialectType::Databricks => 32,
24120 _ => 24,
24121 };
24122 if *p <= boundary {
24123 DataType::Float {
24124 precision: None,
24125 scale: None,
24126 real_spelling: false,
24127 }
24128 } else {
24129 DataType::Double {
24130 precision: None,
24131 scale: None,
24132 }
24133 }
24134 }
24135 DataType::TinyInt { .. } => match target {
24136 DialectType::DuckDB => DataType::Custom {
24137 name: "UTINYINT".to_string(),
24138 },
24139 DialectType::Hive
24140 | DialectType::Spark
24141 | DialectType::Databricks => DataType::SmallInt { length: None },
24142 _ => return Ok(Expression::DataType(dt)),
24143 },
24144 // INTEGER -> INT for Spark/Databricks
24145 DataType::Int {
24146 length,
24147 integer_spelling: true,
24148 } => DataType::Int {
24149 length: *length,
24150 integer_spelling: false,
24151 },
24152 _ => return Ok(Expression::DataType(dt)),
24153 };
24154 Ok(Expression::DataType(new_dt))
24155 } else {
24156 Ok(e)
24157 }
24158 }
24159 Action::MySQLSafeDivide => {
24160 use crate::expressions::{BinaryOp, Cast};
24161 if let Expression::Div(op) = e {
24162 let left = op.left;
24163 let right = op.right;
24164 // For SQLite: CAST left as REAL but NO NULLIF wrapping
24165 if matches!(target, DialectType::SQLite) {
24166 let new_left = Expression::Cast(Box::new(Cast {
24167 this: left,
24168 to: DataType::Float {
24169 precision: None,
24170 scale: None,
24171 real_spelling: true,
24172 },
24173 trailing_comments: Vec::new(),
24174 double_colon_syntax: false,
24175 format: None,
24176 default: None,
24177 inferred_type: None,
24178 }));
24179 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
24180 }
24181 // Wrap right in NULLIF(right, 0)
24182 let nullif_right = Expression::Function(Box::new(Function::new(
24183 "NULLIF".to_string(),
24184 vec![right, Expression::number(0)],
24185 )));
24186 // For some dialects, also CAST the left side
24187 let new_left = match target {
24188 DialectType::PostgreSQL
24189 | DialectType::Redshift
24190 | DialectType::Teradata
24191 | DialectType::Materialize
24192 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
24193 this: left,
24194 to: DataType::Custom {
24195 name: "DOUBLE PRECISION".to_string(),
24196 },
24197 trailing_comments: Vec::new(),
24198 double_colon_syntax: false,
24199 format: None,
24200 default: None,
24201 inferred_type: None,
24202 })),
24203 DialectType::Drill
24204 | DialectType::Trino
24205 | DialectType::Presto
24206 | DialectType::Athena => Expression::Cast(Box::new(Cast {
24207 this: left,
24208 to: DataType::Double {
24209 precision: None,
24210 scale: None,
24211 },
24212 trailing_comments: Vec::new(),
24213 double_colon_syntax: false,
24214 format: None,
24215 default: None,
24216 inferred_type: None,
24217 })),
24218 DialectType::TSQL => Expression::Cast(Box::new(Cast {
24219 this: left,
24220 to: DataType::Float {
24221 precision: None,
24222 scale: None,
24223 real_spelling: false,
24224 },
24225 trailing_comments: Vec::new(),
24226 double_colon_syntax: false,
24227 format: None,
24228 default: None,
24229 inferred_type: None,
24230 })),
24231 _ => left,
24232 };
24233 Ok(Expression::Div(Box::new(BinaryOp::new(
24234 new_left,
24235 nullif_right,
24236 ))))
24237 } else {
24238 Ok(e)
24239 }
24240 }
24241 Action::AlterTableRenameStripSchema => {
24242 if let Expression::AlterTable(mut at) = e {
24243 if let Some(crate::expressions::AlterTableAction::RenameTable(
24244 ref mut new_tbl,
24245 )) = at.actions.first_mut()
24246 {
24247 new_tbl.schema = None;
24248 new_tbl.catalog = None;
24249 }
24250 Ok(Expression::AlterTable(at))
24251 } else {
24252 Ok(e)
24253 }
24254 }
24255 Action::NullsOrdering => {
24256 // Fill in the source dialect's implied null ordering default.
24257 // This makes implicit null ordering explicit so the target generator
24258 // can correctly strip or keep it.
24259 //
24260 // Dialect null ordering categories:
24261 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
24262 // ASC -> NULLS LAST, DESC -> NULLS FIRST
24263 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
24264 // ASC -> NULLS FIRST, DESC -> NULLS LAST
24265 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
24266 // NULLS LAST always (both ASC and DESC)
24267 if let Expression::Ordered(mut o) = e {
24268 let is_asc = !o.desc;
24269
24270 let is_source_nulls_large = matches!(
24271 source,
24272 DialectType::Oracle
24273 | DialectType::PostgreSQL
24274 | DialectType::Redshift
24275 | DialectType::Snowflake
24276 );
24277 let is_source_nulls_last = matches!(
24278 source,
24279 DialectType::DuckDB
24280 | DialectType::Presto
24281 | DialectType::Trino
24282 | DialectType::Dremio
24283 | DialectType::Athena
24284 | DialectType::ClickHouse
24285 | DialectType::Drill
24286 | DialectType::Exasol
24287 | DialectType::DataFusion
24288 );
24289
24290 // Determine target category to check if default matches
24291 let is_target_nulls_large = matches!(
24292 target,
24293 DialectType::Oracle
24294 | DialectType::PostgreSQL
24295 | DialectType::Redshift
24296 | DialectType::Snowflake
24297 );
24298 let is_target_nulls_last = matches!(
24299 target,
24300 DialectType::DuckDB
24301 | DialectType::Presto
24302 | DialectType::Trino
24303 | DialectType::Dremio
24304 | DialectType::Athena
24305 | DialectType::ClickHouse
24306 | DialectType::Drill
24307 | DialectType::Exasol
24308 | DialectType::DataFusion
24309 );
24310
24311 // Compute the implied nulls_first for source
24312 let source_nulls_first = if is_source_nulls_large {
24313 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
24314 } else if is_source_nulls_last {
24315 false // NULLS LAST always
24316 } else {
24317 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
24318 };
24319
24320 // Compute the target's default
24321 let target_nulls_first = if is_target_nulls_large {
24322 !is_asc
24323 } else if is_target_nulls_last {
24324 false
24325 } else {
24326 is_asc
24327 };
24328
24329 // Only add explicit nulls ordering if source and target defaults differ
24330 if source_nulls_first != target_nulls_first {
24331 o.nulls_first = Some(source_nulls_first);
24332 }
24333 // If they match, leave nulls_first as None so the generator won't output it
24334
24335 Ok(Expression::Ordered(o))
24336 } else {
24337 Ok(e)
24338 }
24339 }
24340 Action::StringAggConvert => {
24341 match e {
24342 Expression::WithinGroup(wg) => {
24343 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
24344 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
24345 let (x_opt, sep_opt, distinct) = match wg.this {
24346 Expression::AggregateFunction(ref af)
24347 if af.name.eq_ignore_ascii_case("STRING_AGG")
24348 && af.args.len() >= 2 =>
24349 {
24350 (
24351 Some(af.args[0].clone()),
24352 Some(af.args[1].clone()),
24353 af.distinct,
24354 )
24355 }
24356 Expression::Function(ref f)
24357 if f.name.eq_ignore_ascii_case("STRING_AGG")
24358 && f.args.len() >= 2 =>
24359 {
24360 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
24361 }
24362 Expression::StringAgg(ref sa) => {
24363 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
24364 }
24365 _ => (None, None, false),
24366 };
24367 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
24368 let order_by = wg.order_by;
24369
24370 match target {
24371 DialectType::TSQL | DialectType::Fabric => {
24372 // Keep as WithinGroup(StringAgg) for TSQL
24373 Ok(Expression::WithinGroup(Box::new(
24374 crate::expressions::WithinGroup {
24375 this: Expression::StringAgg(Box::new(
24376 crate::expressions::StringAggFunc {
24377 this: x,
24378 separator: Some(sep),
24379 order_by: None, // order_by goes in WithinGroup, not StringAgg
24380 distinct,
24381 filter: None,
24382 limit: None,
24383 inferred_type: None,
24384 },
24385 )),
24386 order_by,
24387 },
24388 )))
24389 }
24390 DialectType::MySQL
24391 | DialectType::SingleStore
24392 | DialectType::Doris
24393 | DialectType::StarRocks => {
24394 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
24395 Ok(Expression::GroupConcat(Box::new(
24396 crate::expressions::GroupConcatFunc {
24397 this: x,
24398 separator: Some(sep),
24399 order_by: Some(order_by),
24400 distinct,
24401 filter: None,
24402 limit: None,
24403 inferred_type: None,
24404 },
24405 )))
24406 }
24407 DialectType::SQLite => {
24408 // GROUP_CONCAT(x, sep) - no ORDER BY support
24409 Ok(Expression::GroupConcat(Box::new(
24410 crate::expressions::GroupConcatFunc {
24411 this: x,
24412 separator: Some(sep),
24413 order_by: None,
24414 distinct,
24415 filter: None,
24416 limit: None,
24417 inferred_type: None,
24418 },
24419 )))
24420 }
24421 DialectType::PostgreSQL | DialectType::Redshift => {
24422 // STRING_AGG(x, sep ORDER BY z)
24423 Ok(Expression::StringAgg(Box::new(
24424 crate::expressions::StringAggFunc {
24425 this: x,
24426 separator: Some(sep),
24427 order_by: Some(order_by),
24428 distinct,
24429 filter: None,
24430 limit: None,
24431 inferred_type: None,
24432 },
24433 )))
24434 }
24435 _ => {
24436 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
24437 Ok(Expression::StringAgg(Box::new(
24438 crate::expressions::StringAggFunc {
24439 this: x,
24440 separator: Some(sep),
24441 order_by: Some(order_by),
24442 distinct,
24443 filter: None,
24444 limit: None,
24445 inferred_type: None,
24446 },
24447 )))
24448 }
24449 }
24450 } else {
24451 Ok(Expression::WithinGroup(wg))
24452 }
24453 }
24454 Expression::StringAgg(sa) => {
24455 match target {
24456 DialectType::MySQL
24457 | DialectType::SingleStore
24458 | DialectType::Doris
24459 | DialectType::StarRocks => {
24460 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
24461 Ok(Expression::GroupConcat(Box::new(
24462 crate::expressions::GroupConcatFunc {
24463 this: sa.this,
24464 separator: sa.separator,
24465 order_by: sa.order_by,
24466 distinct: sa.distinct,
24467 filter: sa.filter,
24468 limit: None,
24469 inferred_type: None,
24470 },
24471 )))
24472 }
24473 DialectType::SQLite => {
24474 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
24475 Ok(Expression::GroupConcat(Box::new(
24476 crate::expressions::GroupConcatFunc {
24477 this: sa.this,
24478 separator: sa.separator,
24479 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
24480 distinct: sa.distinct,
24481 filter: sa.filter,
24482 limit: None,
24483 inferred_type: None,
24484 },
24485 )))
24486 }
24487 DialectType::Spark | DialectType::Databricks => {
24488 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
24489 Ok(Expression::ListAgg(Box::new(
24490 crate::expressions::ListAggFunc {
24491 this: sa.this,
24492 separator: sa.separator,
24493 on_overflow: None,
24494 order_by: sa.order_by,
24495 distinct: sa.distinct,
24496 filter: None,
24497 inferred_type: None,
24498 },
24499 )))
24500 }
24501 _ => Ok(Expression::StringAgg(sa)),
24502 }
24503 }
24504 _ => Ok(e),
24505 }
24506 }
24507 Action::GroupConcatConvert => {
24508 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
24509 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
24510 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
24511 if let Expression::Function(ref f) = expr {
24512 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24513 let mut result = f.args[0].clone();
24514 for arg in &f.args[1..] {
24515 result = Expression::Concat(Box::new(BinaryOp {
24516 left: result,
24517 right: arg.clone(),
24518 left_comments: vec![],
24519 operator_comments: vec![],
24520 trailing_comments: vec![],
24521 inferred_type: None,
24522 }));
24523 }
24524 return result;
24525 }
24526 }
24527 expr
24528 }
24529 fn expand_concat_to_plus(expr: Expression) -> Expression {
24530 if let Expression::Function(ref f) = expr {
24531 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24532 let mut result = f.args[0].clone();
24533 for arg in &f.args[1..] {
24534 result = Expression::Add(Box::new(BinaryOp {
24535 left: result,
24536 right: arg.clone(),
24537 left_comments: vec![],
24538 operator_comments: vec![],
24539 trailing_comments: vec![],
24540 inferred_type: None,
24541 }));
24542 }
24543 return result;
24544 }
24545 }
24546 expr
24547 }
24548 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
24549 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
24550 if let Expression::Function(ref f) = expr {
24551 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24552 let new_args: Vec<Expression> = f
24553 .args
24554 .iter()
24555 .map(|arg| {
24556 Expression::Cast(Box::new(crate::expressions::Cast {
24557 this: arg.clone(),
24558 to: crate::expressions::DataType::VarChar {
24559 length: None,
24560 parenthesized_length: false,
24561 },
24562 trailing_comments: Vec::new(),
24563 double_colon_syntax: false,
24564 format: None,
24565 default: None,
24566 inferred_type: None,
24567 }))
24568 })
24569 .collect();
24570 return Expression::Function(Box::new(
24571 crate::expressions::Function::new(
24572 "CONCAT".to_string(),
24573 new_args,
24574 ),
24575 ));
24576 }
24577 }
24578 expr
24579 }
24580 if let Expression::GroupConcat(gc) = e {
24581 match target {
24582 DialectType::Presto => {
24583 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
24584 let sep = gc.separator.unwrap_or(Expression::string(","));
24585 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
24586 let this = wrap_concat_args_in_varchar_cast(gc.this);
24587 let array_agg =
24588 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
24589 this,
24590 distinct: gc.distinct,
24591 filter: gc.filter,
24592 order_by: gc.order_by.unwrap_or_default(),
24593 name: None,
24594 ignore_nulls: None,
24595 having_max: None,
24596 limit: None,
24597 inferred_type: None,
24598 }));
24599 Ok(Expression::ArrayJoin(Box::new(
24600 crate::expressions::ArrayJoinFunc {
24601 this: array_agg,
24602 separator: sep,
24603 null_replacement: None,
24604 },
24605 )))
24606 }
24607 DialectType::Trino => {
24608 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
24609 let sep = gc.separator.unwrap_or(Expression::string(","));
24610 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
24611 let this = wrap_concat_args_in_varchar_cast(gc.this);
24612 Ok(Expression::ListAgg(Box::new(
24613 crate::expressions::ListAggFunc {
24614 this,
24615 separator: Some(sep),
24616 on_overflow: None,
24617 order_by: gc.order_by,
24618 distinct: gc.distinct,
24619 filter: gc.filter,
24620 inferred_type: None,
24621 },
24622 )))
24623 }
24624 DialectType::PostgreSQL
24625 | DialectType::Redshift
24626 | DialectType::Snowflake
24627 | DialectType::DuckDB
24628 | DialectType::Hive
24629 | DialectType::ClickHouse => {
24630 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
24631 let sep = gc.separator.unwrap_or(Expression::string(","));
24632 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
24633 let this = expand_concat_to_dpipe(gc.this);
24634 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
24635 let order_by = if target == DialectType::PostgreSQL {
24636 gc.order_by.map(|ords| {
24637 ords.into_iter()
24638 .map(|mut o| {
24639 if o.nulls_first.is_none() {
24640 if o.desc {
24641 o.nulls_first = Some(false);
24642 // NULLS LAST
24643 } else {
24644 o.nulls_first = Some(true);
24645 // NULLS FIRST
24646 }
24647 }
24648 o
24649 })
24650 .collect()
24651 })
24652 } else {
24653 gc.order_by
24654 };
24655 Ok(Expression::StringAgg(Box::new(
24656 crate::expressions::StringAggFunc {
24657 this,
24658 separator: Some(sep),
24659 order_by,
24660 distinct: gc.distinct,
24661 filter: gc.filter,
24662 limit: None,
24663 inferred_type: None,
24664 },
24665 )))
24666 }
24667 DialectType::TSQL => {
24668 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
24669 // TSQL doesn't support DISTINCT in STRING_AGG
24670 let sep = gc.separator.unwrap_or(Expression::string(","));
24671 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
24672 let this = expand_concat_to_plus(gc.this);
24673 Ok(Expression::StringAgg(Box::new(
24674 crate::expressions::StringAggFunc {
24675 this,
24676 separator: Some(sep),
24677 order_by: gc.order_by,
24678 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
24679 filter: gc.filter,
24680 limit: None,
24681 inferred_type: None,
24682 },
24683 )))
24684 }
24685 DialectType::SQLite => {
24686 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
24687 // SQLite GROUP_CONCAT doesn't support ORDER BY
24688 // Expand CONCAT(a,b,c) -> a || b || c
24689 let this = expand_concat_to_dpipe(gc.this);
24690 Ok(Expression::GroupConcat(Box::new(
24691 crate::expressions::GroupConcatFunc {
24692 this,
24693 separator: gc.separator,
24694 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
24695 distinct: gc.distinct,
24696 filter: gc.filter,
24697 limit: None,
24698 inferred_type: None,
24699 },
24700 )))
24701 }
24702 DialectType::Spark | DialectType::Databricks => {
24703 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
24704 let sep = gc.separator.unwrap_or(Expression::string(","));
24705 Ok(Expression::ListAgg(Box::new(
24706 crate::expressions::ListAggFunc {
24707 this: gc.this,
24708 separator: Some(sep),
24709 on_overflow: None,
24710 order_by: gc.order_by,
24711 distinct: gc.distinct,
24712 filter: None,
24713 inferred_type: None,
24714 },
24715 )))
24716 }
24717 DialectType::MySQL
24718 | DialectType::SingleStore
24719 | DialectType::StarRocks => {
24720 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
24721 if gc.separator.is_none() {
24722 let mut gc = gc;
24723 gc.separator = Some(Expression::string(","));
24724 Ok(Expression::GroupConcat(gc))
24725 } else {
24726 Ok(Expression::GroupConcat(gc))
24727 }
24728 }
24729 _ => Ok(Expression::GroupConcat(gc)),
24730 }
24731 } else {
24732 Ok(e)
24733 }
24734 }
24735 Action::TempTableHash => {
24736 match e {
24737 Expression::CreateTable(mut ct) => {
24738 // TSQL #table -> TEMPORARY TABLE with # stripped from name
24739 let name = &ct.name.name.name;
24740 if name.starts_with('#') {
24741 ct.name.name.name = name.trim_start_matches('#').to_string();
24742 }
24743 // Set temporary flag
24744 ct.temporary = true;
24745 Ok(Expression::CreateTable(ct))
24746 }
24747 Expression::Table(mut tr) => {
24748 // Strip # from table references
24749 let name = &tr.name.name;
24750 if name.starts_with('#') {
24751 tr.name.name = name.trim_start_matches('#').to_string();
24752 }
24753 Ok(Expression::Table(tr))
24754 }
24755 Expression::DropTable(mut dt) => {
24756 // Strip # from DROP TABLE names
24757 for table_ref in &mut dt.names {
24758 if table_ref.name.name.starts_with('#') {
24759 table_ref.name.name =
24760 table_ref.name.name.trim_start_matches('#').to_string();
24761 }
24762 }
24763 Ok(Expression::DropTable(dt))
24764 }
24765 _ => Ok(e),
24766 }
24767 }
24768 Action::NvlClearOriginal => {
24769 if let Expression::Nvl(mut f) = e {
24770 f.original_name = None;
24771 Ok(Expression::Nvl(f))
24772 } else {
24773 Ok(e)
24774 }
24775 }
24776 Action::HiveCastToTryCast => {
24777 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
24778 if let Expression::Cast(mut c) = e {
24779 // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
24780 // (Spark's TIMESTAMP is always timezone-aware)
24781 if matches!(target, DialectType::DuckDB)
24782 && matches!(source, DialectType::Spark | DialectType::Databricks)
24783 && matches!(
24784 c.to,
24785 DataType::Timestamp {
24786 timezone: false,
24787 ..
24788 }
24789 )
24790 {
24791 c.to = DataType::Custom {
24792 name: "TIMESTAMPTZ".to_string(),
24793 };
24794 }
24795 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
24796 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
24797 if matches!(target, DialectType::Databricks | DialectType::Spark)
24798 && matches!(
24799 source,
24800 DialectType::Spark | DialectType::Databricks | DialectType::Hive
24801 )
24802 && Self::has_varchar_char_type(&c.to)
24803 {
24804 c.to = Self::normalize_varchar_to_string(c.to);
24805 }
24806 Ok(Expression::TryCast(c))
24807 } else {
24808 Ok(e)
24809 }
24810 }
24811 Action::XorExpand => {
24812 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
24813 // Snowflake: use BOOLXOR(a, b) instead
24814 if let Expression::Xor(xor) = e {
24815 // Collect all XOR operands
24816 let mut operands = Vec::new();
24817 if let Some(this) = xor.this {
24818 operands.push(*this);
24819 }
24820 if let Some(expr) = xor.expression {
24821 operands.push(*expr);
24822 }
24823 operands.extend(xor.expressions);
24824
24825 // Snowflake: use BOOLXOR(a, b)
24826 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
24827 let a = operands.remove(0);
24828 let b = operands.remove(0);
24829 return Ok(Expression::Function(Box::new(Function::new(
24830 "BOOLXOR".to_string(),
24831 vec![a, b],
24832 ))));
24833 }
24834
24835 // Helper to build (a AND NOT b) OR (NOT a AND b)
24836 let make_xor = |a: Expression, b: Expression| -> Expression {
24837 let not_b = Expression::Not(Box::new(
24838 crate::expressions::UnaryOp::new(b.clone()),
24839 ));
24840 let not_a = Expression::Not(Box::new(
24841 crate::expressions::UnaryOp::new(a.clone()),
24842 ));
24843 let left_and = Expression::And(Box::new(BinaryOp {
24844 left: a,
24845 right: Expression::Paren(Box::new(Paren {
24846 this: not_b,
24847 trailing_comments: Vec::new(),
24848 })),
24849 left_comments: Vec::new(),
24850 operator_comments: Vec::new(),
24851 trailing_comments: Vec::new(),
24852 inferred_type: None,
24853 }));
24854 let right_and = Expression::And(Box::new(BinaryOp {
24855 left: Expression::Paren(Box::new(Paren {
24856 this: not_a,
24857 trailing_comments: Vec::new(),
24858 })),
24859 right: b,
24860 left_comments: Vec::new(),
24861 operator_comments: Vec::new(),
24862 trailing_comments: Vec::new(),
24863 inferred_type: None,
24864 }));
24865 Expression::Or(Box::new(BinaryOp {
24866 left: Expression::Paren(Box::new(Paren {
24867 this: left_and,
24868 trailing_comments: Vec::new(),
24869 })),
24870 right: Expression::Paren(Box::new(Paren {
24871 this: right_and,
24872 trailing_comments: Vec::new(),
24873 })),
24874 left_comments: Vec::new(),
24875 operator_comments: Vec::new(),
24876 trailing_comments: Vec::new(),
24877 inferred_type: None,
24878 }))
24879 };
24880
24881 if operands.len() >= 2 {
24882 let mut result = make_xor(operands.remove(0), operands.remove(0));
24883 for operand in operands {
24884 result = make_xor(result, operand);
24885 }
24886 Ok(result)
24887 } else if operands.len() == 1 {
24888 Ok(operands.remove(0))
24889 } else {
24890 // No operands - return FALSE (shouldn't happen)
24891 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
24892 value: false,
24893 }))
24894 }
24895 } else {
24896 Ok(e)
24897 }
24898 }
24899 Action::DatePartUnquote => {
24900 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
24901 // Convert the quoted string first arg to a bare Column/Identifier
24902 if let Expression::Function(mut f) = e {
24903 if let Some(Expression::Literal(lit)) = f.args.first() {
24904 if let crate::expressions::Literal::String(s) = lit.as_ref() {
24905 let bare_name = s.to_ascii_lowercase();
24906 f.args[0] =
24907 Expression::Column(Box::new(crate::expressions::Column {
24908 name: Identifier::new(bare_name),
24909 table: None,
24910 join_mark: false,
24911 trailing_comments: Vec::new(),
24912 span: None,
24913 inferred_type: None,
24914 }));
24915 }
24916 }
24917 Ok(Expression::Function(f))
24918 } else {
24919 Ok(e)
24920 }
24921 }
24922 Action::ArrayLengthConvert => {
24923 // Extract the argument from the expression
24924 let arg = match e {
24925 Expression::Cardinality(ref f) => f.this.clone(),
24926 Expression::ArrayLength(ref f) => f.this.clone(),
24927 Expression::ArraySize(ref f) => f.this.clone(),
24928 _ => return Ok(e),
24929 };
24930 match target {
24931 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
24932 Ok(Expression::Function(Box::new(Function::new(
24933 "SIZE".to_string(),
24934 vec![arg],
24935 ))))
24936 }
24937 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24938 Ok(Expression::Cardinality(Box::new(
24939 crate::expressions::UnaryFunc::new(arg),
24940 )))
24941 }
24942 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
24943 crate::expressions::UnaryFunc::new(arg),
24944 ))),
24945 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
24946 crate::expressions::UnaryFunc::new(arg),
24947 ))),
24948 DialectType::PostgreSQL | DialectType::Redshift => {
24949 // PostgreSQL ARRAY_LENGTH requires dimension arg
24950 Ok(Expression::Function(Box::new(Function::new(
24951 "ARRAY_LENGTH".to_string(),
24952 vec![arg, Expression::number(1)],
24953 ))))
24954 }
24955 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
24956 crate::expressions::UnaryFunc::new(arg),
24957 ))),
24958 _ => Ok(e), // Keep original
24959 }
24960 }
24961
24962 Action::JsonExtractToArrow => {
24963 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
24964 if let Expression::JsonExtract(mut f) = e {
24965 f.arrow_syntax = true;
24966 // Transform path: convert bracket notation to dot notation
24967 // SQLite strips wildcards, DuckDB preserves them
24968 if let Expression::Literal(ref lit) = f.path {
24969 if let Literal::String(ref s) = lit.as_ref() {
24970 let mut transformed = s.clone();
24971 if matches!(target, DialectType::SQLite) {
24972 transformed = Self::strip_json_wildcards(&transformed);
24973 }
24974 transformed = Self::bracket_to_dot_notation(&transformed);
24975 if transformed != *s {
24976 f.path = Expression::string(&transformed);
24977 }
24978 }
24979 }
24980 Ok(Expression::JsonExtract(f))
24981 } else {
24982 Ok(e)
24983 }
24984 }
24985
24986 Action::JsonExtractToGetJsonObject => {
24987 if let Expression::JsonExtract(f) = e {
24988 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
24989 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
24990 // Use proper decomposition that handles brackets
24991 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
24992 if let Literal::String(ref s) = lit.as_ref() {
24993 let parts = Self::decompose_json_path(s);
24994 parts.into_iter().map(|k| Expression::string(&k)).collect()
24995 } else {
24996 vec![]
24997 }
24998 } else {
24999 vec![f.path]
25000 };
25001 let func_name = if matches!(target, DialectType::Redshift) {
25002 "JSON_EXTRACT_PATH_TEXT"
25003 } else {
25004 "JSON_EXTRACT_PATH"
25005 };
25006 let mut args = vec![f.this];
25007 args.extend(keys);
25008 Ok(Expression::Function(Box::new(Function::new(
25009 func_name.to_string(),
25010 args,
25011 ))))
25012 } else {
25013 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
25014 // Convert bracket double quotes to single quotes
25015 let path = if let Expression::Literal(ref lit) = f.path {
25016 if let Literal::String(ref s) = lit.as_ref() {
25017 let normalized = Self::bracket_to_single_quotes(s);
25018 if normalized != *s {
25019 Expression::string(&normalized)
25020 } else {
25021 f.path.clone()
25022 }
25023 } else {
25024 f.path.clone()
25025 }
25026 } else {
25027 f.path.clone()
25028 };
25029 Ok(Expression::Function(Box::new(Function::new(
25030 "GET_JSON_OBJECT".to_string(),
25031 vec![f.this, path],
25032 ))))
25033 }
25034 } else {
25035 Ok(e)
25036 }
25037 }
25038
25039 Action::JsonExtractScalarToGetJsonObject => {
25040 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
25041 if let Expression::JsonExtractScalar(f) = e {
25042 Ok(Expression::Function(Box::new(Function::new(
25043 "GET_JSON_OBJECT".to_string(),
25044 vec![f.this, f.path],
25045 ))))
25046 } else {
25047 Ok(e)
25048 }
25049 }
25050
25051 Action::JsonExtractToTsql => {
25052 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
25053 let (this, path) = match e {
25054 Expression::JsonExtract(f) => (f.this, f.path),
25055 Expression::JsonExtractScalar(f) => (f.this, f.path),
25056 _ => return Ok(e),
25057 };
25058 // Transform path: strip wildcards, convert bracket notation to dot notation
25059 let transformed_path = if let Expression::Literal(ref lit) = path {
25060 if let Literal::String(ref s) = lit.as_ref() {
25061 let stripped = Self::strip_json_wildcards(s);
25062 let dotted = Self::bracket_to_dot_notation(&stripped);
25063 Expression::string(&dotted)
25064 } else {
25065 path.clone()
25066 }
25067 } else {
25068 path
25069 };
25070 let json_query = Expression::Function(Box::new(Function::new(
25071 "JSON_QUERY".to_string(),
25072 vec![this.clone(), transformed_path.clone()],
25073 )));
25074 let json_value = Expression::Function(Box::new(Function::new(
25075 "JSON_VALUE".to_string(),
25076 vec![this, transformed_path],
25077 )));
25078 Ok(Expression::Function(Box::new(Function::new(
25079 "ISNULL".to_string(),
25080 vec![json_query, json_value],
25081 ))))
25082 }
25083
25084 Action::JsonExtractToClickHouse => {
25085 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
25086 let (this, path) = match e {
25087 Expression::JsonExtract(f) => (f.this, f.path),
25088 Expression::JsonExtractScalar(f) => (f.this, f.path),
25089 _ => return Ok(e),
25090 };
25091 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
25092 if let Literal::String(ref s) = lit.as_ref() {
25093 let parts = Self::decompose_json_path(s);
25094 let mut result = vec![this];
25095 for part in parts {
25096 // ClickHouse uses 1-based integer indices for array access
25097 if let Ok(idx) = part.parse::<i64>() {
25098 result.push(Expression::number(idx + 1));
25099 } else {
25100 result.push(Expression::string(&part));
25101 }
25102 }
25103 result
25104 } else {
25105 vec![]
25106 }
25107 } else {
25108 vec![this, path]
25109 };
25110 Ok(Expression::Function(Box::new(Function::new(
25111 "JSONExtractString".to_string(),
25112 args,
25113 ))))
25114 }
25115
25116 Action::JsonExtractScalarConvert => {
25117 // JSON_EXTRACT_SCALAR -> target-specific
25118 if let Expression::JsonExtractScalar(f) = e {
25119 match target {
25120 DialectType::PostgreSQL | DialectType::Redshift => {
25121 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
25122 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
25123 {
25124 if let Literal::String(ref s) = lit.as_ref() {
25125 let parts = Self::decompose_json_path(s);
25126 parts.into_iter().map(|k| Expression::string(&k)).collect()
25127 } else {
25128 vec![]
25129 }
25130 } else {
25131 vec![f.path]
25132 };
25133 let mut args = vec![f.this];
25134 args.extend(keys);
25135 Ok(Expression::Function(Box::new(Function::new(
25136 "JSON_EXTRACT_PATH_TEXT".to_string(),
25137 args,
25138 ))))
25139 }
25140 DialectType::Snowflake => {
25141 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
25142 let stripped_path = if let Expression::Literal(ref lit) = f.path {
25143 if let Literal::String(ref s) = lit.as_ref() {
25144 let stripped = Self::strip_json_dollar_prefix(s);
25145 Expression::string(&stripped)
25146 } else {
25147 f.path.clone()
25148 }
25149 } else {
25150 f.path
25151 };
25152 Ok(Expression::Function(Box::new(Function::new(
25153 "JSON_EXTRACT_PATH_TEXT".to_string(),
25154 vec![f.this, stripped_path],
25155 ))))
25156 }
25157 DialectType::SQLite | DialectType::DuckDB => {
25158 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
25159 Ok(Expression::JsonExtractScalar(Box::new(
25160 crate::expressions::JsonExtractFunc {
25161 this: f.this,
25162 path: f.path,
25163 returning: f.returning,
25164 arrow_syntax: true,
25165 hash_arrow_syntax: false,
25166 wrapper_option: None,
25167 quotes_option: None,
25168 on_scalar_string: false,
25169 on_error: None,
25170 },
25171 )))
25172 }
25173 _ => Ok(Expression::JsonExtractScalar(f)),
25174 }
25175 } else {
25176 Ok(e)
25177 }
25178 }
25179
25180 Action::JsonPathNormalize => {
25181 // Normalize JSON path format for BigQuery, MySQL, etc.
25182 if let Expression::JsonExtract(mut f) = e {
25183 if let Expression::Literal(ref lit) = f.path {
25184 if let Literal::String(ref s) = lit.as_ref() {
25185 let mut normalized = s.clone();
25186 // Convert bracket notation and handle wildcards per dialect
25187 match target {
25188 DialectType::BigQuery => {
25189 // BigQuery strips wildcards and uses single quotes in brackets
25190 normalized = Self::strip_json_wildcards(&normalized);
25191 normalized = Self::bracket_to_single_quotes(&normalized);
25192 }
25193 DialectType::MySQL => {
25194 // MySQL preserves wildcards, converts brackets to dot notation
25195 normalized = Self::bracket_to_dot_notation(&normalized);
25196 }
25197 _ => {}
25198 }
25199 if normalized != *s {
25200 f.path = Expression::string(&normalized);
25201 }
25202 }
25203 }
25204 Ok(Expression::JsonExtract(f))
25205 } else {
25206 Ok(e)
25207 }
25208 }
25209
25210 Action::JsonQueryValueConvert => {
25211 // JsonQuery/JsonValue -> target-specific
25212 let (f, is_query) = match e {
25213 Expression::JsonQuery(f) => (f, true),
25214 Expression::JsonValue(f) => (f, false),
25215 _ => return Ok(e),
25216 };
25217 match target {
25218 DialectType::TSQL | DialectType::Fabric => {
25219 // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
25220 let json_query = Expression::Function(Box::new(Function::new(
25221 "JSON_QUERY".to_string(),
25222 vec![f.this.clone(), f.path.clone()],
25223 )));
25224 let json_value = Expression::Function(Box::new(Function::new(
25225 "JSON_VALUE".to_string(),
25226 vec![f.this, f.path],
25227 )));
25228 Ok(Expression::Function(Box::new(Function::new(
25229 "ISNULL".to_string(),
25230 vec![json_query, json_value],
25231 ))))
25232 }
25233 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
25234 Ok(Expression::Function(Box::new(Function::new(
25235 "GET_JSON_OBJECT".to_string(),
25236 vec![f.this, f.path],
25237 ))))
25238 }
25239 DialectType::PostgreSQL | DialectType::Redshift => {
25240 Ok(Expression::Function(Box::new(Function::new(
25241 "JSON_EXTRACT_PATH_TEXT".to_string(),
25242 vec![f.this, f.path],
25243 ))))
25244 }
25245 DialectType::DuckDB | DialectType::SQLite => {
25246 // json -> path arrow syntax
25247 Ok(Expression::JsonExtract(Box::new(
25248 crate::expressions::JsonExtractFunc {
25249 this: f.this,
25250 path: f.path,
25251 returning: f.returning,
25252 arrow_syntax: true,
25253 hash_arrow_syntax: false,
25254 wrapper_option: f.wrapper_option,
25255 quotes_option: f.quotes_option,
25256 on_scalar_string: f.on_scalar_string,
25257 on_error: f.on_error,
25258 },
25259 )))
25260 }
25261 DialectType::Snowflake => {
25262 // GET_PATH(PARSE_JSON(json), 'path')
25263 // Strip $. prefix from path
25264 // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
25265 let json_expr = match &f.this {
25266 Expression::Function(ref inner_f)
25267 if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
25268 {
25269 f.this
25270 }
25271 Expression::ParseJson(_) => {
25272 // Already a ParseJson expression, which generates as PARSE_JSON(...)
25273 f.this
25274 }
25275 _ => Expression::Function(Box::new(Function::new(
25276 "PARSE_JSON".to_string(),
25277 vec![f.this],
25278 ))),
25279 };
25280 let path_str = match &f.path {
25281 Expression::Literal(lit)
25282 if matches!(lit.as_ref(), Literal::String(_)) =>
25283 {
25284 let Literal::String(s) = lit.as_ref() else {
25285 unreachable!()
25286 };
25287 let stripped = s.strip_prefix("$.").unwrap_or(s);
25288 Expression::Literal(Box::new(Literal::String(
25289 stripped.to_string(),
25290 )))
25291 }
25292 other => other.clone(),
25293 };
25294 Ok(Expression::Function(Box::new(Function::new(
25295 "GET_PATH".to_string(),
25296 vec![json_expr, path_str],
25297 ))))
25298 }
25299 _ => {
25300 // Default: keep as JSON_QUERY/JSON_VALUE function
25301 let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
25302 Ok(Expression::Function(Box::new(Function::new(
25303 func_name.to_string(),
25304 vec![f.this, f.path],
25305 ))))
25306 }
25307 }
25308 }
25309
25310 Action::JsonLiteralToJsonParse => {
25311 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
25312 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
25313 if let Expression::Cast(c) = e {
25314 let func_name = if matches!(target, DialectType::Snowflake) {
25315 "PARSE_JSON"
25316 } else {
25317 "JSON_PARSE"
25318 };
25319 Ok(Expression::Function(Box::new(Function::new(
25320 func_name.to_string(),
25321 vec![c.this],
25322 ))))
25323 } else {
25324 Ok(e)
25325 }
25326 }
25327
25328 Action::DuckDBCastJsonToVariant => {
25329 if let Expression::Cast(c) = e {
25330 Ok(Expression::Cast(Box::new(Cast {
25331 this: c.this,
25332 to: DataType::Custom {
25333 name: "VARIANT".to_string(),
25334 },
25335 trailing_comments: c.trailing_comments,
25336 double_colon_syntax: false,
25337 format: None,
25338 default: None,
25339 inferred_type: None,
25340 })))
25341 } else {
25342 Ok(e)
25343 }
25344 }
25345
25346 Action::DuckDBTryCastJsonToTryJsonParse => {
25347 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
25348 if let Expression::TryCast(c) = e {
25349 let json_parse = Expression::Function(Box::new(Function::new(
25350 "JSON_PARSE".to_string(),
25351 vec![c.this],
25352 )));
25353 Ok(Expression::Function(Box::new(Function::new(
25354 "TRY".to_string(),
25355 vec![json_parse],
25356 ))))
25357 } else {
25358 Ok(e)
25359 }
25360 }
25361
25362 Action::DuckDBJsonFuncToJsonParse => {
25363 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
25364 if let Expression::Function(f) = e {
25365 let args = f.args;
25366 Ok(Expression::Function(Box::new(Function::new(
25367 "JSON_PARSE".to_string(),
25368 args,
25369 ))))
25370 } else {
25371 Ok(e)
25372 }
25373 }
25374
25375 Action::DuckDBJsonValidToIsJson => {
25376 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
25377 if let Expression::Function(mut f) = e {
25378 let arg = f.args.remove(0);
25379 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
25380 this: arg,
25381 json_type: None,
25382 unique_keys: None,
25383 negated: false,
25384 })))
25385 } else {
25386 Ok(e)
25387 }
25388 }
25389
25390 Action::AtTimeZoneConvert => {
25391 // AT TIME ZONE -> target-specific conversion
25392 if let Expression::AtTimeZone(atz) = e {
25393 match target {
25394 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25395 Ok(Expression::Function(Box::new(Function::new(
25396 "AT_TIMEZONE".to_string(),
25397 vec![atz.this, atz.zone],
25398 ))))
25399 }
25400 DialectType::Spark | DialectType::Databricks => {
25401 Ok(Expression::Function(Box::new(Function::new(
25402 "FROM_UTC_TIMESTAMP".to_string(),
25403 vec![atz.this, atz.zone],
25404 ))))
25405 }
25406 DialectType::Snowflake => {
25407 // CONVERT_TIMEZONE('zone', expr)
25408 Ok(Expression::Function(Box::new(Function::new(
25409 "CONVERT_TIMEZONE".to_string(),
25410 vec![atz.zone, atz.this],
25411 ))))
25412 }
25413 DialectType::BigQuery => {
25414 // TIMESTAMP(DATETIME(expr, 'zone'))
25415 let datetime_call = Expression::Function(Box::new(Function::new(
25416 "DATETIME".to_string(),
25417 vec![atz.this, atz.zone],
25418 )));
25419 Ok(Expression::Function(Box::new(Function::new(
25420 "TIMESTAMP".to_string(),
25421 vec![datetime_call],
25422 ))))
25423 }
25424 _ => Ok(Expression::Function(Box::new(Function::new(
25425 "AT_TIMEZONE".to_string(),
25426 vec![atz.this, atz.zone],
25427 )))),
25428 }
25429 } else {
25430 Ok(e)
25431 }
25432 }
25433
25434 Action::DayOfWeekConvert => {
25435 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
25436 if let Expression::DayOfWeek(f) = e {
25437 match target {
25438 DialectType::DuckDB => Ok(Expression::Function(Box::new(
25439 Function::new("ISODOW".to_string(), vec![f.this]),
25440 ))),
25441 DialectType::Spark | DialectType::Databricks => {
25442 // ((DAYOFWEEK(x) % 7) + 1)
25443 let dayofweek = Expression::Function(Box::new(Function::new(
25444 "DAYOFWEEK".to_string(),
25445 vec![f.this],
25446 )));
25447 let modulo = Expression::Mod(Box::new(BinaryOp {
25448 left: dayofweek,
25449 right: Expression::number(7),
25450 left_comments: Vec::new(),
25451 operator_comments: Vec::new(),
25452 trailing_comments: Vec::new(),
25453 inferred_type: None,
25454 }));
25455 let paren_mod = Expression::Paren(Box::new(Paren {
25456 this: modulo,
25457 trailing_comments: Vec::new(),
25458 }));
25459 let add_one = Expression::Add(Box::new(BinaryOp {
25460 left: paren_mod,
25461 right: Expression::number(1),
25462 left_comments: Vec::new(),
25463 operator_comments: Vec::new(),
25464 trailing_comments: Vec::new(),
25465 inferred_type: None,
25466 }));
25467 Ok(Expression::Paren(Box::new(Paren {
25468 this: add_one,
25469 trailing_comments: Vec::new(),
25470 })))
25471 }
25472 _ => Ok(Expression::DayOfWeek(f)),
25473 }
25474 } else {
25475 Ok(e)
25476 }
25477 }
25478
25479 Action::MaxByMinByConvert => {
25480 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
25481 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
25482 // Handle both Expression::Function and Expression::AggregateFunction
25483 let (is_max, args) = match &e {
25484 Expression::Function(f) => {
25485 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
25486 }
25487 Expression::AggregateFunction(af) => {
25488 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
25489 }
25490 _ => return Ok(e),
25491 };
25492 match target {
25493 DialectType::ClickHouse => {
25494 let name = if is_max { "argMax" } else { "argMin" };
25495 let mut args = args;
25496 args.truncate(2);
25497 Ok(Expression::Function(Box::new(Function::new(
25498 name.to_string(),
25499 args,
25500 ))))
25501 }
25502 DialectType::DuckDB => {
25503 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
25504 Ok(Expression::Function(Box::new(Function::new(
25505 name.to_string(),
25506 args,
25507 ))))
25508 }
25509 DialectType::Spark | DialectType::Databricks => {
25510 let mut args = args;
25511 args.truncate(2);
25512 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
25513 Ok(Expression::Function(Box::new(Function::new(
25514 name.to_string(),
25515 args,
25516 ))))
25517 }
25518 _ => Ok(e),
25519 }
25520 }
25521
25522 Action::ElementAtConvert => {
25523 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
25524 let (arr, idx) = if let Expression::ElementAt(bf) = e {
25525 (bf.this, bf.expression)
25526 } else if let Expression::Function(ref f) = e {
25527 if f.args.len() >= 2 {
25528 if let Expression::Function(f) = e {
25529 let mut args = f.args;
25530 let arr = args.remove(0);
25531 let idx = args.remove(0);
25532 (arr, idx)
25533 } else {
25534 unreachable!("outer condition already matched Expression::Function")
25535 }
25536 } else {
25537 return Ok(e);
25538 }
25539 } else {
25540 return Ok(e);
25541 };
25542 match target {
25543 DialectType::PostgreSQL => {
25544 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
25545 let arr_expr = Expression::Paren(Box::new(Paren {
25546 this: arr,
25547 trailing_comments: vec![],
25548 }));
25549 Ok(Expression::Subscript(Box::new(
25550 crate::expressions::Subscript {
25551 this: arr_expr,
25552 index: idx,
25553 },
25554 )))
25555 }
25556 DialectType::BigQuery => {
25557 // BigQuery: convert ARRAY[...] to bare [...] for subscript
25558 let arr_expr = match arr {
25559 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
25560 crate::expressions::ArrayConstructor {
25561 expressions: af.expressions,
25562 bracket_notation: true,
25563 use_list_keyword: false,
25564 },
25565 )),
25566 other => other,
25567 };
25568 let safe_ordinal = Expression::Function(Box::new(Function::new(
25569 "SAFE_ORDINAL".to_string(),
25570 vec![idx],
25571 )));
25572 Ok(Expression::Subscript(Box::new(
25573 crate::expressions::Subscript {
25574 this: arr_expr,
25575 index: safe_ordinal,
25576 },
25577 )))
25578 }
25579 _ => Ok(Expression::Function(Box::new(Function::new(
25580 "ELEMENT_AT".to_string(),
25581 vec![arr, idx],
25582 )))),
25583 }
25584 }
25585
25586 Action::CurrentUserParens => {
25587 // CURRENT_USER -> CURRENT_USER() for Snowflake
25588 Ok(Expression::Function(Box::new(Function::new(
25589 "CURRENT_USER".to_string(),
25590 vec![],
25591 ))))
25592 }
25593
25594 Action::ArrayAggToCollectList => {
25595 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
25596 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
25597 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
25598 match e {
25599 Expression::AggregateFunction(mut af) => {
25600 let is_simple =
25601 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
25602 let args = if af.args.is_empty() {
25603 vec![]
25604 } else {
25605 vec![af.args[0].clone()]
25606 };
25607 af.name = "COLLECT_LIST".to_string();
25608 af.args = args;
25609 if is_simple {
25610 af.order_by = Vec::new();
25611 }
25612 Ok(Expression::AggregateFunction(af))
25613 }
25614 Expression::ArrayAgg(agg) => {
25615 let is_simple =
25616 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
25617 Ok(Expression::AggregateFunction(Box::new(
25618 crate::expressions::AggregateFunction {
25619 name: "COLLECT_LIST".to_string(),
25620 args: vec![agg.this.clone()],
25621 distinct: agg.distinct,
25622 filter: agg.filter.clone(),
25623 order_by: if is_simple {
25624 Vec::new()
25625 } else {
25626 agg.order_by.clone()
25627 },
25628 limit: agg.limit.clone(),
25629 ignore_nulls: agg.ignore_nulls,
25630 inferred_type: None,
25631 },
25632 )))
25633 }
25634 _ => Ok(e),
25635 }
25636 }
25637
25638 Action::ArraySyntaxConvert => {
25639 match e {
25640 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
25641 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
25642 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
25643 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
25644 expressions: arr.expressions,
25645 bracket_notation: true,
25646 use_list_keyword: false,
25647 })),
25648 ),
25649 // ARRAY(y) function style -> ArrayFunc for target dialect
25650 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
25651 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
25652 let bracket = matches!(
25653 target,
25654 DialectType::BigQuery
25655 | DialectType::DuckDB
25656 | DialectType::Snowflake
25657 | DialectType::ClickHouse
25658 | DialectType::StarRocks
25659 );
25660 Ok(Expression::ArrayFunc(Box::new(
25661 crate::expressions::ArrayConstructor {
25662 expressions: f.args,
25663 bracket_notation: bracket,
25664 use_list_keyword: false,
25665 },
25666 )))
25667 }
25668 _ => Ok(e),
25669 }
25670 }
25671
25672 Action::CastToJsonForSpark => {
25673 // CAST(x AS JSON) -> TO_JSON(x) for Spark
25674 if let Expression::Cast(c) = e {
25675 Ok(Expression::Function(Box::new(Function::new(
25676 "TO_JSON".to_string(),
25677 vec![c.this],
25678 ))))
25679 } else {
25680 Ok(e)
25681 }
25682 }
25683
25684 Action::CastJsonToFromJson => {
25685 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
25686 if let Expression::Cast(c) = e {
25687 // Extract the string literal from ParseJson
25688 let literal_expr = if let Expression::ParseJson(pj) = c.this {
25689 pj.this
25690 } else {
25691 c.this
25692 };
25693 // Convert the target DataType to Spark's type string format
25694 let type_str = Self::data_type_to_spark_string(&c.to);
25695 Ok(Expression::Function(Box::new(Function::new(
25696 "FROM_JSON".to_string(),
25697 vec![
25698 literal_expr,
25699 Expression::Literal(Box::new(Literal::String(type_str))),
25700 ],
25701 ))))
25702 } else {
25703 Ok(e)
25704 }
25705 }
25706
25707 Action::ToJsonConvert => {
25708 // TO_JSON(x) -> target-specific conversion
25709 if let Expression::ToJson(f) = e {
25710 let arg = f.this;
25711 match target {
25712 DialectType::Presto | DialectType::Trino => {
25713 // JSON_FORMAT(CAST(x AS JSON))
25714 let cast_json = Expression::Cast(Box::new(Cast {
25715 this: arg,
25716 to: DataType::Custom {
25717 name: "JSON".to_string(),
25718 },
25719 trailing_comments: vec![],
25720 double_colon_syntax: false,
25721 format: None,
25722 default: None,
25723 inferred_type: None,
25724 }));
25725 Ok(Expression::Function(Box::new(Function::new(
25726 "JSON_FORMAT".to_string(),
25727 vec![cast_json],
25728 ))))
25729 }
25730 DialectType::BigQuery => Ok(Expression::Function(Box::new(
25731 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
25732 ))),
25733 DialectType::DuckDB => {
25734 // CAST(TO_JSON(x) AS TEXT)
25735 let to_json =
25736 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
25737 this: arg,
25738 original_name: None,
25739 inferred_type: None,
25740 }));
25741 Ok(Expression::Cast(Box::new(Cast {
25742 this: to_json,
25743 to: DataType::Text,
25744 trailing_comments: vec![],
25745 double_colon_syntax: false,
25746 format: None,
25747 default: None,
25748 inferred_type: None,
25749 })))
25750 }
25751 _ => Ok(Expression::ToJson(Box::new(
25752 crate::expressions::UnaryFunc {
25753 this: arg,
25754 original_name: None,
25755 inferred_type: None,
25756 },
25757 ))),
25758 }
25759 } else {
25760 Ok(e)
25761 }
25762 }
25763
25764 Action::VarianceToClickHouse => {
25765 if let Expression::Variance(f) = e {
25766 Ok(Expression::Function(Box::new(Function::new(
25767 "varSamp".to_string(),
25768 vec![f.this],
25769 ))))
25770 } else {
25771 Ok(e)
25772 }
25773 }
25774
25775 Action::StddevToClickHouse => {
25776 if let Expression::Stddev(f) = e {
25777 Ok(Expression::Function(Box::new(Function::new(
25778 "stddevSamp".to_string(),
25779 vec![f.this],
25780 ))))
25781 } else {
25782 Ok(e)
25783 }
25784 }
25785
25786 Action::ApproxQuantileConvert => {
25787 if let Expression::ApproxQuantile(aq) = e {
25788 let mut args = vec![*aq.this];
25789 if let Some(q) = aq.quantile {
25790 args.push(*q);
25791 }
25792 Ok(Expression::Function(Box::new(Function::new(
25793 "APPROX_PERCENTILE".to_string(),
25794 args,
25795 ))))
25796 } else {
25797 Ok(e)
25798 }
25799 }
25800
25801 Action::DollarParamConvert => {
25802 if let Expression::Parameter(p) = e {
25803 Ok(Expression::Parameter(Box::new(
25804 crate::expressions::Parameter {
25805 name: p.name,
25806 index: p.index,
25807 style: crate::expressions::ParameterStyle::At,
25808 quoted: p.quoted,
25809 string_quoted: p.string_quoted,
25810 expression: p.expression,
25811 },
25812 )))
25813 } else {
25814 Ok(e)
25815 }
25816 }
25817
25818 Action::EscapeStringNormalize => {
25819 if let Expression::Literal(ref lit) = e {
25820 if let Literal::EscapeString(s) = lit.as_ref() {
25821 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
25822 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
25823 s[2..].to_string()
25824 } else {
25825 s.clone()
25826 };
25827 let normalized = stripped
25828 .replace('\n', "\\n")
25829 .replace('\r', "\\r")
25830 .replace('\t', "\\t");
25831 match target {
25832 DialectType::BigQuery => {
25833 // BigQuery: e'...' -> CAST(b'...' AS STRING)
25834 // Use Raw for the b'...' part to avoid double-escaping
25835 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
25836 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
25837 }
25838 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
25839 normalized,
25840 )))),
25841 }
25842 } else {
25843 Ok(e)
25844 }
25845 } else {
25846 Ok(e)
25847 }
25848 }
25849
25850 Action::StraightJoinCase => {
25851 // straight_join: keep lowercase for DuckDB, quote for MySQL
25852 if let Expression::Column(col) = e {
25853 if col.name.name == "STRAIGHT_JOIN" {
25854 let mut new_col = col;
25855 new_col.name.name = "straight_join".to_string();
25856 if matches!(target, DialectType::MySQL) {
25857 // MySQL: needs quoting since it's a reserved keyword
25858 new_col.name.quoted = true;
25859 }
25860 Ok(Expression::Column(new_col))
25861 } else {
25862 Ok(Expression::Column(col))
25863 }
25864 } else {
25865 Ok(e)
25866 }
25867 }
25868
25869 Action::TablesampleReservoir => {
25870 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
25871 if let Expression::TableSample(mut ts) = e {
25872 if let Some(ref mut sample) = ts.sample {
25873 sample.method = crate::expressions::SampleMethod::Reservoir;
25874 sample.explicit_method = true;
25875 }
25876 Ok(Expression::TableSample(ts))
25877 } else {
25878 Ok(e)
25879 }
25880 }
25881
25882 Action::TablesampleSnowflakeStrip => {
25883 // Strip method and PERCENT for Snowflake target from non-Snowflake source
25884 match e {
25885 Expression::TableSample(mut ts) => {
25886 if let Some(ref mut sample) = ts.sample {
25887 sample.suppress_method_output = true;
25888 sample.unit_after_size = false;
25889 sample.is_percent = false;
25890 }
25891 Ok(Expression::TableSample(ts))
25892 }
25893 Expression::Table(mut t) => {
25894 if let Some(ref mut sample) = t.table_sample {
25895 sample.suppress_method_output = true;
25896 sample.unit_after_size = false;
25897 sample.is_percent = false;
25898 }
25899 Ok(Expression::Table(t))
25900 }
25901 _ => Ok(e),
25902 }
25903 }
25904
25905 Action::FirstToAnyValue => {
25906 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
25907 if let Expression::First(mut agg) = e {
25908 agg.ignore_nulls = None;
25909 agg.name = Some("ANY_VALUE".to_string());
25910 Ok(Expression::AnyValue(agg))
25911 } else {
25912 Ok(e)
25913 }
25914 }
25915
25916 Action::ArrayIndexConvert => {
25917 // Subscript index: 1-based to 0-based for BigQuery
25918 if let Expression::Subscript(mut sub) = e {
25919 if let Expression::Literal(ref lit) = sub.index {
25920 if let Literal::Number(ref n) = lit.as_ref() {
25921 if let Ok(val) = n.parse::<i64>() {
25922 sub.index = Expression::Literal(Box::new(Literal::Number(
25923 (val - 1).to_string(),
25924 )));
25925 }
25926 }
25927 }
25928 Ok(Expression::Subscript(sub))
25929 } else {
25930 Ok(e)
25931 }
25932 }
25933
25934 Action::AnyValueIgnoreNulls => {
25935 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
25936 if let Expression::AnyValue(mut av) = e {
25937 if av.ignore_nulls.is_none() {
25938 av.ignore_nulls = Some(true);
25939 }
25940 Ok(Expression::AnyValue(av))
25941 } else {
25942 Ok(e)
25943 }
25944 }
25945
25946 Action::BigQueryNullsOrdering => {
25947 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
25948 if let Expression::WindowFunction(mut wf) = e {
25949 for o in &mut wf.over.order_by {
25950 o.nulls_first = None;
25951 }
25952 Ok(Expression::WindowFunction(wf))
25953 } else if let Expression::Ordered(mut o) = e {
25954 o.nulls_first = None;
25955 Ok(Expression::Ordered(o))
25956 } else {
25957 Ok(e)
25958 }
25959 }
25960
25961 Action::SnowflakeFloatProtect => {
25962 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
25963 // Snowflake's target transform from converting it to DOUBLE.
25964 // Non-Snowflake sources should keep their FLOAT spelling.
25965 if let Expression::DataType(DataType::Float { .. }) = e {
25966 Ok(Expression::DataType(DataType::Custom {
25967 name: "FLOAT".to_string(),
25968 }))
25969 } else {
25970 Ok(e)
25971 }
25972 }
25973
25974 Action::MysqlNullsOrdering => {
25975 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
25976 if let Expression::Ordered(mut o) = e {
25977 let nulls_last = o.nulls_first == Some(false);
25978 let desc = o.desc;
25979 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
25980 // If requested ordering matches default, just strip NULLS clause
25981 let matches_default = if desc {
25982 // DESC default is NULLS FIRST, so nulls_first=true matches
25983 o.nulls_first == Some(true)
25984 } else {
25985 // ASC default is NULLS LAST, so nulls_first=false matches
25986 nulls_last
25987 };
25988 if matches_default {
25989 o.nulls_first = None;
25990 Ok(Expression::Ordered(o))
25991 } else {
25992 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
25993 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
25994 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
25995 let null_val = if desc { 1 } else { 0 };
25996 let non_null_val = if desc { 0 } else { 1 };
25997 let _case_expr = Expression::Case(Box::new(Case {
25998 operand: None,
25999 whens: vec![(
26000 Expression::IsNull(Box::new(crate::expressions::IsNull {
26001 this: o.this.clone(),
26002 not: false,
26003 postfix_form: false,
26004 })),
26005 Expression::number(null_val),
26006 )],
26007 else_: Some(Expression::number(non_null_val)),
26008 comments: Vec::new(),
26009 inferred_type: None,
26010 }));
26011 o.nulls_first = None;
26012 // Return a tuple of [case_expr, ordered_expr]
26013 // We need to return both as part of the ORDER BY
26014 // But since transform_recursive processes individual expressions,
26015 // we can't easily add extra ORDER BY items here.
26016 // Instead, strip the nulls_first
26017 o.nulls_first = None;
26018 Ok(Expression::Ordered(o))
26019 }
26020 } else {
26021 Ok(e)
26022 }
26023 }
26024
26025 Action::MysqlNullsLastRewrite => {
26026 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
26027 // to simulate NULLS LAST for ASC ordering
26028 if let Expression::WindowFunction(mut wf) = e {
26029 let mut new_order_by = Vec::new();
26030 for o in wf.over.order_by {
26031 if !o.desc {
26032 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
26033 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
26034 let case_expr = Expression::Case(Box::new(Case {
26035 operand: None,
26036 whens: vec![(
26037 Expression::IsNull(Box::new(crate::expressions::IsNull {
26038 this: o.this.clone(),
26039 not: false,
26040 postfix_form: false,
26041 })),
26042 Expression::Literal(Box::new(Literal::Number(
26043 "1".to_string(),
26044 ))),
26045 )],
26046 else_: Some(Expression::Literal(Box::new(Literal::Number(
26047 "0".to_string(),
26048 )))),
26049 comments: Vec::new(),
26050 inferred_type: None,
26051 }));
26052 new_order_by.push(crate::expressions::Ordered {
26053 this: case_expr,
26054 desc: false,
26055 nulls_first: None,
26056 explicit_asc: false,
26057 with_fill: None,
26058 });
26059 let mut ordered = o;
26060 ordered.nulls_first = None;
26061 new_order_by.push(ordered);
26062 } else {
26063 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
26064 // No change needed
26065 let mut ordered = o;
26066 ordered.nulls_first = None;
26067 new_order_by.push(ordered);
26068 }
26069 }
26070 wf.over.order_by = new_order_by;
26071 Ok(Expression::WindowFunction(wf))
26072 } else {
26073 Ok(e)
26074 }
26075 }
26076
26077 Action::RespectNullsConvert => {
26078 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
26079 if let Expression::WindowFunction(mut wf) = e {
26080 match &mut wf.this {
26081 Expression::FirstValue(ref mut vf) => {
26082 if vf.ignore_nulls == Some(false) {
26083 vf.ignore_nulls = None;
26084 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
26085 // but that's handled by the generator's NULLS ordering
26086 }
26087 }
26088 Expression::LastValue(ref mut vf) => {
26089 if vf.ignore_nulls == Some(false) {
26090 vf.ignore_nulls = None;
26091 }
26092 }
26093 _ => {}
26094 }
26095 Ok(Expression::WindowFunction(wf))
26096 } else {
26097 Ok(e)
26098 }
26099 }
26100
26101 Action::SnowflakeWindowFrameStrip => {
26102 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
26103 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
26104 if let Expression::WindowFunction(mut wf) = e {
26105 wf.over.frame = None;
26106 Ok(Expression::WindowFunction(wf))
26107 } else {
26108 Ok(e)
26109 }
26110 }
26111
26112 Action::SnowflakeWindowFrameAdd => {
26113 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
26114 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
26115 if let Expression::WindowFunction(mut wf) = e {
26116 wf.over.frame = Some(crate::expressions::WindowFrame {
26117 kind: crate::expressions::WindowFrameKind::Rows,
26118 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
26119 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
26120 exclude: None,
26121 kind_text: None,
26122 start_side_text: None,
26123 end_side_text: None,
26124 });
26125 Ok(Expression::WindowFunction(wf))
26126 } else {
26127 Ok(e)
26128 }
26129 }
26130
26131 Action::CreateTableStripComment => {
26132 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
26133 if let Expression::CreateTable(mut ct) = e {
26134 for col in &mut ct.columns {
26135 col.comment = None;
26136 col.constraints.retain(|c| {
26137 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
26138 });
26139 // Also remove Comment from constraint_order
26140 col.constraint_order.retain(|c| {
26141 !matches!(c, crate::expressions::ConstraintType::Comment)
26142 });
26143 }
26144 // Strip properties (USING, PARTITIONED BY, etc.)
26145 ct.properties.clear();
26146 Ok(Expression::CreateTable(ct))
26147 } else {
26148 Ok(e)
26149 }
26150 }
26151
26152 Action::AlterTableToSpRename => {
26153 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
26154 if let Expression::AlterTable(ref at) = e {
26155 if let Some(crate::expressions::AlterTableAction::RenameTable(
26156 ref new_tbl,
26157 )) = at.actions.first()
26158 {
26159 // Build the old table name using TSQL bracket quoting
26160 let old_name = if let Some(ref schema) = at.name.schema {
26161 if at.name.name.quoted || schema.quoted {
26162 format!("[{}].[{}]", schema.name, at.name.name.name)
26163 } else {
26164 format!("{}.{}", schema.name, at.name.name.name)
26165 }
26166 } else {
26167 if at.name.name.quoted {
26168 format!("[{}]", at.name.name.name)
26169 } else {
26170 at.name.name.name.clone()
26171 }
26172 };
26173 let new_name = new_tbl.name.name.clone();
26174 // EXEC sp_rename 'old_name', 'new_name'
26175 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
26176 Ok(Expression::Raw(crate::expressions::Raw { sql }))
26177 } else {
26178 Ok(e)
26179 }
26180 } else {
26181 Ok(e)
26182 }
26183 }
26184
26185 Action::SnowflakeIntervalFormat => {
26186 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
26187 if let Expression::Interval(mut iv) = e {
26188 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
26189 (&iv.this, &iv.unit)
26190 {
26191 if let Literal::String(ref val) = lit.as_ref() {
26192 let unit_str = match unit_spec {
26193 crate::expressions::IntervalUnitSpec::Simple {
26194 unit, ..
26195 } => match unit {
26196 crate::expressions::IntervalUnit::Year => "YEAR",
26197 crate::expressions::IntervalUnit::Quarter => "QUARTER",
26198 crate::expressions::IntervalUnit::Month => "MONTH",
26199 crate::expressions::IntervalUnit::Week => "WEEK",
26200 crate::expressions::IntervalUnit::Day => "DAY",
26201 crate::expressions::IntervalUnit::Hour => "HOUR",
26202 crate::expressions::IntervalUnit::Minute => "MINUTE",
26203 crate::expressions::IntervalUnit::Second => "SECOND",
26204 crate::expressions::IntervalUnit::Millisecond => {
26205 "MILLISECOND"
26206 }
26207 crate::expressions::IntervalUnit::Microsecond => {
26208 "MICROSECOND"
26209 }
26210 crate::expressions::IntervalUnit::Nanosecond => {
26211 "NANOSECOND"
26212 }
26213 },
26214 _ => "",
26215 };
26216 if !unit_str.is_empty() {
26217 let combined = format!("{} {}", val, unit_str);
26218 iv.this = Some(Expression::Literal(Box::new(Literal::String(
26219 combined,
26220 ))));
26221 iv.unit = None;
26222 }
26223 }
26224 }
26225 Ok(Expression::Interval(iv))
26226 } else {
26227 Ok(e)
26228 }
26229 }
26230
26231 Action::ArrayConcatBracketConvert => {
26232 // Expression::Array/ArrayFunc -> target-specific
26233 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
26234 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
26235 match e {
26236 Expression::Array(arr) => {
26237 if matches!(target, DialectType::Redshift) {
26238 Ok(Expression::Function(Box::new(Function::new(
26239 "ARRAY".to_string(),
26240 arr.expressions,
26241 ))))
26242 } else {
26243 Ok(Expression::ArrayFunc(Box::new(
26244 crate::expressions::ArrayConstructor {
26245 expressions: arr.expressions,
26246 bracket_notation: false,
26247 use_list_keyword: false,
26248 },
26249 )))
26250 }
26251 }
26252 Expression::ArrayFunc(arr) => {
26253 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
26254 if matches!(target, DialectType::Redshift) {
26255 Ok(Expression::Function(Box::new(Function::new(
26256 "ARRAY".to_string(),
26257 arr.expressions,
26258 ))))
26259 } else {
26260 Ok(Expression::ArrayFunc(arr))
26261 }
26262 }
26263 _ => Ok(e),
26264 }
26265 }
26266
26267 Action::BitAggFloatCast => {
26268 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
26269 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
26270 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
26271 let int_type = DataType::Int {
26272 length: None,
26273 integer_spelling: false,
26274 };
26275 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
26276 if let Expression::Cast(c) = agg_this {
26277 match &c.to {
26278 DataType::Float { .. }
26279 | DataType::Double { .. }
26280 | DataType::Custom { .. } => {
26281 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
26282 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
26283 let inner_type = match &c.to {
26284 DataType::Float {
26285 precision, scale, ..
26286 } => DataType::Float {
26287 precision: *precision,
26288 scale: *scale,
26289 real_spelling: true,
26290 },
26291 other => other.clone(),
26292 };
26293 let inner_cast =
26294 Expression::Cast(Box::new(crate::expressions::Cast {
26295 this: c.this.clone(),
26296 to: inner_type,
26297 trailing_comments: Vec::new(),
26298 double_colon_syntax: false,
26299 format: None,
26300 default: None,
26301 inferred_type: None,
26302 }));
26303 let rounded = Expression::Function(Box::new(Function::new(
26304 "ROUND".to_string(),
26305 vec![inner_cast],
26306 )));
26307 Expression::Cast(Box::new(crate::expressions::Cast {
26308 this: rounded,
26309 to: int_dt,
26310 trailing_comments: Vec::new(),
26311 double_colon_syntax: false,
26312 format: None,
26313 default: None,
26314 inferred_type: None,
26315 }))
26316 }
26317 DataType::Decimal { .. } => {
26318 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
26319 Expression::Cast(Box::new(crate::expressions::Cast {
26320 this: Expression::Cast(c),
26321 to: int_dt,
26322 trailing_comments: Vec::new(),
26323 double_colon_syntax: false,
26324 format: None,
26325 default: None,
26326 inferred_type: None,
26327 }))
26328 }
26329 _ => Expression::Cast(c),
26330 }
26331 } else {
26332 agg_this
26333 }
26334 };
26335 match e {
26336 Expression::BitwiseOrAgg(mut f) => {
26337 f.this = wrap_agg(f.this, int_type);
26338 Ok(Expression::BitwiseOrAgg(f))
26339 }
26340 Expression::BitwiseAndAgg(mut f) => {
26341 let int_type = DataType::Int {
26342 length: None,
26343 integer_spelling: false,
26344 };
26345 f.this = wrap_agg(f.this, int_type);
26346 Ok(Expression::BitwiseAndAgg(f))
26347 }
26348 Expression::BitwiseXorAgg(mut f) => {
26349 let int_type = DataType::Int {
26350 length: None,
26351 integer_spelling: false,
26352 };
26353 f.this = wrap_agg(f.this, int_type);
26354 Ok(Expression::BitwiseXorAgg(f))
26355 }
26356 _ => Ok(e),
26357 }
26358 }
26359
26360 Action::BitAggSnowflakeRename => {
26361 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
26362 match e {
26363 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
26364 Function::new("BITORAGG".to_string(), vec![f.this]),
26365 ))),
26366 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
26367 Function::new("BITANDAGG".to_string(), vec![f.this]),
26368 ))),
26369 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
26370 Function::new("BITXORAGG".to_string(), vec![f.this]),
26371 ))),
26372 _ => Ok(e),
26373 }
26374 }
26375
26376 Action::StrftimeCastTimestamp => {
26377 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
26378 if let Expression::Cast(mut c) = e {
26379 if matches!(
26380 c.to,
26381 DataType::Timestamp {
26382 timezone: false,
26383 ..
26384 }
26385 ) {
26386 c.to = DataType::Custom {
26387 name: "TIMESTAMP_NTZ".to_string(),
26388 };
26389 }
26390 Ok(Expression::Cast(c))
26391 } else {
26392 Ok(e)
26393 }
26394 }
26395
26396 Action::DecimalDefaultPrecision => {
26397 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
26398 if let Expression::Cast(mut c) = e {
26399 if matches!(
26400 c.to,
26401 DataType::Decimal {
26402 precision: None,
26403 ..
26404 }
26405 ) {
26406 c.to = DataType::Decimal {
26407 precision: Some(18),
26408 scale: Some(3),
26409 };
26410 }
26411 Ok(Expression::Cast(c))
26412 } else {
26413 Ok(e)
26414 }
26415 }
26416
26417 Action::FilterToIff => {
26418 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
26419 if let Expression::Filter(f) = e {
26420 let condition = *f.expression;
26421 let agg = *f.this;
26422 // Strip WHERE from condition
26423 let cond = match condition {
26424 Expression::Where(w) => w.this,
26425 other => other,
26426 };
26427 // Extract the aggregate function and its argument
26428 // We want AVG(IFF(condition, x, NULL))
26429 match agg {
26430 Expression::Function(mut func) => {
26431 if !func.args.is_empty() {
26432 let orig_arg = func.args[0].clone();
26433 let iff_call = Expression::Function(Box::new(Function::new(
26434 "IFF".to_string(),
26435 vec![cond, orig_arg, Expression::Null(Null)],
26436 )));
26437 func.args[0] = iff_call;
26438 Ok(Expression::Function(func))
26439 } else {
26440 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
26441 this: Box::new(Expression::Function(func)),
26442 expression: Box::new(cond),
26443 })))
26444 }
26445 }
26446 Expression::Avg(mut avg) => {
26447 let iff_call = Expression::Function(Box::new(Function::new(
26448 "IFF".to_string(),
26449 vec![cond, avg.this.clone(), Expression::Null(Null)],
26450 )));
26451 avg.this = iff_call;
26452 Ok(Expression::Avg(avg))
26453 }
26454 Expression::Sum(mut s) => {
26455 let iff_call = Expression::Function(Box::new(Function::new(
26456 "IFF".to_string(),
26457 vec![cond, s.this.clone(), Expression::Null(Null)],
26458 )));
26459 s.this = iff_call;
26460 Ok(Expression::Sum(s))
26461 }
26462 Expression::Count(mut c) => {
26463 if let Some(ref this_expr) = c.this {
26464 let iff_call = Expression::Function(Box::new(Function::new(
26465 "IFF".to_string(),
26466 vec![cond, this_expr.clone(), Expression::Null(Null)],
26467 )));
26468 c.this = Some(iff_call);
26469 }
26470 Ok(Expression::Count(c))
26471 }
26472 other => {
26473 // Fallback: keep as Filter
26474 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
26475 this: Box::new(other),
26476 expression: Box::new(cond),
26477 })))
26478 }
26479 }
26480 } else {
26481 Ok(e)
26482 }
26483 }
26484
26485 Action::AggFilterToIff => {
26486 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
26487 // Helper macro to handle the common AggFunc case
26488 macro_rules! handle_agg_filter_to_iff {
26489 ($variant:ident, $agg:expr) => {{
26490 let mut agg = $agg;
26491 if let Some(filter_cond) = agg.filter.take() {
26492 let iff_call = Expression::Function(Box::new(Function::new(
26493 "IFF".to_string(),
26494 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
26495 )));
26496 agg.this = iff_call;
26497 }
26498 Ok(Expression::$variant(agg))
26499 }};
26500 }
26501
26502 match e {
26503 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
26504 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
26505 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
26506 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
26507 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
26508 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
26509 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
26510 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
26511 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
26512 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
26513 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
26514 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
26515 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
26516 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
26517 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
26518 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
26519 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
26520 Expression::ApproxDistinct(agg) => {
26521 handle_agg_filter_to_iff!(ApproxDistinct, agg)
26522 }
26523 Expression::Count(mut c) => {
26524 if let Some(filter_cond) = c.filter.take() {
26525 if let Some(ref this_expr) = c.this {
26526 let iff_call = Expression::Function(Box::new(Function::new(
26527 "IFF".to_string(),
26528 vec![
26529 filter_cond,
26530 this_expr.clone(),
26531 Expression::Null(Null),
26532 ],
26533 )));
26534 c.this = Some(iff_call);
26535 }
26536 }
26537 Ok(Expression::Count(c))
26538 }
26539 other => Ok(other),
26540 }
26541 }
26542
26543 Action::JsonToGetPath => {
26544 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
26545 if let Expression::JsonExtract(je) = e {
26546 // Convert to PARSE_JSON() wrapper:
26547 // - JSON(x) -> PARSE_JSON(x)
26548 // - PARSE_JSON(x) -> keep as-is
26549 // - anything else -> wrap in PARSE_JSON()
26550 let this = match &je.this {
26551 Expression::Function(f)
26552 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
26553 {
26554 Expression::Function(Box::new(Function::new(
26555 "PARSE_JSON".to_string(),
26556 f.args.clone(),
26557 )))
26558 }
26559 Expression::Function(f)
26560 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
26561 {
26562 je.this.clone()
26563 }
26564 // GET_PATH result is already JSON, don't wrap
26565 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
26566 je.this.clone()
26567 }
26568 other => {
26569 // Wrap non-JSON expressions in PARSE_JSON()
26570 Expression::Function(Box::new(Function::new(
26571 "PARSE_JSON".to_string(),
26572 vec![other.clone()],
26573 )))
26574 }
26575 };
26576 // Convert path: extract key from JSONPath or strip $. prefix from string
26577 let path = match &je.path {
26578 Expression::JSONPath(jp) => {
26579 // Extract the key from JSONPath: $root.key -> 'key'
26580 let mut key_parts = Vec::new();
26581 for expr in &jp.expressions {
26582 match expr {
26583 Expression::JSONPathRoot(_) => {} // skip root
26584 Expression::JSONPathKey(k) => {
26585 if let Expression::Literal(lit) = &*k.this {
26586 if let Literal::String(s) = lit.as_ref() {
26587 key_parts.push(s.clone());
26588 }
26589 }
26590 }
26591 _ => {}
26592 }
26593 }
26594 if !key_parts.is_empty() {
26595 Expression::Literal(Box::new(Literal::String(
26596 key_parts.join("."),
26597 )))
26598 } else {
26599 je.path.clone()
26600 }
26601 }
26602 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
26603 {
26604 let Literal::String(s) = lit.as_ref() else {
26605 unreachable!()
26606 };
26607 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
26608 Expression::Literal(Box::new(Literal::String(stripped)))
26609 }
26610 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
26611 {
26612 let Literal::String(s) = lit.as_ref() else {
26613 unreachable!()
26614 };
26615 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
26616 Expression::Literal(Box::new(Literal::String(stripped)))
26617 }
26618 _ => je.path.clone(),
26619 };
26620 Ok(Expression::Function(Box::new(Function::new(
26621 "GET_PATH".to_string(),
26622 vec![this, path],
26623 ))))
26624 } else {
26625 Ok(e)
26626 }
26627 }
26628
26629 Action::StructToRow => {
26630 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
26631 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
26632
26633 // Extract key-value pairs from either Struct or MapFunc
26634 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
26635 Expression::Struct(s) => Some(
26636 s.fields
26637 .iter()
26638 .map(|(opt_name, field_expr)| {
26639 if let Some(name) = opt_name {
26640 (name.clone(), field_expr.clone())
26641 } else if let Expression::NamedArgument(na) = field_expr {
26642 (na.name.name.clone(), na.value.clone())
26643 } else {
26644 (String::new(), field_expr.clone())
26645 }
26646 })
26647 .collect(),
26648 ),
26649 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
26650 m.keys
26651 .iter()
26652 .zip(m.values.iter())
26653 .map(|(key, value)| {
26654 let key_name = match key {
26655 Expression::Literal(lit)
26656 if matches!(lit.as_ref(), Literal::String(_)) =>
26657 {
26658 let Literal::String(s) = lit.as_ref() else {
26659 unreachable!()
26660 };
26661 s.clone()
26662 }
26663 Expression::Identifier(id) => id.name.clone(),
26664 _ => String::new(),
26665 };
26666 (key_name, value.clone())
26667 })
26668 .collect(),
26669 ),
26670 _ => None,
26671 };
26672
26673 if let Some(pairs) = kv_pairs {
26674 let mut named_args = Vec::new();
26675 for (key_name, value) in pairs {
26676 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
26677 named_args.push(Expression::Alias(Box::new(
26678 crate::expressions::Alias::new(
26679 value,
26680 Identifier::new(key_name),
26681 ),
26682 )));
26683 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
26684 named_args.push(value);
26685 } else {
26686 named_args.push(value);
26687 }
26688 }
26689
26690 if matches!(target, DialectType::BigQuery) {
26691 Ok(Expression::Function(Box::new(Function::new(
26692 "STRUCT".to_string(),
26693 named_args,
26694 ))))
26695 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
26696 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
26697 let row_func = Expression::Function(Box::new(Function::new(
26698 "ROW".to_string(),
26699 named_args,
26700 )));
26701
26702 // Try to infer types for each pair
26703 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
26704 Expression::Struct(s) => Some(
26705 s.fields
26706 .iter()
26707 .map(|(opt_name, field_expr)| {
26708 if let Some(name) = opt_name {
26709 (name.clone(), field_expr.clone())
26710 } else if let Expression::NamedArgument(na) = field_expr
26711 {
26712 (na.name.name.clone(), na.value.clone())
26713 } else {
26714 (String::new(), field_expr.clone())
26715 }
26716 })
26717 .collect(),
26718 ),
26719 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
26720 m.keys
26721 .iter()
26722 .zip(m.values.iter())
26723 .map(|(key, value)| {
26724 let key_name = match key {
26725 Expression::Literal(lit)
26726 if matches!(
26727 lit.as_ref(),
26728 Literal::String(_)
26729 ) =>
26730 {
26731 let Literal::String(s) = lit.as_ref() else {
26732 unreachable!()
26733 };
26734 s.clone()
26735 }
26736 Expression::Identifier(id) => id.name.clone(),
26737 _ => String::new(),
26738 };
26739 (key_name, value.clone())
26740 })
26741 .collect(),
26742 ),
26743 _ => None,
26744 };
26745
26746 if let Some(pairs) = kv_pairs_again {
26747 // Infer types for all values
26748 let mut all_inferred = true;
26749 let mut fields = Vec::new();
26750 for (name, value) in &pairs {
26751 let inferred_type = match value {
26752 Expression::Literal(lit)
26753 if matches!(lit.as_ref(), Literal::Number(_)) =>
26754 {
26755 let Literal::Number(n) = lit.as_ref() else {
26756 unreachable!()
26757 };
26758 if n.contains('.') {
26759 Some(DataType::Double {
26760 precision: None,
26761 scale: None,
26762 })
26763 } else {
26764 Some(DataType::Int {
26765 length: None,
26766 integer_spelling: true,
26767 })
26768 }
26769 }
26770 Expression::Literal(lit)
26771 if matches!(lit.as_ref(), Literal::String(_)) =>
26772 {
26773 Some(DataType::VarChar {
26774 length: None,
26775 parenthesized_length: false,
26776 })
26777 }
26778 Expression::Boolean(_) => Some(DataType::Boolean),
26779 _ => None,
26780 };
26781 if let Some(dt) = inferred_type {
26782 fields.push(crate::expressions::StructField::new(
26783 name.clone(),
26784 dt,
26785 ));
26786 } else {
26787 all_inferred = false;
26788 break;
26789 }
26790 }
26791
26792 if all_inferred && !fields.is_empty() {
26793 let row_type = DataType::Struct {
26794 fields,
26795 nested: true,
26796 };
26797 Ok(Expression::Cast(Box::new(Cast {
26798 this: row_func,
26799 to: row_type,
26800 trailing_comments: Vec::new(),
26801 double_colon_syntax: false,
26802 format: None,
26803 default: None,
26804 inferred_type: None,
26805 })))
26806 } else {
26807 Ok(row_func)
26808 }
26809 } else {
26810 Ok(row_func)
26811 }
26812 } else {
26813 Ok(Expression::Function(Box::new(Function::new(
26814 "ROW".to_string(),
26815 named_args,
26816 ))))
26817 }
26818 } else {
26819 Ok(e)
26820 }
26821 }
26822
26823 Action::SparkStructConvert => {
26824 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
26825 // or DuckDB {'name': val, ...}
26826 if let Expression::Function(f) = e {
26827 // Extract name-value pairs from aliased args
26828 let mut pairs: Vec<(String, Expression)> = Vec::new();
26829 for arg in &f.args {
26830 match arg {
26831 Expression::Alias(a) => {
26832 pairs.push((a.alias.name.clone(), a.this.clone()));
26833 }
26834 _ => {
26835 pairs.push((String::new(), arg.clone()));
26836 }
26837 }
26838 }
26839
26840 match target {
26841 DialectType::DuckDB => {
26842 // Convert to DuckDB struct literal {'name': value, ...}
26843 let mut keys = Vec::new();
26844 let mut values = Vec::new();
26845 for (name, value) in &pairs {
26846 keys.push(Expression::Literal(Box::new(Literal::String(
26847 name.clone(),
26848 ))));
26849 values.push(value.clone());
26850 }
26851 Ok(Expression::MapFunc(Box::new(
26852 crate::expressions::MapConstructor {
26853 keys,
26854 values,
26855 curly_brace_syntax: true,
26856 with_map_keyword: false,
26857 },
26858 )))
26859 }
26860 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26861 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
26862 let row_args: Vec<Expression> =
26863 pairs.iter().map(|(_, v)| v.clone()).collect();
26864 let row_func = Expression::Function(Box::new(Function::new(
26865 "ROW".to_string(),
26866 row_args,
26867 )));
26868
26869 // Infer types
26870 let mut all_inferred = true;
26871 let mut fields = Vec::new();
26872 for (name, value) in &pairs {
26873 let inferred_type = match value {
26874 Expression::Literal(lit)
26875 if matches!(lit.as_ref(), Literal::Number(_)) =>
26876 {
26877 let Literal::Number(n) = lit.as_ref() else {
26878 unreachable!()
26879 };
26880 if n.contains('.') {
26881 Some(DataType::Double {
26882 precision: None,
26883 scale: None,
26884 })
26885 } else {
26886 Some(DataType::Int {
26887 length: None,
26888 integer_spelling: true,
26889 })
26890 }
26891 }
26892 Expression::Literal(lit)
26893 if matches!(lit.as_ref(), Literal::String(_)) =>
26894 {
26895 Some(DataType::VarChar {
26896 length: None,
26897 parenthesized_length: false,
26898 })
26899 }
26900 Expression::Boolean(_) => Some(DataType::Boolean),
26901 _ => None,
26902 };
26903 if let Some(dt) = inferred_type {
26904 fields.push(crate::expressions::StructField::new(
26905 name.clone(),
26906 dt,
26907 ));
26908 } else {
26909 all_inferred = false;
26910 break;
26911 }
26912 }
26913
26914 if all_inferred && !fields.is_empty() {
26915 let row_type = DataType::Struct {
26916 fields,
26917 nested: true,
26918 };
26919 Ok(Expression::Cast(Box::new(Cast {
26920 this: row_func,
26921 to: row_type,
26922 trailing_comments: Vec::new(),
26923 double_colon_syntax: false,
26924 format: None,
26925 default: None,
26926 inferred_type: None,
26927 })))
26928 } else {
26929 Ok(row_func)
26930 }
26931 }
26932 _ => Ok(Expression::Function(f)),
26933 }
26934 } else {
26935 Ok(e)
26936 }
26937 }
26938
26939 Action::ApproxCountDistinctToApproxDistinct => {
26940 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
26941 if let Expression::ApproxCountDistinct(f) = e {
26942 Ok(Expression::ApproxDistinct(f))
26943 } else {
26944 Ok(e)
26945 }
26946 }
26947
26948 Action::CollectListToArrayAgg => {
26949 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
26950 if let Expression::AggregateFunction(f) = e {
26951 let filter_expr = if !f.args.is_empty() {
26952 let arg = f.args[0].clone();
26953 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
26954 this: arg,
26955 not: true,
26956 postfix_form: false,
26957 })))
26958 } else {
26959 None
26960 };
26961 let agg = crate::expressions::AggFunc {
26962 this: if f.args.is_empty() {
26963 Expression::Null(crate::expressions::Null)
26964 } else {
26965 f.args[0].clone()
26966 },
26967 distinct: f.distinct,
26968 order_by: f.order_by.clone(),
26969 filter: filter_expr,
26970 ignore_nulls: None,
26971 name: None,
26972 having_max: None,
26973 limit: None,
26974 inferred_type: None,
26975 };
26976 Ok(Expression::ArrayAgg(Box::new(agg)))
26977 } else {
26978 Ok(e)
26979 }
26980 }
26981
26982 Action::CollectSetConvert => {
26983 // COLLECT_SET(x) -> target-specific
26984 if let Expression::AggregateFunction(f) = e {
26985 match target {
26986 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
26987 crate::expressions::AggregateFunction {
26988 name: "SET_AGG".to_string(),
26989 args: f.args,
26990 distinct: false,
26991 order_by: f.order_by,
26992 filter: f.filter,
26993 limit: f.limit,
26994 ignore_nulls: f.ignore_nulls,
26995 inferred_type: None,
26996 },
26997 ))),
26998 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
26999 crate::expressions::AggregateFunction {
27000 name: "ARRAY_UNIQUE_AGG".to_string(),
27001 args: f.args,
27002 distinct: false,
27003 order_by: f.order_by,
27004 filter: f.filter,
27005 limit: f.limit,
27006 ignore_nulls: f.ignore_nulls,
27007 inferred_type: None,
27008 },
27009 ))),
27010 DialectType::Trino | DialectType::DuckDB => {
27011 let agg = crate::expressions::AggFunc {
27012 this: if f.args.is_empty() {
27013 Expression::Null(crate::expressions::Null)
27014 } else {
27015 f.args[0].clone()
27016 },
27017 distinct: true,
27018 order_by: Vec::new(),
27019 filter: None,
27020 ignore_nulls: None,
27021 name: None,
27022 having_max: None,
27023 limit: None,
27024 inferred_type: None,
27025 };
27026 Ok(Expression::ArrayAgg(Box::new(agg)))
27027 }
27028 _ => Ok(Expression::AggregateFunction(f)),
27029 }
27030 } else {
27031 Ok(e)
27032 }
27033 }
27034
27035 Action::PercentileConvert => {
27036 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
27037 if let Expression::AggregateFunction(f) = e {
27038 let name = match target {
27039 DialectType::DuckDB => "QUANTILE",
27040 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
27041 _ => "PERCENTILE",
27042 };
27043 Ok(Expression::AggregateFunction(Box::new(
27044 crate::expressions::AggregateFunction {
27045 name: name.to_string(),
27046 args: f.args,
27047 distinct: f.distinct,
27048 order_by: f.order_by,
27049 filter: f.filter,
27050 limit: f.limit,
27051 ignore_nulls: f.ignore_nulls,
27052 inferred_type: None,
27053 },
27054 )))
27055 } else {
27056 Ok(e)
27057 }
27058 }
27059
27060 Action::CorrIsnanWrap => {
27061 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
27062 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
27063 let corr_clone = e.clone();
27064 let isnan = Expression::Function(Box::new(Function::new(
27065 "ISNAN".to_string(),
27066 vec![corr_clone.clone()],
27067 )));
27068 let case_expr = Expression::Case(Box::new(Case {
27069 operand: None,
27070 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
27071 else_: Some(corr_clone),
27072 comments: Vec::new(),
27073 inferred_type: None,
27074 }));
27075 Ok(case_expr)
27076 }
27077
27078 Action::TruncToDateTrunc => {
27079 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
27080 if let Expression::Function(f) = e {
27081 if f.args.len() == 2 {
27082 let timestamp = f.args[0].clone();
27083 let unit_expr = f.args[1].clone();
27084
27085 if matches!(target, DialectType::ClickHouse) {
27086 // For ClickHouse, produce Expression::DateTrunc which the generator
27087 // outputs as DATE_TRUNC(...) without going through the ClickHouse
27088 // target transform that would convert it to dateTrunc
27089 let unit_str = Self::get_unit_str_static(&unit_expr);
27090 let dt_field = match unit_str.as_str() {
27091 "YEAR" => DateTimeField::Year,
27092 "MONTH" => DateTimeField::Month,
27093 "DAY" => DateTimeField::Day,
27094 "HOUR" => DateTimeField::Hour,
27095 "MINUTE" => DateTimeField::Minute,
27096 "SECOND" => DateTimeField::Second,
27097 "WEEK" => DateTimeField::Week,
27098 "QUARTER" => DateTimeField::Quarter,
27099 _ => DateTimeField::Custom(unit_str),
27100 };
27101 Ok(Expression::DateTrunc(Box::new(
27102 crate::expressions::DateTruncFunc {
27103 this: timestamp,
27104 unit: dt_field,
27105 },
27106 )))
27107 } else {
27108 let new_args = vec![unit_expr, timestamp];
27109 Ok(Expression::Function(Box::new(Function::new(
27110 "DATE_TRUNC".to_string(),
27111 new_args,
27112 ))))
27113 }
27114 } else {
27115 Ok(Expression::Function(f))
27116 }
27117 } else {
27118 Ok(e)
27119 }
27120 }
27121
27122 Action::ArrayContainsConvert => {
27123 if let Expression::ArrayContains(f) = e {
27124 match target {
27125 DialectType::Presto | DialectType::Trino => {
27126 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
27127 Ok(Expression::Function(Box::new(Function::new(
27128 "CONTAINS".to_string(),
27129 vec![f.this, f.expression],
27130 ))))
27131 }
27132 DialectType::Snowflake => {
27133 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
27134 let cast_val =
27135 Expression::Cast(Box::new(crate::expressions::Cast {
27136 this: f.expression,
27137 to: crate::expressions::DataType::Custom {
27138 name: "VARIANT".to_string(),
27139 },
27140 trailing_comments: Vec::new(),
27141 double_colon_syntax: false,
27142 format: None,
27143 default: None,
27144 inferred_type: None,
27145 }));
27146 Ok(Expression::Function(Box::new(Function::new(
27147 "ARRAY_CONTAINS".to_string(),
27148 vec![cast_val, f.this],
27149 ))))
27150 }
27151 _ => Ok(Expression::ArrayContains(f)),
27152 }
27153 } else {
27154 Ok(e)
27155 }
27156 }
27157
27158 Action::ArrayExceptConvert => {
27159 if let Expression::ArrayExcept(f) = e {
27160 let source_arr = f.this;
27161 let exclude_arr = f.expression;
27162 match target {
27163 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
27164 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
27165 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
27166 // ELSE LIST_TRANSFORM(LIST_FILTER(
27167 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
27168 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
27169 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
27170 // pair -> pair[1])
27171 // END
27172
27173 // Build null check
27174 let source_is_null =
27175 Expression::IsNull(Box::new(crate::expressions::IsNull {
27176 this: source_arr.clone(),
27177 not: false,
27178 postfix_form: false,
27179 }));
27180 let exclude_is_null =
27181 Expression::IsNull(Box::new(crate::expressions::IsNull {
27182 this: exclude_arr.clone(),
27183 not: false,
27184 postfix_form: false,
27185 }));
27186 let null_check =
27187 Expression::Or(Box::new(crate::expressions::BinaryOp {
27188 left: source_is_null,
27189 right: exclude_is_null,
27190 left_comments: vec![],
27191 operator_comments: vec![],
27192 trailing_comments: vec![],
27193 inferred_type: None,
27194 }));
27195
27196 // GENERATE_SERIES(1, LENGTH(source))
27197 let gen_series = Expression::Function(Box::new(Function::new(
27198 "GENERATE_SERIES".to_string(),
27199 vec![
27200 Expression::number(1),
27201 Expression::Function(Box::new(Function::new(
27202 "LENGTH".to_string(),
27203 vec![source_arr.clone()],
27204 ))),
27205 ],
27206 )));
27207
27208 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
27209 let list_zip = Expression::Function(Box::new(Function::new(
27210 "LIST_ZIP".to_string(),
27211 vec![source_arr.clone(), gen_series],
27212 )));
27213
27214 // pair[1] and pair[2]
27215 let pair_col = Expression::column("pair");
27216 let pair_1 = Expression::Subscript(Box::new(
27217 crate::expressions::Subscript {
27218 this: pair_col.clone(),
27219 index: Expression::number(1),
27220 },
27221 ));
27222 let pair_2 = Expression::Subscript(Box::new(
27223 crate::expressions::Subscript {
27224 this: pair_col.clone(),
27225 index: Expression::number(2),
27226 },
27227 ));
27228
27229 // source[1:pair[2]]
27230 let source_slice = Expression::ArraySlice(Box::new(
27231 crate::expressions::ArraySlice {
27232 this: source_arr.clone(),
27233 start: Some(Expression::number(1)),
27234 end: Some(pair_2),
27235 },
27236 ));
27237
27238 let e_col = Expression::column("e");
27239
27240 // e -> e IS NOT DISTINCT FROM pair[1]
27241 let inner_lambda1 =
27242 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27243 parameters: vec![crate::expressions::Identifier::new("e")],
27244 body: Expression::NullSafeEq(Box::new(
27245 crate::expressions::BinaryOp {
27246 left: e_col.clone(),
27247 right: pair_1.clone(),
27248 left_comments: vec![],
27249 operator_comments: vec![],
27250 trailing_comments: vec![],
27251 inferred_type: None,
27252 },
27253 )),
27254 colon: false,
27255 parameter_types: vec![],
27256 }));
27257
27258 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
27259 let inner_filter1 = Expression::Function(Box::new(Function::new(
27260 "LIST_FILTER".to_string(),
27261 vec![source_slice, inner_lambda1],
27262 )));
27263
27264 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
27265 let len1 = Expression::Function(Box::new(Function::new(
27266 "LENGTH".to_string(),
27267 vec![inner_filter1],
27268 )));
27269
27270 // e -> e IS NOT DISTINCT FROM pair[1]
27271 let inner_lambda2 =
27272 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27273 parameters: vec![crate::expressions::Identifier::new("e")],
27274 body: Expression::NullSafeEq(Box::new(
27275 crate::expressions::BinaryOp {
27276 left: e_col,
27277 right: pair_1.clone(),
27278 left_comments: vec![],
27279 operator_comments: vec![],
27280 trailing_comments: vec![],
27281 inferred_type: None,
27282 },
27283 )),
27284 colon: false,
27285 parameter_types: vec![],
27286 }));
27287
27288 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
27289 let inner_filter2 = Expression::Function(Box::new(Function::new(
27290 "LIST_FILTER".to_string(),
27291 vec![exclude_arr.clone(), inner_lambda2],
27292 )));
27293
27294 // LENGTH(LIST_FILTER(exclude, ...))
27295 let len2 = Expression::Function(Box::new(Function::new(
27296 "LENGTH".to_string(),
27297 vec![inner_filter2],
27298 )));
27299
27300 // (LENGTH(...) > LENGTH(...))
27301 let cond = Expression::Paren(Box::new(Paren {
27302 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
27303 left: len1,
27304 right: len2,
27305 left_comments: vec![],
27306 operator_comments: vec![],
27307 trailing_comments: vec![],
27308 inferred_type: None,
27309 })),
27310 trailing_comments: vec![],
27311 }));
27312
27313 // pair -> (condition)
27314 let filter_lambda =
27315 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27316 parameters: vec![crate::expressions::Identifier::new(
27317 "pair",
27318 )],
27319 body: cond,
27320 colon: false,
27321 parameter_types: vec![],
27322 }));
27323
27324 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
27325 let outer_filter = Expression::Function(Box::new(Function::new(
27326 "LIST_FILTER".to_string(),
27327 vec![list_zip, filter_lambda],
27328 )));
27329
27330 // pair -> pair[1]
27331 let transform_lambda =
27332 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27333 parameters: vec![crate::expressions::Identifier::new(
27334 "pair",
27335 )],
27336 body: pair_1,
27337 colon: false,
27338 parameter_types: vec![],
27339 }));
27340
27341 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
27342 let list_transform = Expression::Function(Box::new(Function::new(
27343 "LIST_TRANSFORM".to_string(),
27344 vec![outer_filter, transform_lambda],
27345 )));
27346
27347 Ok(Expression::Case(Box::new(Case {
27348 operand: None,
27349 whens: vec![(null_check, Expression::Null(Null))],
27350 else_: Some(list_transform),
27351 comments: Vec::new(),
27352 inferred_type: None,
27353 })))
27354 }
27355 DialectType::DuckDB => {
27356 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
27357 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
27358 // ELSE LIST_FILTER(LIST_DISTINCT(source),
27359 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
27360 // END
27361
27362 // Build: source IS NULL
27363 let source_is_null =
27364 Expression::IsNull(Box::new(crate::expressions::IsNull {
27365 this: source_arr.clone(),
27366 not: false,
27367 postfix_form: false,
27368 }));
27369 // Build: exclude IS NULL
27370 let exclude_is_null =
27371 Expression::IsNull(Box::new(crate::expressions::IsNull {
27372 this: exclude_arr.clone(),
27373 not: false,
27374 postfix_form: false,
27375 }));
27376 // source IS NULL OR exclude IS NULL
27377 let null_check =
27378 Expression::Or(Box::new(crate::expressions::BinaryOp {
27379 left: source_is_null,
27380 right: exclude_is_null,
27381 left_comments: vec![],
27382 operator_comments: vec![],
27383 trailing_comments: vec![],
27384 inferred_type: None,
27385 }));
27386
27387 // LIST_DISTINCT(source)
27388 let list_distinct = Expression::Function(Box::new(Function::new(
27389 "LIST_DISTINCT".to_string(),
27390 vec![source_arr.clone()],
27391 )));
27392
27393 // x IS NOT DISTINCT FROM e
27394 let x_col = Expression::column("x");
27395 let e_col = Expression::column("e");
27396 let is_not_distinct = Expression::NullSafeEq(Box::new(
27397 crate::expressions::BinaryOp {
27398 left: x_col,
27399 right: e_col.clone(),
27400 left_comments: vec![],
27401 operator_comments: vec![],
27402 trailing_comments: vec![],
27403 inferred_type: None,
27404 },
27405 ));
27406
27407 // x -> x IS NOT DISTINCT FROM e
27408 let inner_lambda =
27409 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27410 parameters: vec![crate::expressions::Identifier::new("x")],
27411 body: is_not_distinct,
27412 colon: false,
27413 parameter_types: vec![],
27414 }));
27415
27416 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
27417 let inner_list_filter =
27418 Expression::Function(Box::new(Function::new(
27419 "LIST_FILTER".to_string(),
27420 vec![exclude_arr.clone(), inner_lambda],
27421 )));
27422
27423 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
27424 let len_inner = Expression::Function(Box::new(Function::new(
27425 "LENGTH".to_string(),
27426 vec![inner_list_filter],
27427 )));
27428
27429 // LENGTH(...) = 0
27430 let eq_zero =
27431 Expression::Eq(Box::new(crate::expressions::BinaryOp {
27432 left: len_inner,
27433 right: Expression::number(0),
27434 left_comments: vec![],
27435 operator_comments: vec![],
27436 trailing_comments: vec![],
27437 inferred_type: None,
27438 }));
27439
27440 // e -> LENGTH(LIST_FILTER(...)) = 0
27441 let outer_lambda =
27442 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27443 parameters: vec![crate::expressions::Identifier::new("e")],
27444 body: eq_zero,
27445 colon: false,
27446 parameter_types: vec![],
27447 }));
27448
27449 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
27450 let outer_list_filter =
27451 Expression::Function(Box::new(Function::new(
27452 "LIST_FILTER".to_string(),
27453 vec![list_distinct, outer_lambda],
27454 )));
27455
27456 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
27457 Ok(Expression::Case(Box::new(Case {
27458 operand: None,
27459 whens: vec![(null_check, Expression::Null(Null))],
27460 else_: Some(outer_list_filter),
27461 comments: Vec::new(),
27462 inferred_type: None,
27463 })))
27464 }
27465 DialectType::Snowflake => {
27466 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
27467 Ok(Expression::ArrayExcept(Box::new(
27468 crate::expressions::BinaryFunc {
27469 this: source_arr,
27470 expression: exclude_arr,
27471 original_name: None,
27472 inferred_type: None,
27473 },
27474 )))
27475 }
27476 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27477 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
27478 Ok(Expression::Function(Box::new(Function::new(
27479 "ARRAY_EXCEPT".to_string(),
27480 vec![source_arr, exclude_arr],
27481 ))))
27482 }
27483 _ => Ok(Expression::ArrayExcept(Box::new(
27484 crate::expressions::BinaryFunc {
27485 this: source_arr,
27486 expression: exclude_arr,
27487 original_name: None,
27488 inferred_type: None,
27489 },
27490 ))),
27491 }
27492 } else {
27493 Ok(e)
27494 }
27495 }
27496
27497 Action::RegexpLikeExasolAnchor => {
27498 // RegexpLike -> Exasol: wrap pattern with .*...*
27499 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
27500 // dialects does partial match, so we need to anchor with .* on both sides
27501 if let Expression::RegexpLike(mut f) = e {
27502 match &f.pattern {
27503 Expression::Literal(lit)
27504 if matches!(lit.as_ref(), Literal::String(_)) =>
27505 {
27506 let Literal::String(s) = lit.as_ref() else {
27507 unreachable!()
27508 };
27509 // String literal: wrap with .*...*
27510 f.pattern = Expression::Literal(Box::new(Literal::String(
27511 format!(".*{}.*", s),
27512 )));
27513 }
27514 _ => {
27515 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
27516 f.pattern =
27517 Expression::Paren(Box::new(crate::expressions::Paren {
27518 this: Expression::Concat(Box::new(
27519 crate::expressions::BinaryOp {
27520 left: Expression::Concat(Box::new(
27521 crate::expressions::BinaryOp {
27522 left: Expression::Literal(Box::new(
27523 Literal::String(".*".to_string()),
27524 )),
27525 right: f.pattern,
27526 left_comments: vec![],
27527 operator_comments: vec![],
27528 trailing_comments: vec![],
27529 inferred_type: None,
27530 },
27531 )),
27532 right: Expression::Literal(Box::new(
27533 Literal::String(".*".to_string()),
27534 )),
27535 left_comments: vec![],
27536 operator_comments: vec![],
27537 trailing_comments: vec![],
27538 inferred_type: None,
27539 },
27540 )),
27541 trailing_comments: vec![],
27542 }));
27543 }
27544 }
27545 Ok(Expression::RegexpLike(f))
27546 } else {
27547 Ok(e)
27548 }
27549 }
27550
27551 Action::ArrayPositionSnowflakeSwap => {
27552 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
27553 if let Expression::ArrayPosition(f) = e {
27554 Ok(Expression::ArrayPosition(Box::new(
27555 crate::expressions::BinaryFunc {
27556 this: f.expression,
27557 expression: f.this,
27558 original_name: f.original_name,
27559 inferred_type: f.inferred_type,
27560 },
27561 )))
27562 } else {
27563 Ok(e)
27564 }
27565 }
27566
27567 Action::SnowflakeArrayPositionToDuckDB => {
27568 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
27569 // Snowflake uses 0-based indexing, DuckDB uses 1-based
27570 // The parser has this=value, expression=array (Snowflake order)
27571 if let Expression::ArrayPosition(f) = e {
27572 // Create ARRAY_POSITION(array, value) in standard order
27573 let standard_pos =
27574 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
27575 this: f.expression, // array
27576 expression: f.this, // value
27577 original_name: f.original_name,
27578 inferred_type: f.inferred_type,
27579 }));
27580 // Subtract 1 for zero-based indexing
27581 Ok(Expression::Sub(Box::new(BinaryOp {
27582 left: standard_pos,
27583 right: Expression::number(1),
27584 left_comments: vec![],
27585 operator_comments: vec![],
27586 trailing_comments: vec![],
27587 inferred_type: None,
27588 })))
27589 } else {
27590 Ok(e)
27591 }
27592 }
27593
27594 Action::ArrayDistinctConvert => {
27595 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
27596 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
27597 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
27598 // ELSE LIST_DISTINCT(arr)
27599 // END
27600 if let Expression::ArrayDistinct(f) = e {
27601 let arr = f.this;
27602
27603 // ARRAY_LENGTH(arr)
27604 let array_length = Expression::Function(Box::new(Function::new(
27605 "ARRAY_LENGTH".to_string(),
27606 vec![arr.clone()],
27607 )));
27608 // LIST_COUNT(arr)
27609 let list_count = Expression::Function(Box::new(Function::new(
27610 "LIST_COUNT".to_string(),
27611 vec![arr.clone()],
27612 )));
27613 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
27614 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
27615 left: array_length,
27616 right: list_count,
27617 left_comments: vec![],
27618 operator_comments: vec![],
27619 trailing_comments: vec![],
27620 inferred_type: None,
27621 }));
27622
27623 // _u column
27624 let u_col = Expression::column("_u");
27625 // NOT _u IS NULL
27626 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
27627 this: u_col.clone(),
27628 not: false,
27629 postfix_form: false,
27630 }));
27631 let not_u_is_null =
27632 Expression::Not(Box::new(crate::expressions::UnaryOp {
27633 this: u_is_null,
27634 inferred_type: None,
27635 }));
27636 // _u -> NOT _u IS NULL
27637 let filter_lambda =
27638 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27639 parameters: vec![crate::expressions::Identifier::new("_u")],
27640 body: not_u_is_null,
27641 colon: false,
27642 parameter_types: vec![],
27643 }));
27644 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
27645 let list_filter = Expression::Function(Box::new(Function::new(
27646 "LIST_FILTER".to_string(),
27647 vec![arr.clone(), filter_lambda],
27648 )));
27649 // LIST_DISTINCT(LIST_FILTER(arr, ...))
27650 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
27651 "LIST_DISTINCT".to_string(),
27652 vec![list_filter],
27653 )));
27654 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
27655 let list_append = Expression::Function(Box::new(Function::new(
27656 "LIST_APPEND".to_string(),
27657 vec![list_distinct_filtered, Expression::Null(Null)],
27658 )));
27659
27660 // LIST_DISTINCT(arr)
27661 let list_distinct = Expression::Function(Box::new(Function::new(
27662 "LIST_DISTINCT".to_string(),
27663 vec![arr],
27664 )));
27665
27666 // CASE WHEN neq THEN list_append ELSE list_distinct END
27667 Ok(Expression::Case(Box::new(Case {
27668 operand: None,
27669 whens: vec![(neq, list_append)],
27670 else_: Some(list_distinct),
27671 comments: Vec::new(),
27672 inferred_type: None,
27673 })))
27674 } else {
27675 Ok(e)
27676 }
27677 }
27678
27679 Action::ArrayDistinctClickHouse => {
27680 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
27681 if let Expression::ArrayDistinct(f) = e {
27682 Ok(Expression::Function(Box::new(Function::new(
27683 "arrayDistinct".to_string(),
27684 vec![f.this],
27685 ))))
27686 } else {
27687 Ok(e)
27688 }
27689 }
27690
27691 Action::ArrayContainsDuckDBConvert => {
27692 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
27693 // CASE WHEN value IS NULL
27694 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
27695 // ELSE ARRAY_CONTAINS(array, value)
27696 // END
27697 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
27698 if let Expression::ArrayContains(f) = e {
27699 let value = f.this;
27700 let array = f.expression;
27701
27702 // value IS NULL
27703 let value_is_null =
27704 Expression::IsNull(Box::new(crate::expressions::IsNull {
27705 this: value.clone(),
27706 not: false,
27707 postfix_form: false,
27708 }));
27709
27710 // ARRAY_LENGTH(array)
27711 let array_length = Expression::Function(Box::new(Function::new(
27712 "ARRAY_LENGTH".to_string(),
27713 vec![array.clone()],
27714 )));
27715 // LIST_COUNT(array)
27716 let list_count = Expression::Function(Box::new(Function::new(
27717 "LIST_COUNT".to_string(),
27718 vec![array.clone()],
27719 )));
27720 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
27721 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
27722 left: array_length,
27723 right: list_count,
27724 left_comments: vec![],
27725 operator_comments: vec![],
27726 trailing_comments: vec![],
27727 inferred_type: None,
27728 }));
27729 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
27730 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
27731 this: Box::new(neq),
27732 expression: Box::new(Expression::Boolean(
27733 crate::expressions::BooleanLiteral { value: false },
27734 )),
27735 }));
27736
27737 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
27738 let array_contains = Expression::Function(Box::new(Function::new(
27739 "ARRAY_CONTAINS".to_string(),
27740 vec![array, value],
27741 )));
27742
27743 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
27744 Ok(Expression::Case(Box::new(Case {
27745 operand: None,
27746 whens: vec![(value_is_null, nullif)],
27747 else_: Some(array_contains),
27748 comments: Vec::new(),
27749 inferred_type: None,
27750 })))
27751 } else {
27752 Ok(e)
27753 }
27754 }
27755
27756 Action::StrPositionExpand => {
27757 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
27758 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
27759 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
27760 if let Expression::StrPosition(sp) = e {
27761 let crate::expressions::StrPosition {
27762 this,
27763 substr,
27764 position,
27765 occurrence,
27766 } = *sp;
27767 let string = *this;
27768 let substr_expr = match substr {
27769 Some(s) => *s,
27770 None => Expression::Null(Null),
27771 };
27772 let pos = match position {
27773 Some(p) => *p,
27774 None => Expression::number(1),
27775 };
27776
27777 // SUBSTRING(string, pos)
27778 let substring_call = Expression::Function(Box::new(Function::new(
27779 "SUBSTRING".to_string(),
27780 vec![string.clone(), pos.clone()],
27781 )));
27782 // STRPOS(SUBSTRING(string, pos), substr)
27783 let strpos_call = Expression::Function(Box::new(Function::new(
27784 "STRPOS".to_string(),
27785 vec![substring_call, substr_expr.clone()],
27786 )));
27787 // STRPOS(...) + pos - 1
27788 let pos_adjusted =
27789 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
27790 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
27791 strpos_call.clone(),
27792 pos.clone(),
27793 ))),
27794 Expression::number(1),
27795 )));
27796 // STRPOS(...) = 0
27797 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
27798 strpos_call.clone(),
27799 Expression::number(0),
27800 )));
27801
27802 match target {
27803 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27804 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
27805 Ok(Expression::Function(Box::new(Function::new(
27806 "IF".to_string(),
27807 vec![is_zero, Expression::number(0), pos_adjusted],
27808 ))))
27809 }
27810 DialectType::DuckDB => {
27811 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
27812 Ok(Expression::Case(Box::new(Case {
27813 operand: None,
27814 whens: vec![(is_zero, Expression::number(0))],
27815 else_: Some(pos_adjusted),
27816 comments: Vec::new(),
27817 inferred_type: None,
27818 })))
27819 }
27820 _ => {
27821 // Reconstruct StrPosition
27822 Ok(Expression::StrPosition(Box::new(
27823 crate::expressions::StrPosition {
27824 this: Box::new(string),
27825 substr: Some(Box::new(substr_expr)),
27826 position: Some(Box::new(pos)),
27827 occurrence,
27828 },
27829 )))
27830 }
27831 }
27832 } else {
27833 Ok(e)
27834 }
27835 }
27836
27837 Action::MonthsBetweenConvert => {
27838 if let Expression::MonthsBetween(mb) = e {
27839 let crate::expressions::BinaryFunc {
27840 this: end_date,
27841 expression: start_date,
27842 ..
27843 } = *mb;
27844 match target {
27845 DialectType::DuckDB => {
27846 let cast_end = Self::ensure_cast_date(end_date);
27847 let cast_start = Self::ensure_cast_date(start_date);
27848 let dd = Expression::Function(Box::new(Function::new(
27849 "DATE_DIFF".to_string(),
27850 vec![
27851 Expression::string("MONTH"),
27852 cast_start.clone(),
27853 cast_end.clone(),
27854 ],
27855 )));
27856 let day_end = Expression::Function(Box::new(Function::new(
27857 "DAY".to_string(),
27858 vec![cast_end.clone()],
27859 )));
27860 let day_start = Expression::Function(Box::new(Function::new(
27861 "DAY".to_string(),
27862 vec![cast_start.clone()],
27863 )));
27864 let last_day_end = Expression::Function(Box::new(Function::new(
27865 "LAST_DAY".to_string(),
27866 vec![cast_end.clone()],
27867 )));
27868 let last_day_start = Expression::Function(Box::new(Function::new(
27869 "LAST_DAY".to_string(),
27870 vec![cast_start.clone()],
27871 )));
27872 let day_last_end = Expression::Function(Box::new(Function::new(
27873 "DAY".to_string(),
27874 vec![last_day_end],
27875 )));
27876 let day_last_start = Expression::Function(Box::new(Function::new(
27877 "DAY".to_string(),
27878 vec![last_day_start],
27879 )));
27880 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
27881 day_end.clone(),
27882 day_last_end,
27883 )));
27884 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
27885 day_start.clone(),
27886 day_last_start,
27887 )));
27888 let both_cond =
27889 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
27890 let day_diff =
27891 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
27892 let day_diff_paren =
27893 Expression::Paren(Box::new(crate::expressions::Paren {
27894 this: day_diff,
27895 trailing_comments: Vec::new(),
27896 }));
27897 let frac = Expression::Div(Box::new(BinaryOp::new(
27898 day_diff_paren,
27899 Expression::Literal(Box::new(Literal::Number(
27900 "31.0".to_string(),
27901 ))),
27902 )));
27903 let case_expr = Expression::Case(Box::new(Case {
27904 operand: None,
27905 whens: vec![(both_cond, Expression::number(0))],
27906 else_: Some(frac),
27907 comments: Vec::new(),
27908 inferred_type: None,
27909 }));
27910 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
27911 }
27912 DialectType::Snowflake | DialectType::Redshift => {
27913 let unit = Expression::Identifier(Identifier::new("MONTH"));
27914 Ok(Expression::Function(Box::new(Function::new(
27915 "DATEDIFF".to_string(),
27916 vec![unit, start_date, end_date],
27917 ))))
27918 }
27919 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27920 Ok(Expression::Function(Box::new(Function::new(
27921 "DATE_DIFF".to_string(),
27922 vec![Expression::string("MONTH"), start_date, end_date],
27923 ))))
27924 }
27925 _ => Ok(Expression::MonthsBetween(Box::new(
27926 crate::expressions::BinaryFunc {
27927 this: end_date,
27928 expression: start_date,
27929 original_name: None,
27930 inferred_type: None,
27931 },
27932 ))),
27933 }
27934 } else {
27935 Ok(e)
27936 }
27937 }
27938
27939 Action::AddMonthsConvert => {
27940 if let Expression::AddMonths(am) = e {
27941 let date = am.this;
27942 let val = am.expression;
27943 match target {
27944 DialectType::TSQL | DialectType::Fabric => {
27945 let cast_date = Self::ensure_cast_datetime2(date);
27946 Ok(Expression::Function(Box::new(Function::new(
27947 "DATEADD".to_string(),
27948 vec![
27949 Expression::Identifier(Identifier::new("MONTH")),
27950 val,
27951 cast_date,
27952 ],
27953 ))))
27954 }
27955 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
27956 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
27957 // Optionally wrapped in CAST(... AS type) if the input had a specific type
27958
27959 // Determine the cast type from the date expression
27960 let (cast_date, return_type) = match &date {
27961 Expression::Literal(lit)
27962 if matches!(lit.as_ref(), Literal::String(_)) =>
27963 {
27964 // String literal: CAST(str AS TIMESTAMP), no outer CAST
27965 (
27966 Expression::Cast(Box::new(Cast {
27967 this: date.clone(),
27968 to: DataType::Timestamp {
27969 precision: None,
27970 timezone: false,
27971 },
27972 trailing_comments: Vec::new(),
27973 double_colon_syntax: false,
27974 format: None,
27975 default: None,
27976 inferred_type: None,
27977 })),
27978 None,
27979 )
27980 }
27981 Expression::Cast(c) => {
27982 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
27983 (date.clone(), Some(c.to.clone()))
27984 }
27985 _ => {
27986 // Expression or NULL::TYPE - keep as-is, check for cast type
27987 if let Expression::Cast(c) = &date {
27988 (date.clone(), Some(c.to.clone()))
27989 } else {
27990 (date.clone(), None)
27991 }
27992 }
27993 };
27994
27995 // Build the interval expression
27996 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
27997 // For integer values, use INTERVAL val MONTH
27998 let is_non_integer_val = match &val {
27999 Expression::Literal(lit)
28000 if matches!(lit.as_ref(), Literal::Number(_)) =>
28001 {
28002 let Literal::Number(n) = lit.as_ref() else {
28003 unreachable!()
28004 };
28005 n.contains('.')
28006 }
28007 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
28008 Expression::Neg(n) => {
28009 if let Expression::Literal(lit) = &n.this {
28010 if let Literal::Number(s) = lit.as_ref() {
28011 s.contains('.')
28012 } else {
28013 false
28014 }
28015 } else {
28016 false
28017 }
28018 }
28019 _ => false,
28020 };
28021
28022 let add_interval = if is_non_integer_val {
28023 // TO_MONTHS(CAST(ROUND(val) AS INT))
28024 let round_val = Expression::Function(Box::new(Function::new(
28025 "ROUND".to_string(),
28026 vec![val.clone()],
28027 )));
28028 let cast_int = Expression::Cast(Box::new(Cast {
28029 this: round_val,
28030 to: DataType::Int {
28031 length: None,
28032 integer_spelling: false,
28033 },
28034 trailing_comments: Vec::new(),
28035 double_colon_syntax: false,
28036 format: None,
28037 default: None,
28038 inferred_type: None,
28039 }));
28040 Expression::Function(Box::new(Function::new(
28041 "TO_MONTHS".to_string(),
28042 vec![cast_int],
28043 )))
28044 } else {
28045 // INTERVAL val MONTH
28046 // For negative numbers, wrap in parens
28047 let interval_val = match &val {
28048 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
28049 {
28050 let Literal::Number(_) = lit.as_ref() else {
28051 unreachable!()
28052 };
28053 Expression::Paren(Box::new(Paren {
28054 this: val.clone(),
28055 trailing_comments: Vec::new(),
28056 }))
28057 }
28058 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
28059 this: val.clone(),
28060 trailing_comments: Vec::new(),
28061 })),
28062 Expression::Null(_) => Expression::Paren(Box::new(Paren {
28063 this: val.clone(),
28064 trailing_comments: Vec::new(),
28065 })),
28066 _ => val.clone(),
28067 };
28068 Expression::Interval(Box::new(crate::expressions::Interval {
28069 this: Some(interval_val),
28070 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28071 unit: crate::expressions::IntervalUnit::Month,
28072 use_plural: false,
28073 }),
28074 }))
28075 };
28076
28077 // Build: date + interval
28078 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
28079 cast_date.clone(),
28080 add_interval.clone(),
28081 )));
28082
28083 // Build LAST_DAY(date)
28084 let last_day_date = Expression::Function(Box::new(Function::new(
28085 "LAST_DAY".to_string(),
28086 vec![cast_date.clone()],
28087 )));
28088
28089 // Build LAST_DAY(date + interval)
28090 let last_day_date_plus =
28091 Expression::Function(Box::new(Function::new(
28092 "LAST_DAY".to_string(),
28093 vec![date_plus_interval.clone()],
28094 )));
28095
28096 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
28097 let case_expr = Expression::Case(Box::new(Case {
28098 operand: None,
28099 whens: vec![(
28100 Expression::Eq(Box::new(BinaryOp::new(
28101 last_day_date,
28102 cast_date.clone(),
28103 ))),
28104 last_day_date_plus,
28105 )],
28106 else_: Some(date_plus_interval),
28107 comments: Vec::new(),
28108 inferred_type: None,
28109 }));
28110
28111 // Wrap in CAST(... AS type) if needed
28112 if let Some(dt) = return_type {
28113 Ok(Expression::Cast(Box::new(Cast {
28114 this: case_expr,
28115 to: dt,
28116 trailing_comments: Vec::new(),
28117 double_colon_syntax: false,
28118 format: None,
28119 default: None,
28120 inferred_type: None,
28121 })))
28122 } else {
28123 Ok(case_expr)
28124 }
28125 }
28126 DialectType::DuckDB => {
28127 // Non-Snowflake source: simple date + INTERVAL
28128 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28129 {
28130 Expression::Cast(Box::new(Cast {
28131 this: date,
28132 to: DataType::Timestamp {
28133 precision: None,
28134 timezone: false,
28135 },
28136 trailing_comments: Vec::new(),
28137 double_colon_syntax: false,
28138 format: None,
28139 default: None,
28140 inferred_type: None,
28141 }))
28142 } else {
28143 date
28144 };
28145 let interval =
28146 Expression::Interval(Box::new(crate::expressions::Interval {
28147 this: Some(val),
28148 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28149 unit: crate::expressions::IntervalUnit::Month,
28150 use_plural: false,
28151 }),
28152 }));
28153 Ok(Expression::Add(Box::new(BinaryOp::new(
28154 cast_date, interval,
28155 ))))
28156 }
28157 DialectType::Snowflake => {
28158 // Keep ADD_MONTHS when source is also Snowflake
28159 if matches!(source, DialectType::Snowflake) {
28160 Ok(Expression::Function(Box::new(Function::new(
28161 "ADD_MONTHS".to_string(),
28162 vec![date, val],
28163 ))))
28164 } else {
28165 Ok(Expression::Function(Box::new(Function::new(
28166 "DATEADD".to_string(),
28167 vec![
28168 Expression::Identifier(Identifier::new("MONTH")),
28169 val,
28170 date,
28171 ],
28172 ))))
28173 }
28174 }
28175 DialectType::Redshift => {
28176 Ok(Expression::Function(Box::new(Function::new(
28177 "DATEADD".to_string(),
28178 vec![
28179 Expression::Identifier(Identifier::new("MONTH")),
28180 val,
28181 date,
28182 ],
28183 ))))
28184 }
28185 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28186 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28187 {
28188 Expression::Cast(Box::new(Cast {
28189 this: date,
28190 to: DataType::Timestamp {
28191 precision: None,
28192 timezone: false,
28193 },
28194 trailing_comments: Vec::new(),
28195 double_colon_syntax: false,
28196 format: None,
28197 default: None,
28198 inferred_type: None,
28199 }))
28200 } else {
28201 date
28202 };
28203 Ok(Expression::Function(Box::new(Function::new(
28204 "DATE_ADD".to_string(),
28205 vec![Expression::string("MONTH"), val, cast_date],
28206 ))))
28207 }
28208 DialectType::BigQuery => {
28209 let interval =
28210 Expression::Interval(Box::new(crate::expressions::Interval {
28211 this: Some(val),
28212 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28213 unit: crate::expressions::IntervalUnit::Month,
28214 use_plural: false,
28215 }),
28216 }));
28217 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28218 {
28219 Expression::Cast(Box::new(Cast {
28220 this: date,
28221 to: DataType::Custom {
28222 name: "DATETIME".to_string(),
28223 },
28224 trailing_comments: Vec::new(),
28225 double_colon_syntax: false,
28226 format: None,
28227 default: None,
28228 inferred_type: None,
28229 }))
28230 } else {
28231 date
28232 };
28233 Ok(Expression::Function(Box::new(Function::new(
28234 "DATE_ADD".to_string(),
28235 vec![cast_date, interval],
28236 ))))
28237 }
28238 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28239 Ok(Expression::Function(Box::new(Function::new(
28240 "ADD_MONTHS".to_string(),
28241 vec![date, val],
28242 ))))
28243 }
28244 _ => {
28245 // Default: keep as AddMonths expression
28246 Ok(Expression::AddMonths(Box::new(
28247 crate::expressions::BinaryFunc {
28248 this: date,
28249 expression: val,
28250 original_name: None,
28251 inferred_type: None,
28252 },
28253 )))
28254 }
28255 }
28256 } else {
28257 Ok(e)
28258 }
28259 }
28260
28261 Action::PercentileContConvert => {
28262 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
28263 // Presto/Trino: APPROX_PERCENTILE(col, p)
28264 // Spark/Databricks: PERCENTILE_APPROX(col, p)
28265 if let Expression::WithinGroup(wg) = e {
28266 // Extract percentile value and order by column
28267 let (percentile, _is_disc) = match &wg.this {
28268 Expression::Function(f) => {
28269 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
28270 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
28271 Box::new(Literal::Number("0.5".to_string())),
28272 ));
28273 (pct, is_disc)
28274 }
28275 Expression::AggregateFunction(af) => {
28276 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
28277 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
28278 Box::new(Literal::Number("0.5".to_string())),
28279 ));
28280 (pct, is_disc)
28281 }
28282 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
28283 _ => return Ok(Expression::WithinGroup(wg)),
28284 };
28285 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
28286 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
28287 );
28288
28289 let func_name = match target {
28290 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28291 "APPROX_PERCENTILE"
28292 }
28293 _ => "PERCENTILE_APPROX", // Spark, Databricks
28294 };
28295 Ok(Expression::Function(Box::new(Function::new(
28296 func_name.to_string(),
28297 vec![col, percentile],
28298 ))))
28299 } else {
28300 Ok(e)
28301 }
28302 }
28303
28304 Action::CurrentUserSparkParens => {
28305 // CURRENT_USER -> CURRENT_USER() for Spark
28306 if let Expression::CurrentUser(_) = e {
28307 Ok(Expression::Function(Box::new(Function::new(
28308 "CURRENT_USER".to_string(),
28309 vec![],
28310 ))))
28311 } else {
28312 Ok(e)
28313 }
28314 }
28315
28316 Action::SparkDateFuncCast => {
28317 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
28318 let cast_arg = |arg: Expression| -> Expression {
28319 match target {
28320 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28321 Self::double_cast_timestamp_date(arg)
28322 }
28323 _ => {
28324 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
28325 Self::ensure_cast_date(arg)
28326 }
28327 }
28328 };
28329 match e {
28330 Expression::Month(f) => Ok(Expression::Month(Box::new(
28331 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28332 ))),
28333 Expression::Year(f) => Ok(Expression::Year(Box::new(
28334 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28335 ))),
28336 Expression::Day(f) => Ok(Expression::Day(Box::new(
28337 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28338 ))),
28339 other => Ok(other),
28340 }
28341 }
28342
28343 Action::MapFromArraysConvert => {
28344 // Expression::MapFromArrays -> target-specific
28345 if let Expression::MapFromArrays(mfa) = e {
28346 let keys = mfa.this;
28347 let values = mfa.expression;
28348 match target {
28349 DialectType::Snowflake => Ok(Expression::Function(Box::new(
28350 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
28351 ))),
28352 _ => {
28353 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
28354 Ok(Expression::Function(Box::new(Function::new(
28355 "MAP".to_string(),
28356 vec![keys, values],
28357 ))))
28358 }
28359 }
28360 } else {
28361 Ok(e)
28362 }
28363 }
28364
28365 Action::AnyToExists => {
28366 if let Expression::Any(q) = e {
28367 if let Some(op) = q.op.clone() {
28368 let lambda_param = crate::expressions::Identifier::new("x");
28369 let rhs = Expression::Identifier(lambda_param.clone());
28370 let body = match op {
28371 crate::expressions::QuantifiedOp::Eq => {
28372 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
28373 }
28374 crate::expressions::QuantifiedOp::Neq => {
28375 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
28376 }
28377 crate::expressions::QuantifiedOp::Lt => {
28378 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
28379 }
28380 crate::expressions::QuantifiedOp::Lte => {
28381 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
28382 }
28383 crate::expressions::QuantifiedOp::Gt => {
28384 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
28385 }
28386 crate::expressions::QuantifiedOp::Gte => {
28387 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
28388 }
28389 };
28390 let lambda =
28391 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28392 parameters: vec![lambda_param],
28393 body,
28394 colon: false,
28395 parameter_types: Vec::new(),
28396 }));
28397 Ok(Expression::Function(Box::new(Function::new(
28398 "EXISTS".to_string(),
28399 vec![q.subquery, lambda],
28400 ))))
28401 } else {
28402 Ok(Expression::Any(q))
28403 }
28404 } else {
28405 Ok(e)
28406 }
28407 }
28408
28409 Action::GenerateSeriesConvert => {
28410 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
28411 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
28412 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
28413 if let Expression::Function(f) = e {
28414 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
28415 let start = f.args[0].clone();
28416 let end = f.args[1].clone();
28417 let step = f.args.get(2).cloned();
28418
28419 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
28420 let step = step.map(|s| Self::normalize_interval_string(s, target));
28421
28422 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
28423 let maybe_cast_timestamp = |arg: Expression| -> Expression {
28424 if matches!(
28425 target,
28426 DialectType::Presto
28427 | DialectType::Trino
28428 | DialectType::Athena
28429 | DialectType::Spark
28430 | DialectType::Databricks
28431 | DialectType::Hive
28432 ) {
28433 match &arg {
28434 Expression::CurrentTimestamp(_) => {
28435 Expression::Cast(Box::new(Cast {
28436 this: arg,
28437 to: DataType::Timestamp {
28438 precision: None,
28439 timezone: false,
28440 },
28441 trailing_comments: Vec::new(),
28442 double_colon_syntax: false,
28443 format: None,
28444 default: None,
28445 inferred_type: None,
28446 }))
28447 }
28448 _ => arg,
28449 }
28450 } else {
28451 arg
28452 }
28453 };
28454
28455 let start = maybe_cast_timestamp(start);
28456 let end = maybe_cast_timestamp(end);
28457
28458 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
28459 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
28460 let mut gs_args = vec![start, end];
28461 if let Some(step) = step {
28462 gs_args.push(step);
28463 }
28464 return Ok(Expression::Function(Box::new(Function::new(
28465 "GENERATE_SERIES".to_string(),
28466 gs_args,
28467 ))));
28468 }
28469
28470 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
28471 if matches!(target, DialectType::DuckDB) {
28472 let mut gs_args = vec![start, end];
28473 if let Some(step) = step {
28474 gs_args.push(step);
28475 }
28476 let gs = Expression::Function(Box::new(Function::new(
28477 "GENERATE_SERIES".to_string(),
28478 gs_args,
28479 )));
28480 return Ok(Expression::Function(Box::new(Function::new(
28481 "UNNEST".to_string(),
28482 vec![gs],
28483 ))));
28484 }
28485
28486 let mut seq_args = vec![start, end];
28487 if let Some(step) = step {
28488 seq_args.push(step);
28489 }
28490
28491 let seq = Expression::Function(Box::new(Function::new(
28492 "SEQUENCE".to_string(),
28493 seq_args,
28494 )));
28495
28496 match target {
28497 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28498 // Wrap in UNNEST
28499 Ok(Expression::Function(Box::new(Function::new(
28500 "UNNEST".to_string(),
28501 vec![seq],
28502 ))))
28503 }
28504 DialectType::Spark
28505 | DialectType::Databricks
28506 | DialectType::Hive => {
28507 // Wrap in EXPLODE
28508 Ok(Expression::Function(Box::new(Function::new(
28509 "EXPLODE".to_string(),
28510 vec![seq],
28511 ))))
28512 }
28513 _ => {
28514 // Just SEQUENCE for others
28515 Ok(seq)
28516 }
28517 }
28518 } else {
28519 Ok(Expression::Function(f))
28520 }
28521 } else {
28522 Ok(e)
28523 }
28524 }
28525
28526 Action::ConcatCoalesceWrap => {
28527 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
28528 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
28529 if let Expression::Function(f) = e {
28530 if f.name.eq_ignore_ascii_case("CONCAT") {
28531 let new_args: Vec<Expression> = f
28532 .args
28533 .into_iter()
28534 .map(|arg| {
28535 let cast_arg = if matches!(
28536 target,
28537 DialectType::Presto
28538 | DialectType::Trino
28539 | DialectType::Athena
28540 ) {
28541 Expression::Cast(Box::new(Cast {
28542 this: arg,
28543 to: DataType::VarChar {
28544 length: None,
28545 parenthesized_length: false,
28546 },
28547 trailing_comments: Vec::new(),
28548 double_colon_syntax: false,
28549 format: None,
28550 default: None,
28551 inferred_type: None,
28552 }))
28553 } else {
28554 arg
28555 };
28556 Expression::Function(Box::new(Function::new(
28557 "COALESCE".to_string(),
28558 vec![cast_arg, Expression::string("")],
28559 )))
28560 })
28561 .collect();
28562 Ok(Expression::Function(Box::new(Function::new(
28563 "CONCAT".to_string(),
28564 new_args,
28565 ))))
28566 } else {
28567 Ok(Expression::Function(f))
28568 }
28569 } else {
28570 Ok(e)
28571 }
28572 }
28573
28574 Action::PipeConcatToConcat => {
28575 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
28576 if let Expression::Concat(op) = e {
28577 let cast_left = Expression::Cast(Box::new(Cast {
28578 this: op.left,
28579 to: DataType::VarChar {
28580 length: None,
28581 parenthesized_length: false,
28582 },
28583 trailing_comments: Vec::new(),
28584 double_colon_syntax: false,
28585 format: None,
28586 default: None,
28587 inferred_type: None,
28588 }));
28589 let cast_right = Expression::Cast(Box::new(Cast {
28590 this: op.right,
28591 to: DataType::VarChar {
28592 length: None,
28593 parenthesized_length: false,
28594 },
28595 trailing_comments: Vec::new(),
28596 double_colon_syntax: false,
28597 format: None,
28598 default: None,
28599 inferred_type: None,
28600 }));
28601 Ok(Expression::Function(Box::new(Function::new(
28602 "CONCAT".to_string(),
28603 vec![cast_left, cast_right],
28604 ))))
28605 } else {
28606 Ok(e)
28607 }
28608 }
28609
28610 Action::DivFuncConvert => {
28611 // DIV(a, b) -> target-specific integer division
28612 if let Expression::Function(f) = e {
28613 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
28614 let a = f.args[0].clone();
28615 let b = f.args[1].clone();
28616 match target {
28617 DialectType::DuckDB => {
28618 // DIV(a, b) -> CAST(a // b AS DECIMAL)
28619 let int_div = Expression::IntDiv(Box::new(
28620 crate::expressions::BinaryFunc {
28621 this: a,
28622 expression: b,
28623 original_name: None,
28624 inferred_type: None,
28625 },
28626 ));
28627 Ok(Expression::Cast(Box::new(Cast {
28628 this: int_div,
28629 to: DataType::Decimal {
28630 precision: None,
28631 scale: None,
28632 },
28633 trailing_comments: Vec::new(),
28634 double_colon_syntax: false,
28635 format: None,
28636 default: None,
28637 inferred_type: None,
28638 })))
28639 }
28640 DialectType::BigQuery => {
28641 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
28642 let div_func = Expression::Function(Box::new(Function::new(
28643 "DIV".to_string(),
28644 vec![a, b],
28645 )));
28646 Ok(Expression::Cast(Box::new(Cast {
28647 this: div_func,
28648 to: DataType::Custom {
28649 name: "NUMERIC".to_string(),
28650 },
28651 trailing_comments: Vec::new(),
28652 double_colon_syntax: false,
28653 format: None,
28654 default: None,
28655 inferred_type: None,
28656 })))
28657 }
28658 DialectType::SQLite => {
28659 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
28660 let cast_a = Expression::Cast(Box::new(Cast {
28661 this: a,
28662 to: DataType::Custom {
28663 name: "REAL".to_string(),
28664 },
28665 trailing_comments: Vec::new(),
28666 double_colon_syntax: false,
28667 format: None,
28668 default: None,
28669 inferred_type: None,
28670 }));
28671 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
28672 let cast_int = Expression::Cast(Box::new(Cast {
28673 this: div,
28674 to: DataType::Int {
28675 length: None,
28676 integer_spelling: true,
28677 },
28678 trailing_comments: Vec::new(),
28679 double_colon_syntax: false,
28680 format: None,
28681 default: None,
28682 inferred_type: None,
28683 }));
28684 Ok(Expression::Cast(Box::new(Cast {
28685 this: cast_int,
28686 to: DataType::Custom {
28687 name: "REAL".to_string(),
28688 },
28689 trailing_comments: Vec::new(),
28690 double_colon_syntax: false,
28691 format: None,
28692 default: None,
28693 inferred_type: None,
28694 })))
28695 }
28696 _ => Ok(Expression::Function(f)),
28697 }
28698 } else {
28699 Ok(Expression::Function(f))
28700 }
28701 } else {
28702 Ok(e)
28703 }
28704 }
28705
28706 Action::JsonObjectAggConvert => {
28707 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
28708 match e {
28709 Expression::Function(f) => Ok(Expression::Function(Box::new(
28710 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
28711 ))),
28712 Expression::AggregateFunction(af) => {
28713 // AggregateFunction stores all args in the `args` vec
28714 Ok(Expression::Function(Box::new(Function::new(
28715 "JSON_GROUP_OBJECT".to_string(),
28716 af.args,
28717 ))))
28718 }
28719 other => Ok(other),
28720 }
28721 }
28722
28723 Action::JsonbExistsConvert => {
28724 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
28725 if let Expression::Function(f) = e {
28726 if f.args.len() == 2 {
28727 let json_expr = f.args[0].clone();
28728 let key = match &f.args[1] {
28729 Expression::Literal(lit)
28730 if matches!(
28731 lit.as_ref(),
28732 crate::expressions::Literal::String(_)
28733 ) =>
28734 {
28735 let crate::expressions::Literal::String(s) = lit.as_ref()
28736 else {
28737 unreachable!()
28738 };
28739 format!("$.{}", s)
28740 }
28741 _ => return Ok(Expression::Function(f)),
28742 };
28743 Ok(Expression::Function(Box::new(Function::new(
28744 "JSON_EXISTS".to_string(),
28745 vec![json_expr, Expression::string(&key)],
28746 ))))
28747 } else {
28748 Ok(Expression::Function(f))
28749 }
28750 } else {
28751 Ok(e)
28752 }
28753 }
28754
28755 Action::DateBinConvert => {
28756 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
28757 if let Expression::Function(f) = e {
28758 Ok(Expression::Function(Box::new(Function::new(
28759 "TIME_BUCKET".to_string(),
28760 f.args,
28761 ))))
28762 } else {
28763 Ok(e)
28764 }
28765 }
28766
28767 Action::MysqlCastCharToText => {
28768 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
28769 if let Expression::Cast(mut c) = e {
28770 c.to = DataType::Text;
28771 Ok(Expression::Cast(c))
28772 } else {
28773 Ok(e)
28774 }
28775 }
28776
28777 Action::SparkCastVarcharToString => {
28778 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
28779 match e {
28780 Expression::Cast(mut c) => {
28781 c.to = Self::normalize_varchar_to_string(c.to);
28782 Ok(Expression::Cast(c))
28783 }
28784 Expression::TryCast(mut c) => {
28785 c.to = Self::normalize_varchar_to_string(c.to);
28786 Ok(Expression::TryCast(c))
28787 }
28788 _ => Ok(e),
28789 }
28790 }
28791
28792 Action::MinMaxToLeastGreatest => {
28793 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
28794 if let Expression::Function(f) = e {
28795 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
28796 "LEAST"
28797 } else if f.name.eq_ignore_ascii_case("MAX") {
28798 "GREATEST"
28799 } else {
28800 return Ok(Expression::Function(f));
28801 };
28802 Ok(Expression::Function(Box::new(Function::new(
28803 new_name.to_string(),
28804 f.args,
28805 ))))
28806 } else {
28807 Ok(e)
28808 }
28809 }
28810
28811 Action::ClickHouseUniqToApproxCountDistinct => {
28812 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
28813 if let Expression::Function(f) = e {
28814 Ok(Expression::Function(Box::new(Function::new(
28815 "APPROX_COUNT_DISTINCT".to_string(),
28816 f.args,
28817 ))))
28818 } else {
28819 Ok(e)
28820 }
28821 }
28822
28823 Action::ClickHouseAnyToAnyValue => {
28824 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
28825 if let Expression::Function(f) = e {
28826 Ok(Expression::Function(Box::new(Function::new(
28827 "ANY_VALUE".to_string(),
28828 f.args,
28829 ))))
28830 } else {
28831 Ok(e)
28832 }
28833 }
28834
28835 Action::OracleVarchar2ToVarchar => {
28836 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
28837 if let Expression::DataType(DataType::Custom { ref name }) = e {
28838 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
28839 let starts_varchar2 =
28840 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
28841 let starts_nvarchar2 =
28842 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
28843 let inner = if starts_varchar2 || starts_nvarchar2 {
28844 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
28845 let end = name.len() - 1; // skip trailing ")"
28846 Some(&name[start..end])
28847 } else {
28848 Option::None
28849 };
28850 if let Some(inner_str) = inner {
28851 // Parse the number part, ignoring BYTE/CHAR qualifier
28852 let num_str = inner_str.split_whitespace().next().unwrap_or("");
28853 if let Ok(n) = num_str.parse::<u32>() {
28854 Ok(Expression::DataType(DataType::VarChar {
28855 length: Some(n),
28856 parenthesized_length: false,
28857 }))
28858 } else {
28859 Ok(e)
28860 }
28861 } else {
28862 // Plain VARCHAR2 / NVARCHAR2 without parens
28863 Ok(Expression::DataType(DataType::VarChar {
28864 length: Option::None,
28865 parenthesized_length: false,
28866 }))
28867 }
28868 } else {
28869 Ok(e)
28870 }
28871 }
28872
28873 Action::Nvl2Expand => {
28874 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
28875 // But keep as NVL2 for dialects that support it natively
28876 let nvl2_native = matches!(
28877 target,
28878 DialectType::Oracle
28879 | DialectType::Snowflake
28880 | DialectType::Redshift
28881 | DialectType::Teradata
28882 | DialectType::Spark
28883 | DialectType::Databricks
28884 );
28885 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
28886 if nvl2_native {
28887 return Ok(Expression::Nvl2(nvl2));
28888 }
28889 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
28890 } else if let Expression::Function(f) = e {
28891 if nvl2_native {
28892 return Ok(Expression::Function(Box::new(Function::new(
28893 "NVL2".to_string(),
28894 f.args,
28895 ))));
28896 }
28897 if f.args.len() < 2 {
28898 return Ok(Expression::Function(f));
28899 }
28900 let mut args = f.args;
28901 let a = args.remove(0);
28902 let b = args.remove(0);
28903 let c = if !args.is_empty() {
28904 Some(args.remove(0))
28905 } else {
28906 Option::None
28907 };
28908 (a, b, c)
28909 } else {
28910 return Ok(e);
28911 };
28912 // Build: NOT (a IS NULL)
28913 let is_null = Expression::IsNull(Box::new(IsNull {
28914 this: a,
28915 not: false,
28916 postfix_form: false,
28917 }));
28918 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
28919 this: is_null,
28920 inferred_type: None,
28921 }));
28922 Ok(Expression::Case(Box::new(Case {
28923 operand: Option::None,
28924 whens: vec![(not_null, b)],
28925 else_: c,
28926 comments: Vec::new(),
28927 inferred_type: None,
28928 })))
28929 }
28930
28931 Action::IfnullToCoalesce => {
28932 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
28933 if let Expression::Coalesce(mut cf) = e {
28934 cf.original_name = Option::None;
28935 Ok(Expression::Coalesce(cf))
28936 } else if let Expression::Function(f) = e {
28937 Ok(Expression::Function(Box::new(Function::new(
28938 "COALESCE".to_string(),
28939 f.args,
28940 ))))
28941 } else {
28942 Ok(e)
28943 }
28944 }
28945
28946 Action::IsAsciiConvert => {
28947 // IS_ASCII(x) -> dialect-specific ASCII check
28948 if let Expression::Function(f) = e {
28949 let arg = f.args.into_iter().next().unwrap();
28950 match target {
28951 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
28952 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
28953 Ok(Expression::Function(Box::new(Function::new(
28954 "REGEXP_LIKE".to_string(),
28955 vec![
28956 arg,
28957 Expression::Literal(Box::new(Literal::String(
28958 "^[[:ascii:]]*$".to_string(),
28959 ))),
28960 ],
28961 ))))
28962 }
28963 DialectType::PostgreSQL
28964 | DialectType::Redshift
28965 | DialectType::Materialize
28966 | DialectType::RisingWave => {
28967 // (x ~ '^[[:ascii:]]*$')
28968 Ok(Expression::Paren(Box::new(Paren {
28969 this: Expression::RegexpLike(Box::new(
28970 crate::expressions::RegexpFunc {
28971 this: arg,
28972 pattern: Expression::Literal(Box::new(
28973 Literal::String("^[[:ascii:]]*$".to_string()),
28974 )),
28975 flags: Option::None,
28976 },
28977 )),
28978 trailing_comments: Vec::new(),
28979 })))
28980 }
28981 DialectType::SQLite => {
28982 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
28983 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
28984 "2a5b5e012d7f5d2a".to_string(),
28985 )));
28986 let cast_expr = Expression::Cast(Box::new(Cast {
28987 this: hex_lit,
28988 to: DataType::Text,
28989 trailing_comments: Vec::new(),
28990 double_colon_syntax: false,
28991 format: Option::None,
28992 default: Option::None,
28993 inferred_type: None,
28994 }));
28995 let glob = Expression::Glob(Box::new(BinaryOp {
28996 left: arg,
28997 right: cast_expr,
28998 left_comments: Vec::new(),
28999 operator_comments: Vec::new(),
29000 trailing_comments: Vec::new(),
29001 inferred_type: None,
29002 }));
29003 Ok(Expression::Paren(Box::new(Paren {
29004 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
29005 this: glob,
29006 inferred_type: None,
29007 })),
29008 trailing_comments: Vec::new(),
29009 })))
29010 }
29011 DialectType::TSQL | DialectType::Fabric => {
29012 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
29013 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
29014 "255b5e002d7f5d25".to_string(),
29015 )));
29016 let convert_expr = Expression::Convert(Box::new(
29017 crate::expressions::ConvertFunc {
29018 this: hex_lit,
29019 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
29020 style: None,
29021 },
29022 ));
29023 let collated = Expression::Collation(Box::new(
29024 crate::expressions::CollationExpr {
29025 this: convert_expr,
29026 collation: "Latin1_General_BIN".to_string(),
29027 quoted: false,
29028 double_quoted: false,
29029 },
29030 ));
29031 let patindex = Expression::Function(Box::new(Function::new(
29032 "PATINDEX".to_string(),
29033 vec![collated, arg],
29034 )));
29035 let zero =
29036 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29037 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29038 left: patindex,
29039 right: zero,
29040 left_comments: Vec::new(),
29041 operator_comments: Vec::new(),
29042 trailing_comments: Vec::new(),
29043 inferred_type: None,
29044 }));
29045 Ok(Expression::Paren(Box::new(Paren {
29046 this: eq_zero,
29047 trailing_comments: Vec::new(),
29048 })))
29049 }
29050 DialectType::Oracle => {
29051 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
29052 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
29053 let s1 = Expression::Literal(Box::new(Literal::String(
29054 "^[".to_string(),
29055 )));
29056 let chr1 = Expression::Function(Box::new(Function::new(
29057 "CHR".to_string(),
29058 vec![Expression::Literal(Box::new(Literal::Number(
29059 "1".to_string(),
29060 )))],
29061 )));
29062 let dash =
29063 Expression::Literal(Box::new(Literal::String("-".to_string())));
29064 let chr127 = Expression::Function(Box::new(Function::new(
29065 "CHR".to_string(),
29066 vec![Expression::Literal(Box::new(Literal::Number(
29067 "127".to_string(),
29068 )))],
29069 )));
29070 let s2 = Expression::Literal(Box::new(Literal::String(
29071 "]*$".to_string(),
29072 )));
29073 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
29074 let concat1 =
29075 Expression::DPipe(Box::new(crate::expressions::DPipe {
29076 this: Box::new(s1),
29077 expression: Box::new(chr1),
29078 safe: None,
29079 }));
29080 let concat2 =
29081 Expression::DPipe(Box::new(crate::expressions::DPipe {
29082 this: Box::new(concat1),
29083 expression: Box::new(dash),
29084 safe: None,
29085 }));
29086 let concat3 =
29087 Expression::DPipe(Box::new(crate::expressions::DPipe {
29088 this: Box::new(concat2),
29089 expression: Box::new(chr127),
29090 safe: None,
29091 }));
29092 let concat4 =
29093 Expression::DPipe(Box::new(crate::expressions::DPipe {
29094 this: Box::new(concat3),
29095 expression: Box::new(s2),
29096 safe: None,
29097 }));
29098 let regexp_like = Expression::Function(Box::new(Function::new(
29099 "REGEXP_LIKE".to_string(),
29100 vec![arg, concat4],
29101 )));
29102 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
29103 let true_expr =
29104 Expression::Column(Box::new(crate::expressions::Column {
29105 name: Identifier {
29106 name: "TRUE".to_string(),
29107 quoted: false,
29108 trailing_comments: Vec::new(),
29109 span: None,
29110 },
29111 table: None,
29112 join_mark: false,
29113 trailing_comments: Vec::new(),
29114 span: None,
29115 inferred_type: None,
29116 }));
29117 let nvl = Expression::Function(Box::new(Function::new(
29118 "NVL".to_string(),
29119 vec![regexp_like, true_expr],
29120 )));
29121 Ok(nvl)
29122 }
29123 _ => Ok(Expression::Function(Box::new(Function::new(
29124 "IS_ASCII".to_string(),
29125 vec![arg],
29126 )))),
29127 }
29128 } else {
29129 Ok(e)
29130 }
29131 }
29132
29133 Action::StrPositionConvert => {
29134 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
29135 if let Expression::Function(f) = e {
29136 if f.args.len() < 2 {
29137 return Ok(Expression::Function(f));
29138 }
29139 let mut args = f.args;
29140
29141 let haystack = args.remove(0);
29142 let needle = args.remove(0);
29143 let position = if !args.is_empty() {
29144 Some(args.remove(0))
29145 } else {
29146 Option::None
29147 };
29148 let occurrence = if !args.is_empty() {
29149 Some(args.remove(0))
29150 } else {
29151 Option::None
29152 };
29153
29154 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
29155 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
29156 fn build_position_expansion(
29157 haystack: Expression,
29158 needle: Expression,
29159 pos: Expression,
29160 occurrence: Option<Expression>,
29161 inner_func: &str,
29162 wrapper: &str, // "CASE", "IF", "IIF"
29163 ) -> Expression {
29164 let substr = Expression::Function(Box::new(Function::new(
29165 "SUBSTRING".to_string(),
29166 vec![haystack, pos.clone()],
29167 )));
29168 let mut inner_args = vec![substr, needle];
29169 if let Some(occ) = occurrence {
29170 inner_args.push(occ);
29171 }
29172 let inner_call = Expression::Function(Box::new(Function::new(
29173 inner_func.to_string(),
29174 inner_args,
29175 )));
29176 let zero =
29177 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29178 let one =
29179 Expression::Literal(Box::new(Literal::Number("1".to_string())));
29180 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29181 left: inner_call.clone(),
29182 right: zero.clone(),
29183 left_comments: Vec::new(),
29184 operator_comments: Vec::new(),
29185 trailing_comments: Vec::new(),
29186 inferred_type: None,
29187 }));
29188 let add_pos = Expression::Add(Box::new(BinaryOp {
29189 left: inner_call,
29190 right: pos,
29191 left_comments: Vec::new(),
29192 operator_comments: Vec::new(),
29193 trailing_comments: Vec::new(),
29194 inferred_type: None,
29195 }));
29196 let sub_one = Expression::Sub(Box::new(BinaryOp {
29197 left: add_pos,
29198 right: one,
29199 left_comments: Vec::new(),
29200 operator_comments: Vec::new(),
29201 trailing_comments: Vec::new(),
29202 inferred_type: None,
29203 }));
29204
29205 match wrapper {
29206 "CASE" => Expression::Case(Box::new(Case {
29207 operand: Option::None,
29208 whens: vec![(eq_zero, zero)],
29209 else_: Some(sub_one),
29210 comments: Vec::new(),
29211 inferred_type: None,
29212 })),
29213 "IIF" => Expression::Function(Box::new(Function::new(
29214 "IIF".to_string(),
29215 vec![eq_zero, zero, sub_one],
29216 ))),
29217 _ => Expression::Function(Box::new(Function::new(
29218 "IF".to_string(),
29219 vec![eq_zero, zero, sub_one],
29220 ))),
29221 }
29222 }
29223
29224 match target {
29225 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
29226 DialectType::Athena
29227 | DialectType::DuckDB
29228 | DialectType::Presto
29229 | DialectType::Trino
29230 | DialectType::Drill => {
29231 if let Some(pos) = position {
29232 let wrapper = if matches!(target, DialectType::DuckDB) {
29233 "CASE"
29234 } else {
29235 "IF"
29236 };
29237 let result = build_position_expansion(
29238 haystack, needle, pos, occurrence, "STRPOS", wrapper,
29239 );
29240 if matches!(target, DialectType::Drill) {
29241 // Drill uses backtick-quoted `IF`
29242 if let Expression::Function(mut f) = result {
29243 f.name = "`IF`".to_string();
29244 Ok(Expression::Function(f))
29245 } else {
29246 Ok(result)
29247 }
29248 } else {
29249 Ok(result)
29250 }
29251 } else {
29252 Ok(Expression::Function(Box::new(Function::new(
29253 "STRPOS".to_string(),
29254 vec![haystack, needle],
29255 ))))
29256 }
29257 }
29258 // SQLite: IIF wrapper
29259 DialectType::SQLite => {
29260 if let Some(pos) = position {
29261 Ok(build_position_expansion(
29262 haystack, needle, pos, occurrence, "INSTR", "IIF",
29263 ))
29264 } else {
29265 Ok(Expression::Function(Box::new(Function::new(
29266 "INSTR".to_string(),
29267 vec![haystack, needle],
29268 ))))
29269 }
29270 }
29271 // INSTR group: Teradata, BigQuery, Oracle
29272 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
29273 let mut a = vec![haystack, needle];
29274 if let Some(pos) = position {
29275 a.push(pos);
29276 }
29277 if let Some(occ) = occurrence {
29278 a.push(occ);
29279 }
29280 Ok(Expression::Function(Box::new(Function::new(
29281 "INSTR".to_string(),
29282 a,
29283 ))))
29284 }
29285 // CHARINDEX group: Snowflake, TSQL
29286 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
29287 let mut a = vec![needle, haystack];
29288 if let Some(pos) = position {
29289 a.push(pos);
29290 }
29291 Ok(Expression::Function(Box::new(Function::new(
29292 "CHARINDEX".to_string(),
29293 a,
29294 ))))
29295 }
29296 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
29297 DialectType::PostgreSQL
29298 | DialectType::Materialize
29299 | DialectType::RisingWave
29300 | DialectType::Redshift => {
29301 if let Some(pos) = position {
29302 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
29303 // ELSE POSITION(...) + pos - 1 END
29304 let substr = Expression::Substring(Box::new(
29305 crate::expressions::SubstringFunc {
29306 this: haystack,
29307 start: pos.clone(),
29308 length: Option::None,
29309 from_for_syntax: true,
29310 },
29311 ));
29312 let pos_in = Expression::StrPosition(Box::new(
29313 crate::expressions::StrPosition {
29314 this: Box::new(substr),
29315 substr: Some(Box::new(needle)),
29316 position: Option::None,
29317 occurrence: Option::None,
29318 },
29319 ));
29320 let zero = Expression::Literal(Box::new(Literal::Number(
29321 "0".to_string(),
29322 )));
29323 let one = Expression::Literal(Box::new(Literal::Number(
29324 "1".to_string(),
29325 )));
29326 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29327 left: pos_in.clone(),
29328 right: zero.clone(),
29329 left_comments: Vec::new(),
29330 operator_comments: Vec::new(),
29331 trailing_comments: Vec::new(),
29332 inferred_type: None,
29333 }));
29334 let add_pos = Expression::Add(Box::new(BinaryOp {
29335 left: pos_in,
29336 right: pos,
29337 left_comments: Vec::new(),
29338 operator_comments: Vec::new(),
29339 trailing_comments: Vec::new(),
29340 inferred_type: None,
29341 }));
29342 let sub_one = Expression::Sub(Box::new(BinaryOp {
29343 left: add_pos,
29344 right: one,
29345 left_comments: Vec::new(),
29346 operator_comments: Vec::new(),
29347 trailing_comments: Vec::new(),
29348 inferred_type: None,
29349 }));
29350 Ok(Expression::Case(Box::new(Case {
29351 operand: Option::None,
29352 whens: vec![(eq_zero, zero)],
29353 else_: Some(sub_one),
29354 comments: Vec::new(),
29355 inferred_type: None,
29356 })))
29357 } else {
29358 Ok(Expression::StrPosition(Box::new(
29359 crate::expressions::StrPosition {
29360 this: Box::new(haystack),
29361 substr: Some(Box::new(needle)),
29362 position: Option::None,
29363 occurrence: Option::None,
29364 },
29365 )))
29366 }
29367 }
29368 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
29369 DialectType::MySQL
29370 | DialectType::SingleStore
29371 | DialectType::TiDB
29372 | DialectType::Hive
29373 | DialectType::Spark
29374 | DialectType::Databricks
29375 | DialectType::Doris
29376 | DialectType::StarRocks => {
29377 let mut a = vec![needle, haystack];
29378 if let Some(pos) = position {
29379 a.push(pos);
29380 }
29381 Ok(Expression::Function(Box::new(Function::new(
29382 "LOCATE".to_string(),
29383 a,
29384 ))))
29385 }
29386 // ClickHouse: POSITION(haystack, needle[, position])
29387 DialectType::ClickHouse => {
29388 let mut a = vec![haystack, needle];
29389 if let Some(pos) = position {
29390 a.push(pos);
29391 }
29392 Ok(Expression::Function(Box::new(Function::new(
29393 "POSITION".to_string(),
29394 a,
29395 ))))
29396 }
29397 _ => {
29398 let mut a = vec![haystack, needle];
29399 if let Some(pos) = position {
29400 a.push(pos);
29401 }
29402 if let Some(occ) = occurrence {
29403 a.push(occ);
29404 }
29405 Ok(Expression::Function(Box::new(Function::new(
29406 "STR_POSITION".to_string(),
29407 a,
29408 ))))
29409 }
29410 }
29411 } else {
29412 Ok(e)
29413 }
29414 }
29415
29416 Action::ArraySumConvert => {
29417 // ARRAY_SUM(arr) -> dialect-specific
29418 if let Expression::Function(f) = e {
29419 let args = f.args;
29420 match target {
29421 DialectType::DuckDB => Ok(Expression::Function(Box::new(
29422 Function::new("LIST_SUM".to_string(), args),
29423 ))),
29424 DialectType::Spark | DialectType::Databricks => {
29425 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
29426 let arr = args.into_iter().next().unwrap();
29427 let zero =
29428 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29429 let acc_id = Identifier::new("acc");
29430 let x_id = Identifier::new("x");
29431 let acc = Expression::Identifier(acc_id.clone());
29432 let x = Expression::Identifier(x_id.clone());
29433 let add = Expression::Add(Box::new(BinaryOp {
29434 left: acc.clone(),
29435 right: x,
29436 left_comments: Vec::new(),
29437 operator_comments: Vec::new(),
29438 trailing_comments: Vec::new(),
29439 inferred_type: None,
29440 }));
29441 let lambda1 =
29442 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29443 parameters: vec![acc_id.clone(), x_id],
29444 body: add,
29445 colon: false,
29446 parameter_types: Vec::new(),
29447 }));
29448 let lambda2 =
29449 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29450 parameters: vec![acc_id],
29451 body: acc,
29452 colon: false,
29453 parameter_types: Vec::new(),
29454 }));
29455 Ok(Expression::Function(Box::new(Function::new(
29456 "AGGREGATE".to_string(),
29457 vec![arr, zero, lambda1, lambda2],
29458 ))))
29459 }
29460 DialectType::Presto | DialectType::Athena => {
29461 // Presto/Athena keep ARRAY_SUM natively
29462 Ok(Expression::Function(Box::new(Function::new(
29463 "ARRAY_SUM".to_string(),
29464 args,
29465 ))))
29466 }
29467 DialectType::Trino => {
29468 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
29469 if args.len() == 1 {
29470 let arr = args.into_iter().next().unwrap();
29471 let zero = Expression::Literal(Box::new(Literal::Number(
29472 "0".to_string(),
29473 )));
29474 let acc_id = Identifier::new("acc");
29475 let x_id = Identifier::new("x");
29476 let acc = Expression::Identifier(acc_id.clone());
29477 let x = Expression::Identifier(x_id.clone());
29478 let add = Expression::Add(Box::new(BinaryOp {
29479 left: acc.clone(),
29480 right: x,
29481 left_comments: Vec::new(),
29482 operator_comments: Vec::new(),
29483 trailing_comments: Vec::new(),
29484 inferred_type: None,
29485 }));
29486 let lambda1 = Expression::Lambda(Box::new(
29487 crate::expressions::LambdaExpr {
29488 parameters: vec![acc_id.clone(), x_id],
29489 body: add,
29490 colon: false,
29491 parameter_types: Vec::new(),
29492 },
29493 ));
29494 let lambda2 = Expression::Lambda(Box::new(
29495 crate::expressions::LambdaExpr {
29496 parameters: vec![acc_id],
29497 body: acc,
29498 colon: false,
29499 parameter_types: Vec::new(),
29500 },
29501 ));
29502 Ok(Expression::Function(Box::new(Function::new(
29503 "REDUCE".to_string(),
29504 vec![arr, zero, lambda1, lambda2],
29505 ))))
29506 } else {
29507 Ok(Expression::Function(Box::new(Function::new(
29508 "ARRAY_SUM".to_string(),
29509 args,
29510 ))))
29511 }
29512 }
29513 DialectType::ClickHouse => {
29514 // arraySum(lambda, arr) or arraySum(arr)
29515 Ok(Expression::Function(Box::new(Function::new(
29516 "arraySum".to_string(),
29517 args,
29518 ))))
29519 }
29520 _ => Ok(Expression::Function(Box::new(Function::new(
29521 "ARRAY_SUM".to_string(),
29522 args,
29523 )))),
29524 }
29525 } else {
29526 Ok(e)
29527 }
29528 }
29529
29530 Action::ArraySizeConvert => {
29531 if let Expression::Function(f) = e {
29532 Ok(Expression::Function(Box::new(Function::new(
29533 "REPEATED_COUNT".to_string(),
29534 f.args,
29535 ))))
29536 } else {
29537 Ok(e)
29538 }
29539 }
29540
29541 Action::ArrayAnyConvert => {
29542 if let Expression::Function(f) = e {
29543 let mut args = f.args;
29544 if args.len() == 2 {
29545 let arr = args.remove(0);
29546 let lambda = args.remove(0);
29547
29548 // Extract lambda parameter name and body
29549 let (param_name, pred_body) =
29550 if let Expression::Lambda(ref lam) = lambda {
29551 let name = if let Some(p) = lam.parameters.first() {
29552 p.name.clone()
29553 } else {
29554 "x".to_string()
29555 };
29556 (name, lam.body.clone())
29557 } else {
29558 ("x".to_string(), lambda.clone())
29559 };
29560
29561 // Helper: build a function call Expression
29562 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
29563 Expression::Function(Box::new(Function::new(
29564 name.to_string(),
29565 args,
29566 )))
29567 };
29568
29569 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
29570 let build_filter_pattern = |len_func: &str,
29571 len_args_extra: Vec<Expression>,
29572 filter_expr: Expression|
29573 -> Expression {
29574 // len_func(arr, ...extra) = 0
29575 let mut len_arr_args = vec![arr.clone()];
29576 len_arr_args.extend(len_args_extra.clone());
29577 let len_arr = make_func(len_func, len_arr_args);
29578 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
29579 len_arr,
29580 Expression::number(0),
29581 )));
29582
29583 // len_func(filter_expr, ...extra) <> 0
29584 let mut len_filter_args = vec![filter_expr];
29585 len_filter_args.extend(len_args_extra);
29586 let len_filter = make_func(len_func, len_filter_args);
29587 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
29588 len_filter,
29589 Expression::number(0),
29590 )));
29591
29592 // (eq_zero OR neq_zero)
29593 let or_expr =
29594 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
29595 Expression::Paren(Box::new(Paren {
29596 this: or_expr,
29597 trailing_comments: Vec::new(),
29598 }))
29599 };
29600
29601 match target {
29602 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
29603 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
29604 }
29605 DialectType::ClickHouse => {
29606 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
29607 // ClickHouse arrayFilter takes lambda first, then array
29608 let filter_expr =
29609 make_func("arrayFilter", vec![lambda, arr.clone()]);
29610 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
29611 }
29612 DialectType::Databricks | DialectType::Spark => {
29613 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
29614 let filter_expr =
29615 make_func("FILTER", vec![arr.clone(), lambda]);
29616 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
29617 }
29618 DialectType::DuckDB => {
29619 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
29620 let filter_expr =
29621 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
29622 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
29623 }
29624 DialectType::Teradata => {
29625 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
29626 let filter_expr =
29627 make_func("FILTER", vec![arr.clone(), lambda]);
29628 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
29629 }
29630 DialectType::BigQuery => {
29631 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
29632 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
29633 let param_col = Expression::column(¶m_name);
29634 let unnest_expr = Expression::Unnest(Box::new(
29635 crate::expressions::UnnestFunc {
29636 this: arr.clone(),
29637 expressions: vec![],
29638 with_ordinality: false,
29639 alias: Some(Identifier::new(¶m_name)),
29640 offset_alias: None,
29641 },
29642 ));
29643 let mut sel = crate::expressions::Select::default();
29644 sel.expressions = vec![param_col];
29645 sel.from = Some(crate::expressions::From {
29646 expressions: vec![unnest_expr],
29647 });
29648 sel.where_clause =
29649 Some(crate::expressions::Where { this: pred_body });
29650 let array_subquery =
29651 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
29652 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
29653 }
29654 DialectType::PostgreSQL => {
29655 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
29656 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
29657 let param_col = Expression::column(¶m_name);
29658 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
29659 let unnest_with_alias =
29660 Expression::Alias(Box::new(crate::expressions::Alias {
29661 this: Expression::Unnest(Box::new(
29662 crate::expressions::UnnestFunc {
29663 this: arr.clone(),
29664 expressions: vec![],
29665 with_ordinality: false,
29666 alias: None,
29667 offset_alias: None,
29668 },
29669 )),
29670 alias: Identifier::new("_t0"),
29671 column_aliases: vec![Identifier::new(¶m_name)],
29672 alias_explicit_as: false,
29673 alias_keyword: None,
29674 pre_alias_comments: Vec::new(),
29675 trailing_comments: Vec::new(),
29676 inferred_type: None,
29677 }));
29678 let mut sel = crate::expressions::Select::default();
29679 sel.expressions = vec![param_col];
29680 sel.from = Some(crate::expressions::From {
29681 expressions: vec![unnest_with_alias],
29682 });
29683 sel.where_clause =
29684 Some(crate::expressions::Where { this: pred_body });
29685 let array_subquery =
29686 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
29687 Ok(build_filter_pattern(
29688 "ARRAY_LENGTH",
29689 vec![Expression::number(1)],
29690 array_subquery,
29691 ))
29692 }
29693 _ => Ok(Expression::Function(Box::new(Function::new(
29694 "ARRAY_ANY".to_string(),
29695 vec![arr, lambda],
29696 )))),
29697 }
29698 } else {
29699 Ok(Expression::Function(Box::new(Function::new(
29700 "ARRAY_ANY".to_string(),
29701 args,
29702 ))))
29703 }
29704 } else {
29705 Ok(e)
29706 }
29707 }
29708
29709 Action::DecodeSimplify => {
29710 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
29711 // For literal search values: CASE WHEN x = search THEN result
29712 // For NULL search: CASE WHEN x IS NULL THEN result
29713 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
29714 fn is_decode_literal(e: &Expression) -> bool {
29715 matches!(
29716 e,
29717 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
29718 )
29719 }
29720
29721 let build_decode_case =
29722 |this_expr: Expression,
29723 pairs: Vec<(Expression, Expression)>,
29724 default: Option<Expression>| {
29725 let whens: Vec<(Expression, Expression)> = pairs
29726 .into_iter()
29727 .map(|(search, result)| {
29728 if matches!(&search, Expression::Null(_)) {
29729 // NULL search -> IS NULL
29730 let condition = Expression::Is(Box::new(BinaryOp {
29731 left: this_expr.clone(),
29732 right: Expression::Null(crate::expressions::Null),
29733 left_comments: Vec::new(),
29734 operator_comments: Vec::new(),
29735 trailing_comments: Vec::new(),
29736 inferred_type: None,
29737 }));
29738 (condition, result)
29739 } else if is_decode_literal(&search)
29740 || is_decode_literal(&this_expr)
29741 {
29742 // At least one side is a literal -> simple equality (no NULL check needed)
29743 let eq = Expression::Eq(Box::new(BinaryOp {
29744 left: this_expr.clone(),
29745 right: search,
29746 left_comments: Vec::new(),
29747 operator_comments: Vec::new(),
29748 trailing_comments: Vec::new(),
29749 inferred_type: None,
29750 }));
29751 (eq, result)
29752 } else {
29753 // Non-literal -> null-safe comparison
29754 let needs_paren = matches!(
29755 &search,
29756 Expression::Eq(_)
29757 | Expression::Neq(_)
29758 | Expression::Gt(_)
29759 | Expression::Gte(_)
29760 | Expression::Lt(_)
29761 | Expression::Lte(_)
29762 );
29763 let search_ref = if needs_paren {
29764 Expression::Paren(Box::new(crate::expressions::Paren {
29765 this: search.clone(),
29766 trailing_comments: Vec::new(),
29767 }))
29768 } else {
29769 search.clone()
29770 };
29771 // Build: x = search OR (x IS NULL AND search IS NULL)
29772 let eq = Expression::Eq(Box::new(BinaryOp {
29773 left: this_expr.clone(),
29774 right: search_ref,
29775 left_comments: Vec::new(),
29776 operator_comments: Vec::new(),
29777 trailing_comments: Vec::new(),
29778 inferred_type: None,
29779 }));
29780 let search_in_null = if needs_paren {
29781 Expression::Paren(Box::new(crate::expressions::Paren {
29782 this: search.clone(),
29783 trailing_comments: Vec::new(),
29784 }))
29785 } else {
29786 search.clone()
29787 };
29788 let x_is_null = Expression::Is(Box::new(BinaryOp {
29789 left: this_expr.clone(),
29790 right: Expression::Null(crate::expressions::Null),
29791 left_comments: Vec::new(),
29792 operator_comments: Vec::new(),
29793 trailing_comments: Vec::new(),
29794 inferred_type: None,
29795 }));
29796 let search_is_null = Expression::Is(Box::new(BinaryOp {
29797 left: search_in_null,
29798 right: Expression::Null(crate::expressions::Null),
29799 left_comments: Vec::new(),
29800 operator_comments: Vec::new(),
29801 trailing_comments: Vec::new(),
29802 inferred_type: None,
29803 }));
29804 let both_null = Expression::And(Box::new(BinaryOp {
29805 left: x_is_null,
29806 right: search_is_null,
29807 left_comments: Vec::new(),
29808 operator_comments: Vec::new(),
29809 trailing_comments: Vec::new(),
29810 inferred_type: None,
29811 }));
29812 let condition = Expression::Or(Box::new(BinaryOp {
29813 left: eq,
29814 right: Expression::Paren(Box::new(
29815 crate::expressions::Paren {
29816 this: both_null,
29817 trailing_comments: Vec::new(),
29818 },
29819 )),
29820 left_comments: Vec::new(),
29821 operator_comments: Vec::new(),
29822 trailing_comments: Vec::new(),
29823 inferred_type: None,
29824 }));
29825 (condition, result)
29826 }
29827 })
29828 .collect();
29829 Expression::Case(Box::new(Case {
29830 operand: None,
29831 whens,
29832 else_: default,
29833 comments: Vec::new(),
29834 inferred_type: None,
29835 }))
29836 };
29837
29838 if let Expression::Decode(decode) = e {
29839 Ok(build_decode_case(
29840 decode.this,
29841 decode.search_results,
29842 decode.default,
29843 ))
29844 } else if let Expression::DecodeCase(dc) = e {
29845 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
29846 let mut exprs = dc.expressions;
29847 if exprs.len() < 3 {
29848 return Ok(Expression::DecodeCase(Box::new(
29849 crate::expressions::DecodeCase { expressions: exprs },
29850 )));
29851 }
29852 let this_expr = exprs.remove(0);
29853 let mut pairs = Vec::new();
29854 let mut default = None;
29855 let mut i = 0;
29856 while i + 1 < exprs.len() {
29857 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
29858 i += 2;
29859 }
29860 if i < exprs.len() {
29861 // Odd remaining element is the default
29862 default = Some(exprs[i].clone());
29863 }
29864 Ok(build_decode_case(this_expr, pairs, default))
29865 } else {
29866 Ok(e)
29867 }
29868 }
29869
29870 Action::CreateTableLikeToCtas => {
29871 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
29872 if let Expression::CreateTable(ct) = e {
29873 let like_source = ct.constraints.iter().find_map(|c| {
29874 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29875 Some(source.clone())
29876 } else {
29877 None
29878 }
29879 });
29880 if let Some(source_table) = like_source {
29881 let mut new_ct = *ct;
29882 new_ct.constraints.clear();
29883 // Build: SELECT * FROM b LIMIT 0
29884 let select = Expression::Select(Box::new(crate::expressions::Select {
29885 expressions: vec![Expression::Star(crate::expressions::Star {
29886 table: None,
29887 except: None,
29888 replace: None,
29889 rename: None,
29890 trailing_comments: Vec::new(),
29891 span: None,
29892 })],
29893 from: Some(crate::expressions::From {
29894 expressions: vec![Expression::Table(Box::new(source_table))],
29895 }),
29896 limit: Some(crate::expressions::Limit {
29897 this: Expression::Literal(Box::new(Literal::Number(
29898 "0".to_string(),
29899 ))),
29900 percent: false,
29901 comments: Vec::new(),
29902 }),
29903 ..Default::default()
29904 }));
29905 new_ct.as_select = Some(select);
29906 Ok(Expression::CreateTable(Box::new(new_ct)))
29907 } else {
29908 Ok(Expression::CreateTable(ct))
29909 }
29910 } else {
29911 Ok(e)
29912 }
29913 }
29914
29915 Action::CreateTableLikeToSelectInto => {
29916 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
29917 if let Expression::CreateTable(ct) = e {
29918 let like_source = ct.constraints.iter().find_map(|c| {
29919 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29920 Some(source.clone())
29921 } else {
29922 None
29923 }
29924 });
29925 if let Some(source_table) = like_source {
29926 let mut aliased_source = source_table;
29927 aliased_source.alias = Some(Identifier::new("temp"));
29928 // Build: SELECT TOP 0 * INTO a FROM b AS temp
29929 let select = Expression::Select(Box::new(crate::expressions::Select {
29930 expressions: vec![Expression::Star(crate::expressions::Star {
29931 table: None,
29932 except: None,
29933 replace: None,
29934 rename: None,
29935 trailing_comments: Vec::new(),
29936 span: None,
29937 })],
29938 from: Some(crate::expressions::From {
29939 expressions: vec![Expression::Table(Box::new(aliased_source))],
29940 }),
29941 into: Some(crate::expressions::SelectInto {
29942 this: Expression::Table(Box::new(ct.name.clone())),
29943 temporary: false,
29944 unlogged: false,
29945 bulk_collect: false,
29946 expressions: Vec::new(),
29947 }),
29948 top: Some(crate::expressions::Top {
29949 this: Expression::Literal(Box::new(Literal::Number(
29950 "0".to_string(),
29951 ))),
29952 percent: false,
29953 with_ties: false,
29954 parenthesized: false,
29955 }),
29956 ..Default::default()
29957 }));
29958 Ok(select)
29959 } else {
29960 Ok(Expression::CreateTable(ct))
29961 }
29962 } else {
29963 Ok(e)
29964 }
29965 }
29966
29967 Action::CreateTableLikeToAs => {
29968 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
29969 if let Expression::CreateTable(ct) = e {
29970 let like_source = ct.constraints.iter().find_map(|c| {
29971 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29972 Some(source.clone())
29973 } else {
29974 None
29975 }
29976 });
29977 if let Some(source_table) = like_source {
29978 let mut new_ct = *ct;
29979 new_ct.constraints.clear();
29980 // AS b (just a table reference, not a SELECT)
29981 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
29982 Ok(Expression::CreateTable(Box::new(new_ct)))
29983 } else {
29984 Ok(Expression::CreateTable(ct))
29985 }
29986 } else {
29987 Ok(e)
29988 }
29989 }
29990
29991 Action::TsOrDsToDateConvert => {
29992 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
29993 if let Expression::Function(f) = e {
29994 let mut args = f.args;
29995 let this = args.remove(0);
29996 let fmt = if !args.is_empty() {
29997 match &args[0] {
29998 Expression::Literal(lit)
29999 if matches!(lit.as_ref(), Literal::String(_)) =>
30000 {
30001 let Literal::String(s) = lit.as_ref() else {
30002 unreachable!()
30003 };
30004 Some(s.clone())
30005 }
30006 _ => None,
30007 }
30008 } else {
30009 None
30010 };
30011 Ok(Expression::TsOrDsToDate(Box::new(
30012 crate::expressions::TsOrDsToDate {
30013 this: Box::new(this),
30014 format: fmt,
30015 safe: None,
30016 },
30017 )))
30018 } else {
30019 Ok(e)
30020 }
30021 }
30022
30023 Action::TsOrDsToDateStrConvert => {
30024 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
30025 if let Expression::Function(f) = e {
30026 let arg = f.args.into_iter().next().unwrap();
30027 let str_type = match target {
30028 DialectType::DuckDB
30029 | DialectType::PostgreSQL
30030 | DialectType::Materialize => DataType::Text,
30031 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30032 DataType::Custom {
30033 name: "STRING".to_string(),
30034 }
30035 }
30036 DialectType::Presto
30037 | DialectType::Trino
30038 | DialectType::Athena
30039 | DialectType::Drill => DataType::VarChar {
30040 length: None,
30041 parenthesized_length: false,
30042 },
30043 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
30044 DataType::Custom {
30045 name: "STRING".to_string(),
30046 }
30047 }
30048 _ => DataType::VarChar {
30049 length: None,
30050 parenthesized_length: false,
30051 },
30052 };
30053 let cast_expr = Expression::Cast(Box::new(Cast {
30054 this: arg,
30055 to: str_type,
30056 double_colon_syntax: false,
30057 trailing_comments: Vec::new(),
30058 format: None,
30059 default: None,
30060 inferred_type: None,
30061 }));
30062 Ok(Expression::Substring(Box::new(
30063 crate::expressions::SubstringFunc {
30064 this: cast_expr,
30065 start: Expression::number(1),
30066 length: Some(Expression::number(10)),
30067 from_for_syntax: false,
30068 },
30069 )))
30070 } else {
30071 Ok(e)
30072 }
30073 }
30074
30075 Action::DateStrToDateConvert => {
30076 // DATE_STR_TO_DATE(x) -> dialect-specific
30077 if let Expression::Function(f) = e {
30078 let arg = f.args.into_iter().next().unwrap();
30079 match target {
30080 DialectType::SQLite => {
30081 // SQLite: just the bare expression (dates are strings)
30082 Ok(arg)
30083 }
30084 _ => Ok(Expression::Cast(Box::new(Cast {
30085 this: arg,
30086 to: DataType::Date,
30087 double_colon_syntax: false,
30088 trailing_comments: Vec::new(),
30089 format: None,
30090 default: None,
30091 inferred_type: None,
30092 }))),
30093 }
30094 } else {
30095 Ok(e)
30096 }
30097 }
30098
30099 Action::TimeStrToDateConvert => {
30100 // TIME_STR_TO_DATE(x) -> dialect-specific
30101 if let Expression::Function(f) = e {
30102 let arg = f.args.into_iter().next().unwrap();
30103 match target {
30104 DialectType::Hive
30105 | DialectType::Doris
30106 | DialectType::StarRocks
30107 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
30108 Function::new("TO_DATE".to_string(), vec![arg]),
30109 ))),
30110 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30111 // Presto: CAST(x AS TIMESTAMP)
30112 Ok(Expression::Cast(Box::new(Cast {
30113 this: arg,
30114 to: DataType::Timestamp {
30115 timezone: false,
30116 precision: None,
30117 },
30118 double_colon_syntax: false,
30119 trailing_comments: Vec::new(),
30120 format: None,
30121 default: None,
30122 inferred_type: None,
30123 })))
30124 }
30125 _ => {
30126 // Default: CAST(x AS DATE)
30127 Ok(Expression::Cast(Box::new(Cast {
30128 this: arg,
30129 to: DataType::Date,
30130 double_colon_syntax: false,
30131 trailing_comments: Vec::new(),
30132 format: None,
30133 default: None,
30134 inferred_type: None,
30135 })))
30136 }
30137 }
30138 } else {
30139 Ok(e)
30140 }
30141 }
30142
30143 Action::TimeStrToTimeConvert => {
30144 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
30145 if let Expression::Function(f) = e {
30146 let mut args = f.args;
30147 let this = args.remove(0);
30148 let zone = if !args.is_empty() {
30149 match &args[0] {
30150 Expression::Literal(lit)
30151 if matches!(lit.as_ref(), Literal::String(_)) =>
30152 {
30153 let Literal::String(s) = lit.as_ref() else {
30154 unreachable!()
30155 };
30156 Some(s.clone())
30157 }
30158 _ => None,
30159 }
30160 } else {
30161 None
30162 };
30163 let has_zone = zone.is_some();
30164
30165 match target {
30166 DialectType::SQLite => {
30167 // SQLite: just the bare expression
30168 Ok(this)
30169 }
30170 DialectType::MySQL => {
30171 if has_zone {
30172 // MySQL with zone: TIMESTAMP(x)
30173 Ok(Expression::Function(Box::new(Function::new(
30174 "TIMESTAMP".to_string(),
30175 vec![this],
30176 ))))
30177 } else {
30178 // MySQL: CAST(x AS DATETIME) or with precision
30179 // Use DataType::Custom to avoid MySQL's transform_cast converting
30180 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
30181 let precision = if let Expression::Literal(ref lit) = this {
30182 if let Literal::String(ref s) = lit.as_ref() {
30183 if let Some(dot_pos) = s.rfind('.') {
30184 let frac = &s[dot_pos + 1..];
30185 let digit_count = frac
30186 .chars()
30187 .take_while(|c| c.is_ascii_digit())
30188 .count();
30189 if digit_count > 0 {
30190 Some(digit_count)
30191 } else {
30192 None
30193 }
30194 } else {
30195 None
30196 }
30197 } else {
30198 None
30199 }
30200 } else {
30201 None
30202 };
30203 let type_name = match precision {
30204 Some(p) => format!("DATETIME({})", p),
30205 None => "DATETIME".to_string(),
30206 };
30207 Ok(Expression::Cast(Box::new(Cast {
30208 this,
30209 to: DataType::Custom { name: type_name },
30210 double_colon_syntax: false,
30211 trailing_comments: Vec::new(),
30212 format: None,
30213 default: None,
30214 inferred_type: None,
30215 })))
30216 }
30217 }
30218 DialectType::ClickHouse => {
30219 if has_zone {
30220 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
30221 // We need to strip the timezone offset from the literal if present
30222 let clean_this = if let Expression::Literal(ref lit) = this {
30223 if let Literal::String(ref s) = lit.as_ref() {
30224 // Strip timezone offset like "-08:00" or "+00:00"
30225 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
30226 if let Some(offset_pos) = re_offset {
30227 if offset_pos > 10 {
30228 // After the date part
30229 let trimmed = s[..offset_pos].to_string();
30230 Expression::Literal(Box::new(Literal::String(
30231 trimmed,
30232 )))
30233 } else {
30234 this.clone()
30235 }
30236 } else {
30237 this.clone()
30238 }
30239 } else {
30240 this.clone()
30241 }
30242 } else {
30243 this.clone()
30244 };
30245 let zone_str = zone.unwrap();
30246 // Build: CAST(x AS DateTime64(6, 'zone'))
30247 let type_name = format!("DateTime64(6, '{}')", zone_str);
30248 Ok(Expression::Cast(Box::new(Cast {
30249 this: clean_this,
30250 to: DataType::Custom { name: type_name },
30251 double_colon_syntax: false,
30252 trailing_comments: Vec::new(),
30253 format: None,
30254 default: None,
30255 inferred_type: None,
30256 })))
30257 } else {
30258 Ok(Expression::Cast(Box::new(Cast {
30259 this,
30260 to: DataType::Custom {
30261 name: "DateTime64(6)".to_string(),
30262 },
30263 double_colon_syntax: false,
30264 trailing_comments: Vec::new(),
30265 format: None,
30266 default: None,
30267 inferred_type: None,
30268 })))
30269 }
30270 }
30271 DialectType::BigQuery => {
30272 if has_zone {
30273 // BigQuery with zone: CAST(x AS TIMESTAMP)
30274 Ok(Expression::Cast(Box::new(Cast {
30275 this,
30276 to: DataType::Timestamp {
30277 timezone: false,
30278 precision: None,
30279 },
30280 double_colon_syntax: false,
30281 trailing_comments: Vec::new(),
30282 format: None,
30283 default: None,
30284 inferred_type: None,
30285 })))
30286 } else {
30287 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
30288 Ok(Expression::Cast(Box::new(Cast {
30289 this,
30290 to: DataType::Custom {
30291 name: "DATETIME".to_string(),
30292 },
30293 double_colon_syntax: false,
30294 trailing_comments: Vec::new(),
30295 format: None,
30296 default: None,
30297 inferred_type: None,
30298 })))
30299 }
30300 }
30301 DialectType::Doris => {
30302 // Doris: CAST(x AS DATETIME)
30303 Ok(Expression::Cast(Box::new(Cast {
30304 this,
30305 to: DataType::Custom {
30306 name: "DATETIME".to_string(),
30307 },
30308 double_colon_syntax: false,
30309 trailing_comments: Vec::new(),
30310 format: None,
30311 default: None,
30312 inferred_type: None,
30313 })))
30314 }
30315 DialectType::TSQL | DialectType::Fabric => {
30316 if has_zone {
30317 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
30318 let cast_expr = Expression::Cast(Box::new(Cast {
30319 this,
30320 to: DataType::Custom {
30321 name: "DATETIMEOFFSET".to_string(),
30322 },
30323 double_colon_syntax: false,
30324 trailing_comments: Vec::new(),
30325 format: None,
30326 default: None,
30327 inferred_type: None,
30328 }));
30329 Ok(Expression::AtTimeZone(Box::new(
30330 crate::expressions::AtTimeZone {
30331 this: cast_expr,
30332 zone: Expression::Literal(Box::new(Literal::String(
30333 "UTC".to_string(),
30334 ))),
30335 },
30336 )))
30337 } else {
30338 // TSQL: CAST(x AS DATETIME2)
30339 Ok(Expression::Cast(Box::new(Cast {
30340 this,
30341 to: DataType::Custom {
30342 name: "DATETIME2".to_string(),
30343 },
30344 double_colon_syntax: false,
30345 trailing_comments: Vec::new(),
30346 format: None,
30347 default: None,
30348 inferred_type: None,
30349 })))
30350 }
30351 }
30352 DialectType::DuckDB => {
30353 if has_zone {
30354 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
30355 Ok(Expression::Cast(Box::new(Cast {
30356 this,
30357 to: DataType::Timestamp {
30358 timezone: true,
30359 precision: None,
30360 },
30361 double_colon_syntax: false,
30362 trailing_comments: Vec::new(),
30363 format: None,
30364 default: None,
30365 inferred_type: None,
30366 })))
30367 } else {
30368 // DuckDB: CAST(x AS TIMESTAMP)
30369 Ok(Expression::Cast(Box::new(Cast {
30370 this,
30371 to: DataType::Timestamp {
30372 timezone: false,
30373 precision: None,
30374 },
30375 double_colon_syntax: false,
30376 trailing_comments: Vec::new(),
30377 format: None,
30378 default: None,
30379 inferred_type: None,
30380 })))
30381 }
30382 }
30383 DialectType::PostgreSQL
30384 | DialectType::Materialize
30385 | DialectType::RisingWave => {
30386 if has_zone {
30387 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
30388 Ok(Expression::Cast(Box::new(Cast {
30389 this,
30390 to: DataType::Timestamp {
30391 timezone: true,
30392 precision: None,
30393 },
30394 double_colon_syntax: false,
30395 trailing_comments: Vec::new(),
30396 format: None,
30397 default: None,
30398 inferred_type: None,
30399 })))
30400 } else {
30401 // PostgreSQL: CAST(x AS TIMESTAMP)
30402 Ok(Expression::Cast(Box::new(Cast {
30403 this,
30404 to: DataType::Timestamp {
30405 timezone: false,
30406 precision: None,
30407 },
30408 double_colon_syntax: false,
30409 trailing_comments: Vec::new(),
30410 format: None,
30411 default: None,
30412 inferred_type: None,
30413 })))
30414 }
30415 }
30416 DialectType::Snowflake => {
30417 if has_zone {
30418 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
30419 Ok(Expression::Cast(Box::new(Cast {
30420 this,
30421 to: DataType::Timestamp {
30422 timezone: true,
30423 precision: None,
30424 },
30425 double_colon_syntax: false,
30426 trailing_comments: Vec::new(),
30427 format: None,
30428 default: None,
30429 inferred_type: None,
30430 })))
30431 } else {
30432 // Snowflake: CAST(x AS TIMESTAMP)
30433 Ok(Expression::Cast(Box::new(Cast {
30434 this,
30435 to: DataType::Timestamp {
30436 timezone: false,
30437 precision: None,
30438 },
30439 double_colon_syntax: false,
30440 trailing_comments: Vec::new(),
30441 format: None,
30442 default: None,
30443 inferred_type: None,
30444 })))
30445 }
30446 }
30447 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30448 if has_zone {
30449 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
30450 // Check for precision from sub-second digits
30451 let precision = if let Expression::Literal(ref lit) = this {
30452 if let Literal::String(ref s) = lit.as_ref() {
30453 if let Some(dot_pos) = s.rfind('.') {
30454 let frac = &s[dot_pos + 1..];
30455 let digit_count = frac
30456 .chars()
30457 .take_while(|c| c.is_ascii_digit())
30458 .count();
30459 if digit_count > 0
30460 && matches!(target, DialectType::Trino)
30461 {
30462 Some(digit_count as u32)
30463 } else {
30464 None
30465 }
30466 } else {
30467 None
30468 }
30469 } else {
30470 None
30471 }
30472 } else {
30473 None
30474 };
30475 let dt = if let Some(prec) = precision {
30476 DataType::Timestamp {
30477 timezone: true,
30478 precision: Some(prec),
30479 }
30480 } else {
30481 DataType::Timestamp {
30482 timezone: true,
30483 precision: None,
30484 }
30485 };
30486 Ok(Expression::Cast(Box::new(Cast {
30487 this,
30488 to: dt,
30489 double_colon_syntax: false,
30490 trailing_comments: Vec::new(),
30491 format: None,
30492 default: None,
30493 inferred_type: None,
30494 })))
30495 } else {
30496 // Check for sub-second precision for Trino
30497 let precision = if let Expression::Literal(ref lit) = this {
30498 if let Literal::String(ref s) = lit.as_ref() {
30499 if let Some(dot_pos) = s.rfind('.') {
30500 let frac = &s[dot_pos + 1..];
30501 let digit_count = frac
30502 .chars()
30503 .take_while(|c| c.is_ascii_digit())
30504 .count();
30505 if digit_count > 0
30506 && matches!(target, DialectType::Trino)
30507 {
30508 Some(digit_count as u32)
30509 } else {
30510 None
30511 }
30512 } else {
30513 None
30514 }
30515 } else {
30516 None
30517 }
30518 } else {
30519 None
30520 };
30521 let dt = DataType::Timestamp {
30522 timezone: false,
30523 precision,
30524 };
30525 Ok(Expression::Cast(Box::new(Cast {
30526 this,
30527 to: dt,
30528 double_colon_syntax: false,
30529 trailing_comments: Vec::new(),
30530 format: None,
30531 default: None,
30532 inferred_type: None,
30533 })))
30534 }
30535 }
30536 DialectType::Redshift => {
30537 if has_zone {
30538 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
30539 Ok(Expression::Cast(Box::new(Cast {
30540 this,
30541 to: DataType::Timestamp {
30542 timezone: true,
30543 precision: None,
30544 },
30545 double_colon_syntax: false,
30546 trailing_comments: Vec::new(),
30547 format: None,
30548 default: None,
30549 inferred_type: None,
30550 })))
30551 } else {
30552 // Redshift: CAST(x AS TIMESTAMP)
30553 Ok(Expression::Cast(Box::new(Cast {
30554 this,
30555 to: DataType::Timestamp {
30556 timezone: false,
30557 precision: None,
30558 },
30559 double_colon_syntax: false,
30560 trailing_comments: Vec::new(),
30561 format: None,
30562 default: None,
30563 inferred_type: None,
30564 })))
30565 }
30566 }
30567 _ => {
30568 // Default: CAST(x AS TIMESTAMP)
30569 Ok(Expression::Cast(Box::new(Cast {
30570 this,
30571 to: DataType::Timestamp {
30572 timezone: false,
30573 precision: None,
30574 },
30575 double_colon_syntax: false,
30576 trailing_comments: Vec::new(),
30577 format: None,
30578 default: None,
30579 inferred_type: None,
30580 })))
30581 }
30582 }
30583 } else {
30584 Ok(e)
30585 }
30586 }
30587
30588 Action::DateToDateStrConvert => {
30589 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
30590 if let Expression::Function(f) = e {
30591 let arg = f.args.into_iter().next().unwrap();
30592 let str_type = match target {
30593 DialectType::DuckDB => DataType::Text,
30594 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30595 DataType::Custom {
30596 name: "STRING".to_string(),
30597 }
30598 }
30599 DialectType::Presto
30600 | DialectType::Trino
30601 | DialectType::Athena
30602 | DialectType::Drill => DataType::VarChar {
30603 length: None,
30604 parenthesized_length: false,
30605 },
30606 _ => DataType::VarChar {
30607 length: None,
30608 parenthesized_length: false,
30609 },
30610 };
30611 Ok(Expression::Cast(Box::new(Cast {
30612 this: arg,
30613 to: str_type,
30614 double_colon_syntax: false,
30615 trailing_comments: Vec::new(),
30616 format: None,
30617 default: None,
30618 inferred_type: None,
30619 })))
30620 } else {
30621 Ok(e)
30622 }
30623 }
30624
30625 Action::DateToDiConvert => {
30626 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
30627 if let Expression::Function(f) = e {
30628 let arg = f.args.into_iter().next().unwrap();
30629 let inner = match target {
30630 DialectType::DuckDB => {
30631 // STRFTIME(x, '%Y%m%d')
30632 Expression::Function(Box::new(Function::new(
30633 "STRFTIME".to_string(),
30634 vec![arg, Expression::string("%Y%m%d")],
30635 )))
30636 }
30637 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30638 // DATE_FORMAT(x, 'yyyyMMdd')
30639 Expression::Function(Box::new(Function::new(
30640 "DATE_FORMAT".to_string(),
30641 vec![arg, Expression::string("yyyyMMdd")],
30642 )))
30643 }
30644 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30645 // DATE_FORMAT(x, '%Y%m%d')
30646 Expression::Function(Box::new(Function::new(
30647 "DATE_FORMAT".to_string(),
30648 vec![arg, Expression::string("%Y%m%d")],
30649 )))
30650 }
30651 DialectType::Drill => {
30652 // TO_DATE(x, 'yyyyMMdd')
30653 Expression::Function(Box::new(Function::new(
30654 "TO_DATE".to_string(),
30655 vec![arg, Expression::string("yyyyMMdd")],
30656 )))
30657 }
30658 _ => {
30659 // Default: STRFTIME(x, '%Y%m%d')
30660 Expression::Function(Box::new(Function::new(
30661 "STRFTIME".to_string(),
30662 vec![arg, Expression::string("%Y%m%d")],
30663 )))
30664 }
30665 };
30666 // Use INT (not INTEGER) for Presto/Trino
30667 let int_type = match target {
30668 DialectType::Presto
30669 | DialectType::Trino
30670 | DialectType::Athena
30671 | DialectType::TSQL
30672 | DialectType::Fabric
30673 | DialectType::SQLite
30674 | DialectType::Redshift => DataType::Custom {
30675 name: "INT".to_string(),
30676 },
30677 _ => DataType::Int {
30678 length: None,
30679 integer_spelling: false,
30680 },
30681 };
30682 Ok(Expression::Cast(Box::new(Cast {
30683 this: inner,
30684 to: int_type,
30685 double_colon_syntax: false,
30686 trailing_comments: Vec::new(),
30687 format: None,
30688 default: None,
30689 inferred_type: None,
30690 })))
30691 } else {
30692 Ok(e)
30693 }
30694 }
30695
30696 Action::DiToDateConvert => {
30697 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
30698 if let Expression::Function(f) = e {
30699 let arg = f.args.into_iter().next().unwrap();
30700 match target {
30701 DialectType::DuckDB => {
30702 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
30703 let cast_text = Expression::Cast(Box::new(Cast {
30704 this: arg,
30705 to: DataType::Text,
30706 double_colon_syntax: false,
30707 trailing_comments: Vec::new(),
30708 format: None,
30709 default: None,
30710 inferred_type: None,
30711 }));
30712 let strptime = Expression::Function(Box::new(Function::new(
30713 "STRPTIME".to_string(),
30714 vec![cast_text, Expression::string("%Y%m%d")],
30715 )));
30716 Ok(Expression::Cast(Box::new(Cast {
30717 this: strptime,
30718 to: DataType::Date,
30719 double_colon_syntax: false,
30720 trailing_comments: Vec::new(),
30721 format: None,
30722 default: None,
30723 inferred_type: None,
30724 })))
30725 }
30726 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30727 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
30728 let cast_str = Expression::Cast(Box::new(Cast {
30729 this: arg,
30730 to: DataType::Custom {
30731 name: "STRING".to_string(),
30732 },
30733 double_colon_syntax: false,
30734 trailing_comments: Vec::new(),
30735 format: None,
30736 default: None,
30737 inferred_type: None,
30738 }));
30739 Ok(Expression::Function(Box::new(Function::new(
30740 "TO_DATE".to_string(),
30741 vec![cast_str, Expression::string("yyyyMMdd")],
30742 ))))
30743 }
30744 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30745 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
30746 let cast_varchar = Expression::Cast(Box::new(Cast {
30747 this: arg,
30748 to: DataType::VarChar {
30749 length: None,
30750 parenthesized_length: false,
30751 },
30752 double_colon_syntax: false,
30753 trailing_comments: Vec::new(),
30754 format: None,
30755 default: None,
30756 inferred_type: None,
30757 }));
30758 let date_parse = Expression::Function(Box::new(Function::new(
30759 "DATE_PARSE".to_string(),
30760 vec![cast_varchar, Expression::string("%Y%m%d")],
30761 )));
30762 Ok(Expression::Cast(Box::new(Cast {
30763 this: date_parse,
30764 to: DataType::Date,
30765 double_colon_syntax: false,
30766 trailing_comments: Vec::new(),
30767 format: None,
30768 default: None,
30769 inferred_type: None,
30770 })))
30771 }
30772 DialectType::Drill => {
30773 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
30774 let cast_varchar = Expression::Cast(Box::new(Cast {
30775 this: arg,
30776 to: DataType::VarChar {
30777 length: None,
30778 parenthesized_length: false,
30779 },
30780 double_colon_syntax: false,
30781 trailing_comments: Vec::new(),
30782 format: None,
30783 default: None,
30784 inferred_type: None,
30785 }));
30786 Ok(Expression::Function(Box::new(Function::new(
30787 "TO_DATE".to_string(),
30788 vec![cast_varchar, Expression::string("yyyyMMdd")],
30789 ))))
30790 }
30791 _ => Ok(Expression::Function(Box::new(Function::new(
30792 "DI_TO_DATE".to_string(),
30793 vec![arg],
30794 )))),
30795 }
30796 } else {
30797 Ok(e)
30798 }
30799 }
30800
30801 Action::TsOrDiToDiConvert => {
30802 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
30803 if let Expression::Function(f) = e {
30804 let arg = f.args.into_iter().next().unwrap();
30805 let str_type = match target {
30806 DialectType::DuckDB => DataType::Text,
30807 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30808 DataType::Custom {
30809 name: "STRING".to_string(),
30810 }
30811 }
30812 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30813 DataType::VarChar {
30814 length: None,
30815 parenthesized_length: false,
30816 }
30817 }
30818 _ => DataType::VarChar {
30819 length: None,
30820 parenthesized_length: false,
30821 },
30822 };
30823 let cast_str = Expression::Cast(Box::new(Cast {
30824 this: arg,
30825 to: str_type,
30826 double_colon_syntax: false,
30827 trailing_comments: Vec::new(),
30828 format: None,
30829 default: None,
30830 inferred_type: None,
30831 }));
30832 let replace_expr = Expression::Function(Box::new(Function::new(
30833 "REPLACE".to_string(),
30834 vec![cast_str, Expression::string("-"), Expression::string("")],
30835 )));
30836 let substr_name = match target {
30837 DialectType::DuckDB
30838 | DialectType::Hive
30839 | DialectType::Spark
30840 | DialectType::Databricks => "SUBSTR",
30841 _ => "SUBSTR",
30842 };
30843 let substr = Expression::Function(Box::new(Function::new(
30844 substr_name.to_string(),
30845 vec![replace_expr, Expression::number(1), Expression::number(8)],
30846 )));
30847 // Use INT (not INTEGER) for Presto/Trino etc.
30848 let int_type = match target {
30849 DialectType::Presto
30850 | DialectType::Trino
30851 | DialectType::Athena
30852 | DialectType::TSQL
30853 | DialectType::Fabric
30854 | DialectType::SQLite
30855 | DialectType::Redshift => DataType::Custom {
30856 name: "INT".to_string(),
30857 },
30858 _ => DataType::Int {
30859 length: None,
30860 integer_spelling: false,
30861 },
30862 };
30863 Ok(Expression::Cast(Box::new(Cast {
30864 this: substr,
30865 to: int_type,
30866 double_colon_syntax: false,
30867 trailing_comments: Vec::new(),
30868 format: None,
30869 default: None,
30870 inferred_type: None,
30871 })))
30872 } else {
30873 Ok(e)
30874 }
30875 }
30876
30877 Action::UnixToStrConvert => {
30878 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
30879 if let Expression::Function(f) = e {
30880 let mut args = f.args;
30881 let this = args.remove(0);
30882 let fmt_expr = if !args.is_empty() {
30883 Some(args.remove(0))
30884 } else {
30885 None
30886 };
30887
30888 // Check if format is a string literal
30889 let fmt_str = fmt_expr.as_ref().and_then(|f| {
30890 if let Expression::Literal(lit) = f {
30891 if let Literal::String(s) = lit.as_ref() {
30892 Some(s.clone())
30893 } else {
30894 None
30895 }
30896 } else {
30897 None
30898 }
30899 });
30900
30901 if let Some(fmt_string) = fmt_str {
30902 // String literal format -> use UnixToStr expression (generator handles it)
30903 Ok(Expression::UnixToStr(Box::new(
30904 crate::expressions::UnixToStr {
30905 this: Box::new(this),
30906 format: Some(fmt_string),
30907 },
30908 )))
30909 } else if let Some(fmt_e) = fmt_expr {
30910 // Non-literal format (e.g., identifier `y`) -> build target expression directly
30911 match target {
30912 DialectType::DuckDB => {
30913 // STRFTIME(TO_TIMESTAMP(x), y)
30914 let to_ts = Expression::Function(Box::new(Function::new(
30915 "TO_TIMESTAMP".to_string(),
30916 vec![this],
30917 )));
30918 Ok(Expression::Function(Box::new(Function::new(
30919 "STRFTIME".to_string(),
30920 vec![to_ts, fmt_e],
30921 ))))
30922 }
30923 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30924 // DATE_FORMAT(FROM_UNIXTIME(x), y)
30925 let from_unix = Expression::Function(Box::new(Function::new(
30926 "FROM_UNIXTIME".to_string(),
30927 vec![this],
30928 )));
30929 Ok(Expression::Function(Box::new(Function::new(
30930 "DATE_FORMAT".to_string(),
30931 vec![from_unix, fmt_e],
30932 ))))
30933 }
30934 DialectType::Hive
30935 | DialectType::Spark
30936 | DialectType::Databricks
30937 | DialectType::Doris
30938 | DialectType::StarRocks => {
30939 // FROM_UNIXTIME(x, y)
30940 Ok(Expression::Function(Box::new(Function::new(
30941 "FROM_UNIXTIME".to_string(),
30942 vec![this, fmt_e],
30943 ))))
30944 }
30945 _ => {
30946 // Default: keep as UNIX_TO_STR(x, y)
30947 Ok(Expression::Function(Box::new(Function::new(
30948 "UNIX_TO_STR".to_string(),
30949 vec![this, fmt_e],
30950 ))))
30951 }
30952 }
30953 } else {
30954 Ok(Expression::UnixToStr(Box::new(
30955 crate::expressions::UnixToStr {
30956 this: Box::new(this),
30957 format: None,
30958 },
30959 )))
30960 }
30961 } else {
30962 Ok(e)
30963 }
30964 }
30965
30966 Action::UnixToTimeConvert => {
30967 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
30968 if let Expression::Function(f) = e {
30969 let arg = f.args.into_iter().next().unwrap();
30970 Ok(Expression::UnixToTime(Box::new(
30971 crate::expressions::UnixToTime {
30972 this: Box::new(arg),
30973 scale: None,
30974 zone: None,
30975 hours: None,
30976 minutes: None,
30977 format: None,
30978 target_type: None,
30979 },
30980 )))
30981 } else {
30982 Ok(e)
30983 }
30984 }
30985
30986 Action::UnixToTimeStrConvert => {
30987 // UNIX_TO_TIME_STR(x) -> dialect-specific
30988 if let Expression::Function(f) = e {
30989 let arg = f.args.into_iter().next().unwrap();
30990 match target {
30991 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30992 // FROM_UNIXTIME(x)
30993 Ok(Expression::Function(Box::new(Function::new(
30994 "FROM_UNIXTIME".to_string(),
30995 vec![arg],
30996 ))))
30997 }
30998 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30999 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
31000 let from_unix = Expression::Function(Box::new(Function::new(
31001 "FROM_UNIXTIME".to_string(),
31002 vec![arg],
31003 )));
31004 Ok(Expression::Cast(Box::new(Cast {
31005 this: from_unix,
31006 to: DataType::VarChar {
31007 length: None,
31008 parenthesized_length: false,
31009 },
31010 double_colon_syntax: false,
31011 trailing_comments: Vec::new(),
31012 format: None,
31013 default: None,
31014 inferred_type: None,
31015 })))
31016 }
31017 DialectType::DuckDB => {
31018 // CAST(TO_TIMESTAMP(x) AS TEXT)
31019 let to_ts = Expression::Function(Box::new(Function::new(
31020 "TO_TIMESTAMP".to_string(),
31021 vec![arg],
31022 )));
31023 Ok(Expression::Cast(Box::new(Cast {
31024 this: to_ts,
31025 to: DataType::Text,
31026 double_colon_syntax: false,
31027 trailing_comments: Vec::new(),
31028 format: None,
31029 default: None,
31030 inferred_type: None,
31031 })))
31032 }
31033 _ => Ok(Expression::Function(Box::new(Function::new(
31034 "UNIX_TO_TIME_STR".to_string(),
31035 vec![arg],
31036 )))),
31037 }
31038 } else {
31039 Ok(e)
31040 }
31041 }
31042
31043 Action::TimeToUnixConvert => {
31044 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
31045 if let Expression::Function(f) = e {
31046 let arg = f.args.into_iter().next().unwrap();
31047 Ok(Expression::TimeToUnix(Box::new(
31048 crate::expressions::UnaryFunc {
31049 this: arg,
31050 original_name: None,
31051 inferred_type: None,
31052 },
31053 )))
31054 } else {
31055 Ok(e)
31056 }
31057 }
31058
31059 Action::TimeToStrConvert => {
31060 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
31061 if let Expression::Function(f) = e {
31062 let mut args = f.args;
31063 let this = args.remove(0);
31064 let fmt = match args.remove(0) {
31065 Expression::Literal(lit)
31066 if matches!(lit.as_ref(), Literal::String(_)) =>
31067 {
31068 let Literal::String(s) = lit.as_ref() else {
31069 unreachable!()
31070 };
31071 s.clone()
31072 }
31073 other => {
31074 return Ok(Expression::Function(Box::new(Function::new(
31075 "TIME_TO_STR".to_string(),
31076 vec![this, other],
31077 ))));
31078 }
31079 };
31080 Ok(Expression::TimeToStr(Box::new(
31081 crate::expressions::TimeToStr {
31082 this: Box::new(this),
31083 format: fmt,
31084 culture: None,
31085 zone: None,
31086 },
31087 )))
31088 } else {
31089 Ok(e)
31090 }
31091 }
31092
31093 Action::StrToUnixConvert => {
31094 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
31095 if let Expression::Function(f) = e {
31096 let mut args = f.args;
31097 let this = args.remove(0);
31098 let fmt = match args.remove(0) {
31099 Expression::Literal(lit)
31100 if matches!(lit.as_ref(), Literal::String(_)) =>
31101 {
31102 let Literal::String(s) = lit.as_ref() else {
31103 unreachable!()
31104 };
31105 s.clone()
31106 }
31107 other => {
31108 return Ok(Expression::Function(Box::new(Function::new(
31109 "STR_TO_UNIX".to_string(),
31110 vec![this, other],
31111 ))));
31112 }
31113 };
31114 Ok(Expression::StrToUnix(Box::new(
31115 crate::expressions::StrToUnix {
31116 this: Some(Box::new(this)),
31117 format: Some(fmt),
31118 },
31119 )))
31120 } else {
31121 Ok(e)
31122 }
31123 }
31124
31125 Action::TimeStrToUnixConvert => {
31126 // TIME_STR_TO_UNIX(x) -> dialect-specific
31127 if let Expression::Function(f) = e {
31128 let arg = f.args.into_iter().next().unwrap();
31129 match target {
31130 DialectType::DuckDB => {
31131 // EPOCH(CAST(x AS TIMESTAMP))
31132 let cast_ts = Expression::Cast(Box::new(Cast {
31133 this: arg,
31134 to: DataType::Timestamp {
31135 timezone: false,
31136 precision: None,
31137 },
31138 double_colon_syntax: false,
31139 trailing_comments: Vec::new(),
31140 format: None,
31141 default: None,
31142 inferred_type: None,
31143 }));
31144 Ok(Expression::Function(Box::new(Function::new(
31145 "EPOCH".to_string(),
31146 vec![cast_ts],
31147 ))))
31148 }
31149 DialectType::Hive
31150 | DialectType::Doris
31151 | DialectType::StarRocks
31152 | DialectType::MySQL => {
31153 // UNIX_TIMESTAMP(x)
31154 Ok(Expression::Function(Box::new(Function::new(
31155 "UNIX_TIMESTAMP".to_string(),
31156 vec![arg],
31157 ))))
31158 }
31159 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31160 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
31161 let date_parse = Expression::Function(Box::new(Function::new(
31162 "DATE_PARSE".to_string(),
31163 vec![arg, Expression::string("%Y-%m-%d %T")],
31164 )));
31165 Ok(Expression::Function(Box::new(Function::new(
31166 "TO_UNIXTIME".to_string(),
31167 vec![date_parse],
31168 ))))
31169 }
31170 _ => Ok(Expression::Function(Box::new(Function::new(
31171 "TIME_STR_TO_UNIX".to_string(),
31172 vec![arg],
31173 )))),
31174 }
31175 } else {
31176 Ok(e)
31177 }
31178 }
31179
31180 Action::TimeToTimeStrConvert => {
31181 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
31182 if let Expression::Function(f) = e {
31183 let arg = f.args.into_iter().next().unwrap();
31184 let str_type = match target {
31185 DialectType::DuckDB => DataType::Text,
31186 DialectType::Hive
31187 | DialectType::Spark
31188 | DialectType::Databricks
31189 | DialectType::Doris
31190 | DialectType::StarRocks => DataType::Custom {
31191 name: "STRING".to_string(),
31192 },
31193 DialectType::Redshift => DataType::Custom {
31194 name: "VARCHAR(MAX)".to_string(),
31195 },
31196 _ => DataType::VarChar {
31197 length: None,
31198 parenthesized_length: false,
31199 },
31200 };
31201 Ok(Expression::Cast(Box::new(Cast {
31202 this: arg,
31203 to: str_type,
31204 double_colon_syntax: false,
31205 trailing_comments: Vec::new(),
31206 format: None,
31207 default: None,
31208 inferred_type: None,
31209 })))
31210 } else {
31211 Ok(e)
31212 }
31213 }
31214
31215 Action::DateTruncSwapArgs => {
31216 // DATE_TRUNC('unit', x) from Generic -> target-specific
31217 if let Expression::Function(f) = e {
31218 if f.args.len() == 2 {
31219 let unit_arg = f.args[0].clone();
31220 let expr_arg = f.args[1].clone();
31221 // Extract unit string from the first arg
31222 let unit_str = match &unit_arg {
31223 Expression::Literal(lit)
31224 if matches!(lit.as_ref(), Literal::String(_)) =>
31225 {
31226 let Literal::String(s) = lit.as_ref() else {
31227 unreachable!()
31228 };
31229 s.to_ascii_uppercase()
31230 }
31231 _ => return Ok(Expression::Function(f)),
31232 };
31233 match target {
31234 DialectType::BigQuery => {
31235 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
31236 let unit_ident =
31237 Expression::Column(Box::new(crate::expressions::Column {
31238 name: crate::expressions::Identifier::new(unit_str),
31239 table: None,
31240 join_mark: false,
31241 trailing_comments: Vec::new(),
31242 span: None,
31243 inferred_type: None,
31244 }));
31245 Ok(Expression::Function(Box::new(Function::new(
31246 "DATE_TRUNC".to_string(),
31247 vec![expr_arg, unit_ident],
31248 ))))
31249 }
31250 DialectType::Doris => {
31251 // Doris: DATE_TRUNC(x, 'UNIT')
31252 Ok(Expression::Function(Box::new(Function::new(
31253 "DATE_TRUNC".to_string(),
31254 vec![expr_arg, Expression::string(&unit_str)],
31255 ))))
31256 }
31257 DialectType::StarRocks => {
31258 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
31259 Ok(Expression::Function(Box::new(Function::new(
31260 "DATE_TRUNC".to_string(),
31261 vec![Expression::string(&unit_str), expr_arg],
31262 ))))
31263 }
31264 DialectType::Spark | DialectType::Databricks => {
31265 // Spark: TRUNC(x, 'UNIT')
31266 Ok(Expression::Function(Box::new(Function::new(
31267 "TRUNC".to_string(),
31268 vec![expr_arg, Expression::string(&unit_str)],
31269 ))))
31270 }
31271 DialectType::MySQL => {
31272 // MySQL: complex expansion based on unit
31273 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
31274 }
31275 _ => Ok(Expression::Function(f)),
31276 }
31277 } else {
31278 Ok(Expression::Function(f))
31279 }
31280 } else {
31281 Ok(e)
31282 }
31283 }
31284
31285 Action::TimestampTruncConvert => {
31286 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
31287 if let Expression::Function(f) = e {
31288 if f.args.len() >= 2 {
31289 let expr_arg = f.args[0].clone();
31290 let unit_arg = f.args[1].clone();
31291 let tz_arg = if f.args.len() >= 3 {
31292 Some(f.args[2].clone())
31293 } else {
31294 None
31295 };
31296 // Extract unit string
31297 let unit_str = match &unit_arg {
31298 Expression::Literal(lit)
31299 if matches!(lit.as_ref(), Literal::String(_)) =>
31300 {
31301 let Literal::String(s) = lit.as_ref() else {
31302 unreachable!()
31303 };
31304 s.to_ascii_uppercase()
31305 }
31306 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
31307 _ => {
31308 return Ok(Expression::Function(f));
31309 }
31310 };
31311 match target {
31312 DialectType::Spark | DialectType::Databricks => {
31313 // Spark: DATE_TRUNC('UNIT', x)
31314 Ok(Expression::Function(Box::new(Function::new(
31315 "DATE_TRUNC".to_string(),
31316 vec![Expression::string(&unit_str), expr_arg],
31317 ))))
31318 }
31319 DialectType::Doris | DialectType::StarRocks => {
31320 // Doris: DATE_TRUNC(x, 'UNIT')
31321 Ok(Expression::Function(Box::new(Function::new(
31322 "DATE_TRUNC".to_string(),
31323 vec![expr_arg, Expression::string(&unit_str)],
31324 ))))
31325 }
31326 DialectType::BigQuery => {
31327 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
31328 let unit_ident =
31329 Expression::Column(Box::new(crate::expressions::Column {
31330 name: crate::expressions::Identifier::new(unit_str),
31331 table: None,
31332 join_mark: false,
31333 trailing_comments: Vec::new(),
31334 span: None,
31335 inferred_type: None,
31336 }));
31337 let mut args = vec![expr_arg, unit_ident];
31338 if let Some(tz) = tz_arg {
31339 args.push(tz);
31340 }
31341 Ok(Expression::Function(Box::new(Function::new(
31342 "TIMESTAMP_TRUNC".to_string(),
31343 args,
31344 ))))
31345 }
31346 DialectType::DuckDB => {
31347 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
31348 if let Some(tz) = tz_arg {
31349 let tz_str = match &tz {
31350 Expression::Literal(lit)
31351 if matches!(lit.as_ref(), Literal::String(_)) =>
31352 {
31353 let Literal::String(s) = lit.as_ref() else {
31354 unreachable!()
31355 };
31356 s.clone()
31357 }
31358 _ => "UTC".to_string(),
31359 };
31360 // x AT TIME ZONE 'tz'
31361 let at_tz = Expression::AtTimeZone(Box::new(
31362 crate::expressions::AtTimeZone {
31363 this: expr_arg,
31364 zone: Expression::string(&tz_str),
31365 },
31366 ));
31367 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
31368 let trunc = Expression::Function(Box::new(Function::new(
31369 "DATE_TRUNC".to_string(),
31370 vec![Expression::string(&unit_str), at_tz],
31371 )));
31372 // DATE_TRUNC(...) AT TIME ZONE 'tz'
31373 Ok(Expression::AtTimeZone(Box::new(
31374 crate::expressions::AtTimeZone {
31375 this: trunc,
31376 zone: Expression::string(&tz_str),
31377 },
31378 )))
31379 } else {
31380 Ok(Expression::Function(Box::new(Function::new(
31381 "DATE_TRUNC".to_string(),
31382 vec![Expression::string(&unit_str), expr_arg],
31383 ))))
31384 }
31385 }
31386 DialectType::Presto
31387 | DialectType::Trino
31388 | DialectType::Athena
31389 | DialectType::Snowflake => {
31390 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
31391 Ok(Expression::Function(Box::new(Function::new(
31392 "DATE_TRUNC".to_string(),
31393 vec![Expression::string(&unit_str), expr_arg],
31394 ))))
31395 }
31396 _ => {
31397 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
31398 let mut args = vec![Expression::string(&unit_str), expr_arg];
31399 if let Some(tz) = tz_arg {
31400 args.push(tz);
31401 }
31402 Ok(Expression::Function(Box::new(Function::new(
31403 "DATE_TRUNC".to_string(),
31404 args,
31405 ))))
31406 }
31407 }
31408 } else {
31409 Ok(Expression::Function(f))
31410 }
31411 } else {
31412 Ok(e)
31413 }
31414 }
31415
31416 Action::StrToDateConvert => {
31417 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
31418 if let Expression::Function(f) = e {
31419 if f.args.len() == 2 {
31420 let mut args = f.args;
31421 let this = args.remove(0);
31422 let fmt_expr = args.remove(0);
31423 let fmt_str = match &fmt_expr {
31424 Expression::Literal(lit)
31425 if matches!(lit.as_ref(), Literal::String(_)) =>
31426 {
31427 let Literal::String(s) = lit.as_ref() else {
31428 unreachable!()
31429 };
31430 Some(s.clone())
31431 }
31432 _ => None,
31433 };
31434 let default_date = "%Y-%m-%d";
31435 let default_time = "%Y-%m-%d %H:%M:%S";
31436 let is_default = fmt_str
31437 .as_ref()
31438 .map_or(false, |f| f == default_date || f == default_time);
31439
31440 if is_default {
31441 // Default format: handle per-dialect
31442 match target {
31443 DialectType::MySQL
31444 | DialectType::Doris
31445 | DialectType::StarRocks => {
31446 // Keep STR_TO_DATE(x, fmt) as-is
31447 Ok(Expression::Function(Box::new(Function::new(
31448 "STR_TO_DATE".to_string(),
31449 vec![this, fmt_expr],
31450 ))))
31451 }
31452 DialectType::Hive => {
31453 // Hive: CAST(x AS DATE)
31454 Ok(Expression::Cast(Box::new(Cast {
31455 this,
31456 to: DataType::Date,
31457 double_colon_syntax: false,
31458 trailing_comments: Vec::new(),
31459 format: None,
31460 default: None,
31461 inferred_type: None,
31462 })))
31463 }
31464 DialectType::Presto
31465 | DialectType::Trino
31466 | DialectType::Athena => {
31467 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
31468 let date_parse =
31469 Expression::Function(Box::new(Function::new(
31470 "DATE_PARSE".to_string(),
31471 vec![this, fmt_expr],
31472 )));
31473 Ok(Expression::Cast(Box::new(Cast {
31474 this: date_parse,
31475 to: DataType::Date,
31476 double_colon_syntax: false,
31477 trailing_comments: Vec::new(),
31478 format: None,
31479 default: None,
31480 inferred_type: None,
31481 })))
31482 }
31483 _ => {
31484 // Others: TsOrDsToDate (delegates to generator)
31485 Ok(Expression::TsOrDsToDate(Box::new(
31486 crate::expressions::TsOrDsToDate {
31487 this: Box::new(this),
31488 format: None,
31489 safe: None,
31490 },
31491 )))
31492 }
31493 }
31494 } else if let Some(fmt) = fmt_str {
31495 match target {
31496 DialectType::Doris
31497 | DialectType::StarRocks
31498 | DialectType::MySQL => {
31499 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
31500 let mut normalized = fmt.clone();
31501 normalized = normalized.replace("%-d", "%e");
31502 normalized = normalized.replace("%-m", "%c");
31503 normalized = normalized.replace("%H:%M:%S", "%T");
31504 Ok(Expression::Function(Box::new(Function::new(
31505 "STR_TO_DATE".to_string(),
31506 vec![this, Expression::string(&normalized)],
31507 ))))
31508 }
31509 DialectType::Hive => {
31510 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
31511 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31512 let unix_ts =
31513 Expression::Function(Box::new(Function::new(
31514 "UNIX_TIMESTAMP".to_string(),
31515 vec![this, Expression::string(&java_fmt)],
31516 )));
31517 let from_unix =
31518 Expression::Function(Box::new(Function::new(
31519 "FROM_UNIXTIME".to_string(),
31520 vec![unix_ts],
31521 )));
31522 Ok(Expression::Cast(Box::new(Cast {
31523 this: from_unix,
31524 to: DataType::Date,
31525 double_colon_syntax: false,
31526 trailing_comments: Vec::new(),
31527 format: None,
31528 default: None,
31529 inferred_type: None,
31530 })))
31531 }
31532 DialectType::Spark | DialectType::Databricks => {
31533 // Spark: TO_DATE(x, java_fmt)
31534 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31535 Ok(Expression::Function(Box::new(Function::new(
31536 "TO_DATE".to_string(),
31537 vec![this, Expression::string(&java_fmt)],
31538 ))))
31539 }
31540 DialectType::Drill => {
31541 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
31542 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
31543 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31544 let java_fmt = java_fmt.replace('T', "'T'");
31545 Ok(Expression::Function(Box::new(Function::new(
31546 "TO_DATE".to_string(),
31547 vec![this, Expression::string(&java_fmt)],
31548 ))))
31549 }
31550 _ => {
31551 // For other dialects: use TsOrDsToDate which delegates to generator
31552 Ok(Expression::TsOrDsToDate(Box::new(
31553 crate::expressions::TsOrDsToDate {
31554 this: Box::new(this),
31555 format: Some(fmt),
31556 safe: None,
31557 },
31558 )))
31559 }
31560 }
31561 } else {
31562 // Non-string format - keep as-is
31563 let mut new_args = Vec::new();
31564 new_args.push(this);
31565 new_args.push(fmt_expr);
31566 Ok(Expression::Function(Box::new(Function::new(
31567 "STR_TO_DATE".to_string(),
31568 new_args,
31569 ))))
31570 }
31571 } else {
31572 Ok(Expression::Function(f))
31573 }
31574 } else {
31575 Ok(e)
31576 }
31577 }
31578
31579 Action::TsOrDsAddConvert => {
31580 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
31581 if let Expression::Function(f) = e {
31582 if f.args.len() == 3 {
31583 let mut args = f.args;
31584 let x = args.remove(0);
31585 let n = args.remove(0);
31586 let unit_expr = args.remove(0);
31587 let unit_str = match &unit_expr {
31588 Expression::Literal(lit)
31589 if matches!(lit.as_ref(), Literal::String(_)) =>
31590 {
31591 let Literal::String(s) = lit.as_ref() else {
31592 unreachable!()
31593 };
31594 s.to_ascii_uppercase()
31595 }
31596 _ => "DAY".to_string(),
31597 };
31598
31599 match target {
31600 DialectType::Hive
31601 | DialectType::Spark
31602 | DialectType::Databricks => {
31603 // DATE_ADD(x, n) - only supports DAY unit
31604 Ok(Expression::Function(Box::new(Function::new(
31605 "DATE_ADD".to_string(),
31606 vec![x, n],
31607 ))))
31608 }
31609 DialectType::MySQL => {
31610 // DATE_ADD(x, INTERVAL n UNIT)
31611 let iu = match unit_str.as_str() {
31612 "YEAR" => crate::expressions::IntervalUnit::Year,
31613 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31614 "MONTH" => crate::expressions::IntervalUnit::Month,
31615 "WEEK" => crate::expressions::IntervalUnit::Week,
31616 "HOUR" => crate::expressions::IntervalUnit::Hour,
31617 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31618 "SECOND" => crate::expressions::IntervalUnit::Second,
31619 _ => crate::expressions::IntervalUnit::Day,
31620 };
31621 let interval = Expression::Interval(Box::new(
31622 crate::expressions::Interval {
31623 this: Some(n),
31624 unit: Some(
31625 crate::expressions::IntervalUnitSpec::Simple {
31626 unit: iu,
31627 use_plural: false,
31628 },
31629 ),
31630 },
31631 ));
31632 Ok(Expression::Function(Box::new(Function::new(
31633 "DATE_ADD".to_string(),
31634 vec![x, interval],
31635 ))))
31636 }
31637 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31638 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
31639 let cast_ts = Expression::Cast(Box::new(Cast {
31640 this: x,
31641 to: DataType::Timestamp {
31642 precision: None,
31643 timezone: false,
31644 },
31645 double_colon_syntax: false,
31646 trailing_comments: Vec::new(),
31647 format: None,
31648 default: None,
31649 inferred_type: None,
31650 }));
31651 let cast_date = Expression::Cast(Box::new(Cast {
31652 this: cast_ts,
31653 to: DataType::Date,
31654 double_colon_syntax: false,
31655 trailing_comments: Vec::new(),
31656 format: None,
31657 default: None,
31658 inferred_type: None,
31659 }));
31660 Ok(Expression::Function(Box::new(Function::new(
31661 "DATE_ADD".to_string(),
31662 vec![Expression::string(&unit_str), n, cast_date],
31663 ))))
31664 }
31665 DialectType::DuckDB => {
31666 // CAST(x AS DATE) + INTERVAL n UNIT
31667 let cast_date = Expression::Cast(Box::new(Cast {
31668 this: x,
31669 to: DataType::Date,
31670 double_colon_syntax: false,
31671 trailing_comments: Vec::new(),
31672 format: None,
31673 default: None,
31674 inferred_type: None,
31675 }));
31676 let iu = match unit_str.as_str() {
31677 "YEAR" => crate::expressions::IntervalUnit::Year,
31678 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31679 "MONTH" => crate::expressions::IntervalUnit::Month,
31680 "WEEK" => crate::expressions::IntervalUnit::Week,
31681 "HOUR" => crate::expressions::IntervalUnit::Hour,
31682 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31683 "SECOND" => crate::expressions::IntervalUnit::Second,
31684 _ => crate::expressions::IntervalUnit::Day,
31685 };
31686 let interval = Expression::Interval(Box::new(
31687 crate::expressions::Interval {
31688 this: Some(n),
31689 unit: Some(
31690 crate::expressions::IntervalUnitSpec::Simple {
31691 unit: iu,
31692 use_plural: false,
31693 },
31694 ),
31695 },
31696 ));
31697 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
31698 left: cast_date,
31699 right: interval,
31700 left_comments: Vec::new(),
31701 operator_comments: Vec::new(),
31702 trailing_comments: Vec::new(),
31703 inferred_type: None,
31704 })))
31705 }
31706 DialectType::Drill => {
31707 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
31708 let cast_date = Expression::Cast(Box::new(Cast {
31709 this: x,
31710 to: DataType::Date,
31711 double_colon_syntax: false,
31712 trailing_comments: Vec::new(),
31713 format: None,
31714 default: None,
31715 inferred_type: None,
31716 }));
31717 let iu = match unit_str.as_str() {
31718 "YEAR" => crate::expressions::IntervalUnit::Year,
31719 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31720 "MONTH" => crate::expressions::IntervalUnit::Month,
31721 "WEEK" => crate::expressions::IntervalUnit::Week,
31722 "HOUR" => crate::expressions::IntervalUnit::Hour,
31723 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31724 "SECOND" => crate::expressions::IntervalUnit::Second,
31725 _ => crate::expressions::IntervalUnit::Day,
31726 };
31727 let interval = Expression::Interval(Box::new(
31728 crate::expressions::Interval {
31729 this: Some(n),
31730 unit: Some(
31731 crate::expressions::IntervalUnitSpec::Simple {
31732 unit: iu,
31733 use_plural: false,
31734 },
31735 ),
31736 },
31737 ));
31738 Ok(Expression::Function(Box::new(Function::new(
31739 "DATE_ADD".to_string(),
31740 vec![cast_date, interval],
31741 ))))
31742 }
31743 _ => {
31744 // Default: keep as TS_OR_DS_ADD
31745 Ok(Expression::Function(Box::new(Function::new(
31746 "TS_OR_DS_ADD".to_string(),
31747 vec![x, n, unit_expr],
31748 ))))
31749 }
31750 }
31751 } else {
31752 Ok(Expression::Function(f))
31753 }
31754 } else {
31755 Ok(e)
31756 }
31757 }
31758
31759 Action::DateFromUnixDateConvert => {
31760 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
31761 if let Expression::Function(f) = e {
31762 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
31763 if matches!(
31764 target,
31765 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
31766 ) {
31767 return Ok(Expression::Function(Box::new(Function::new(
31768 "DATE_FROM_UNIX_DATE".to_string(),
31769 f.args,
31770 ))));
31771 }
31772 let n = f.args.into_iter().next().unwrap();
31773 let epoch_date = Expression::Cast(Box::new(Cast {
31774 this: Expression::string("1970-01-01"),
31775 to: DataType::Date,
31776 double_colon_syntax: false,
31777 trailing_comments: Vec::new(),
31778 format: None,
31779 default: None,
31780 inferred_type: None,
31781 }));
31782 match target {
31783 DialectType::DuckDB => {
31784 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
31785 let interval =
31786 Expression::Interval(Box::new(crate::expressions::Interval {
31787 this: Some(n),
31788 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31789 unit: crate::expressions::IntervalUnit::Day,
31790 use_plural: false,
31791 }),
31792 }));
31793 Ok(Expression::Add(Box::new(
31794 crate::expressions::BinaryOp::new(epoch_date, interval),
31795 )))
31796 }
31797 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31798 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
31799 Ok(Expression::Function(Box::new(Function::new(
31800 "DATE_ADD".to_string(),
31801 vec![Expression::string("DAY"), n, epoch_date],
31802 ))))
31803 }
31804 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
31805 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
31806 Ok(Expression::Function(Box::new(Function::new(
31807 "DATEADD".to_string(),
31808 vec![
31809 Expression::Identifier(Identifier::new("DAY")),
31810 n,
31811 epoch_date,
31812 ],
31813 ))))
31814 }
31815 DialectType::BigQuery => {
31816 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
31817 let interval =
31818 Expression::Interval(Box::new(crate::expressions::Interval {
31819 this: Some(n),
31820 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31821 unit: crate::expressions::IntervalUnit::Day,
31822 use_plural: false,
31823 }),
31824 }));
31825 Ok(Expression::Function(Box::new(Function::new(
31826 "DATE_ADD".to_string(),
31827 vec![epoch_date, interval],
31828 ))))
31829 }
31830 DialectType::MySQL
31831 | DialectType::Doris
31832 | DialectType::StarRocks
31833 | DialectType::Drill => {
31834 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
31835 let interval =
31836 Expression::Interval(Box::new(crate::expressions::Interval {
31837 this: Some(n),
31838 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31839 unit: crate::expressions::IntervalUnit::Day,
31840 use_plural: false,
31841 }),
31842 }));
31843 Ok(Expression::Function(Box::new(Function::new(
31844 "DATE_ADD".to_string(),
31845 vec![epoch_date, interval],
31846 ))))
31847 }
31848 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
31849 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
31850 Ok(Expression::Function(Box::new(Function::new(
31851 "DATE_ADD".to_string(),
31852 vec![epoch_date, n],
31853 ))))
31854 }
31855 DialectType::PostgreSQL
31856 | DialectType::Materialize
31857 | DialectType::RisingWave => {
31858 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
31859 let n_str = match &n {
31860 Expression::Literal(lit)
31861 if matches!(lit.as_ref(), Literal::Number(_)) =>
31862 {
31863 let Literal::Number(s) = lit.as_ref() else {
31864 unreachable!()
31865 };
31866 s.clone()
31867 }
31868 _ => Self::expr_to_string_static(&n),
31869 };
31870 let interval =
31871 Expression::Interval(Box::new(crate::expressions::Interval {
31872 this: Some(Expression::string(&format!("{} DAY", n_str))),
31873 unit: None,
31874 }));
31875 Ok(Expression::Add(Box::new(
31876 crate::expressions::BinaryOp::new(epoch_date, interval),
31877 )))
31878 }
31879 _ => {
31880 // Default: keep as-is
31881 Ok(Expression::Function(Box::new(Function::new(
31882 "DATE_FROM_UNIX_DATE".to_string(),
31883 vec![n],
31884 ))))
31885 }
31886 }
31887 } else {
31888 Ok(e)
31889 }
31890 }
31891
31892 Action::ArrayRemoveConvert => {
31893 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
31894 if let Expression::ArrayRemove(bf) = e {
31895 let arr = bf.this;
31896 let target_val = bf.expression;
31897 match target {
31898 DialectType::DuckDB => {
31899 let u_id = crate::expressions::Identifier::new("_u");
31900 let lambda =
31901 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31902 parameters: vec![u_id.clone()],
31903 body: Expression::Neq(Box::new(BinaryOp {
31904 left: Expression::Identifier(u_id),
31905 right: target_val,
31906 left_comments: Vec::new(),
31907 operator_comments: Vec::new(),
31908 trailing_comments: Vec::new(),
31909 inferred_type: None,
31910 })),
31911 colon: false,
31912 parameter_types: Vec::new(),
31913 }));
31914 Ok(Expression::Function(Box::new(Function::new(
31915 "LIST_FILTER".to_string(),
31916 vec![arr, lambda],
31917 ))))
31918 }
31919 DialectType::ClickHouse => {
31920 let u_id = crate::expressions::Identifier::new("_u");
31921 let lambda =
31922 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31923 parameters: vec![u_id.clone()],
31924 body: Expression::Neq(Box::new(BinaryOp {
31925 left: Expression::Identifier(u_id),
31926 right: target_val,
31927 left_comments: Vec::new(),
31928 operator_comments: Vec::new(),
31929 trailing_comments: Vec::new(),
31930 inferred_type: None,
31931 })),
31932 colon: false,
31933 parameter_types: Vec::new(),
31934 }));
31935 Ok(Expression::Function(Box::new(Function::new(
31936 "arrayFilter".to_string(),
31937 vec![lambda, arr],
31938 ))))
31939 }
31940 DialectType::BigQuery => {
31941 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
31942 let u_id = crate::expressions::Identifier::new("_u");
31943 let u_col =
31944 Expression::Column(Box::new(crate::expressions::Column {
31945 name: u_id.clone(),
31946 table: None,
31947 join_mark: false,
31948 trailing_comments: Vec::new(),
31949 span: None,
31950 inferred_type: None,
31951 }));
31952 let unnest_expr =
31953 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
31954 this: arr,
31955 expressions: Vec::new(),
31956 with_ordinality: false,
31957 alias: None,
31958 offset_alias: None,
31959 }));
31960 let aliased_unnest =
31961 Expression::Alias(Box::new(crate::expressions::Alias {
31962 this: unnest_expr,
31963 alias: u_id.clone(),
31964 column_aliases: Vec::new(),
31965 alias_explicit_as: false,
31966 alias_keyword: None,
31967 pre_alias_comments: Vec::new(),
31968 trailing_comments: Vec::new(),
31969 inferred_type: None,
31970 }));
31971 let where_cond = Expression::Neq(Box::new(BinaryOp {
31972 left: u_col.clone(),
31973 right: target_val,
31974 left_comments: Vec::new(),
31975 operator_comments: Vec::new(),
31976 trailing_comments: Vec::new(),
31977 inferred_type: None,
31978 }));
31979 let subquery = Expression::Select(Box::new(
31980 crate::expressions::Select::new()
31981 .column(u_col)
31982 .from(aliased_unnest)
31983 .where_(where_cond),
31984 ));
31985 Ok(Expression::ArrayFunc(Box::new(
31986 crate::expressions::ArrayConstructor {
31987 expressions: vec![subquery],
31988 bracket_notation: false,
31989 use_list_keyword: false,
31990 },
31991 )))
31992 }
31993 _ => Ok(Expression::ArrayRemove(Box::new(
31994 crate::expressions::BinaryFunc {
31995 original_name: None,
31996 this: arr,
31997 expression: target_val,
31998 inferred_type: None,
31999 },
32000 ))),
32001 }
32002 } else {
32003 Ok(e)
32004 }
32005 }
32006
32007 Action::ArrayReverseConvert => {
32008 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
32009 if let Expression::ArrayReverse(af) = e {
32010 Ok(Expression::Function(Box::new(Function::new(
32011 "arrayReverse".to_string(),
32012 vec![af.this],
32013 ))))
32014 } else {
32015 Ok(e)
32016 }
32017 }
32018
32019 Action::JsonKeysConvert => {
32020 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
32021 if let Expression::JsonKeys(uf) = e {
32022 match target {
32023 DialectType::Spark | DialectType::Databricks => {
32024 Ok(Expression::Function(Box::new(Function::new(
32025 "JSON_OBJECT_KEYS".to_string(),
32026 vec![uf.this],
32027 ))))
32028 }
32029 DialectType::Snowflake => Ok(Expression::Function(Box::new(
32030 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
32031 ))),
32032 _ => Ok(Expression::JsonKeys(uf)),
32033 }
32034 } else {
32035 Ok(e)
32036 }
32037 }
32038
32039 Action::ParseJsonStrip => {
32040 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
32041 if let Expression::ParseJson(uf) = e {
32042 Ok(uf.this)
32043 } else {
32044 Ok(e)
32045 }
32046 }
32047
32048 Action::ArraySizeDrill => {
32049 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
32050 if let Expression::ArraySize(uf) = e {
32051 Ok(Expression::Function(Box::new(Function::new(
32052 "REPEATED_COUNT".to_string(),
32053 vec![uf.this],
32054 ))))
32055 } else {
32056 Ok(e)
32057 }
32058 }
32059
32060 Action::WeekOfYearToWeekIso => {
32061 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
32062 if let Expression::WeekOfYear(uf) = e {
32063 Ok(Expression::Function(Box::new(Function::new(
32064 "WEEKISO".to_string(),
32065 vec![uf.this],
32066 ))))
32067 } else {
32068 Ok(e)
32069 }
32070 }
32071 }
32072 })
32073 }
32074
32075 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
32076 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
32077 use crate::expressions::Function;
32078 match unit {
32079 "DAY" => {
32080 // DATE(x)
32081 Ok(Expression::Function(Box::new(Function::new(
32082 "DATE".to_string(),
32083 vec![expr.clone()],
32084 ))))
32085 }
32086 "WEEK" => {
32087 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
32088 let year_x = Expression::Function(Box::new(Function::new(
32089 "YEAR".to_string(),
32090 vec![expr.clone()],
32091 )));
32092 let week_x = Expression::Function(Box::new(Function::new(
32093 "WEEK".to_string(),
32094 vec![expr.clone(), Expression::number(1)],
32095 )));
32096 let concat_args = vec![
32097 year_x,
32098 Expression::string(" "),
32099 week_x,
32100 Expression::string(" 1"),
32101 ];
32102 let concat = Expression::Function(Box::new(Function::new(
32103 "CONCAT".to_string(),
32104 concat_args,
32105 )));
32106 Ok(Expression::Function(Box::new(Function::new(
32107 "STR_TO_DATE".to_string(),
32108 vec![concat, Expression::string("%Y %u %w")],
32109 ))))
32110 }
32111 "MONTH" => {
32112 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
32113 let year_x = Expression::Function(Box::new(Function::new(
32114 "YEAR".to_string(),
32115 vec![expr.clone()],
32116 )));
32117 let month_x = Expression::Function(Box::new(Function::new(
32118 "MONTH".to_string(),
32119 vec![expr.clone()],
32120 )));
32121 let concat_args = vec![
32122 year_x,
32123 Expression::string(" "),
32124 month_x,
32125 Expression::string(" 1"),
32126 ];
32127 let concat = Expression::Function(Box::new(Function::new(
32128 "CONCAT".to_string(),
32129 concat_args,
32130 )));
32131 Ok(Expression::Function(Box::new(Function::new(
32132 "STR_TO_DATE".to_string(),
32133 vec![concat, Expression::string("%Y %c %e")],
32134 ))))
32135 }
32136 "QUARTER" => {
32137 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
32138 let year_x = Expression::Function(Box::new(Function::new(
32139 "YEAR".to_string(),
32140 vec![expr.clone()],
32141 )));
32142 let quarter_x = Expression::Function(Box::new(Function::new(
32143 "QUARTER".to_string(),
32144 vec![expr.clone()],
32145 )));
32146 // QUARTER(x) * 3 - 2
32147 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
32148 left: quarter_x,
32149 right: Expression::number(3),
32150 left_comments: Vec::new(),
32151 operator_comments: Vec::new(),
32152 trailing_comments: Vec::new(),
32153 inferred_type: None,
32154 }));
32155 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
32156 left: mul,
32157 right: Expression::number(2),
32158 left_comments: Vec::new(),
32159 operator_comments: Vec::new(),
32160 trailing_comments: Vec::new(),
32161 inferred_type: None,
32162 }));
32163 let concat_args = vec![
32164 year_x,
32165 Expression::string(" "),
32166 sub,
32167 Expression::string(" 1"),
32168 ];
32169 let concat = Expression::Function(Box::new(Function::new(
32170 "CONCAT".to_string(),
32171 concat_args,
32172 )));
32173 Ok(Expression::Function(Box::new(Function::new(
32174 "STR_TO_DATE".to_string(),
32175 vec![concat, Expression::string("%Y %c %e")],
32176 ))))
32177 }
32178 "YEAR" => {
32179 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
32180 let year_x = Expression::Function(Box::new(Function::new(
32181 "YEAR".to_string(),
32182 vec![expr.clone()],
32183 )));
32184 let concat_args = vec![year_x, Expression::string(" 1 1")];
32185 let concat = Expression::Function(Box::new(Function::new(
32186 "CONCAT".to_string(),
32187 concat_args,
32188 )));
32189 Ok(Expression::Function(Box::new(Function::new(
32190 "STR_TO_DATE".to_string(),
32191 vec![concat, Expression::string("%Y %c %e")],
32192 ))))
32193 }
32194 _ => {
32195 // Unsupported unit -> keep as DATE_TRUNC
32196 Ok(Expression::Function(Box::new(Function::new(
32197 "DATE_TRUNC".to_string(),
32198 vec![Expression::string(unit), expr.clone()],
32199 ))))
32200 }
32201 }
32202 }
32203
32204 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
32205 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
32206 use crate::expressions::DataType;
32207 match dt {
32208 DataType::VarChar { .. } | DataType::Char { .. } => true,
32209 DataType::Struct { fields, .. } => fields
32210 .iter()
32211 .any(|f| Self::has_varchar_char_type(&f.data_type)),
32212 _ => false,
32213 }
32214 }
32215
32216 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
32217 fn normalize_varchar_to_string(
32218 dt: crate::expressions::DataType,
32219 ) -> crate::expressions::DataType {
32220 use crate::expressions::DataType;
32221 match dt {
32222 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
32223 name: "STRING".to_string(),
32224 },
32225 DataType::Struct { fields, nested } => {
32226 let fields = fields
32227 .into_iter()
32228 .map(|mut f| {
32229 f.data_type = Self::normalize_varchar_to_string(f.data_type);
32230 f
32231 })
32232 .collect();
32233 DataType::Struct { fields, nested }
32234 }
32235 other => other,
32236 }
32237 }
32238
32239 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
32240 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
32241 if let Expression::Literal(ref lit) = expr {
32242 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
32243 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
32244 let trimmed = s.trim();
32245
32246 // Find where digits end and unit text begins
32247 let digit_end = trimmed
32248 .find(|c: char| !c.is_ascii_digit())
32249 .unwrap_or(trimmed.len());
32250 if digit_end == 0 || digit_end == trimmed.len() {
32251 return expr;
32252 }
32253 let num = &trimmed[..digit_end];
32254 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
32255 if unit_text.is_empty() {
32256 return expr;
32257 }
32258
32259 let known_units = [
32260 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
32261 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
32262 ];
32263 if !known_units.contains(&unit_text.as_str()) {
32264 return expr;
32265 }
32266
32267 let unit_str = unit_text.clone();
32268 // Singularize
32269 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
32270 &unit_str[..unit_str.len() - 1]
32271 } else {
32272 &unit_str
32273 };
32274 let unit = unit_singular;
32275
32276 match target {
32277 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
32278 // INTERVAL '2' DAY
32279 let iu = match unit {
32280 "DAY" => crate::expressions::IntervalUnit::Day,
32281 "HOUR" => crate::expressions::IntervalUnit::Hour,
32282 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32283 "SECOND" => crate::expressions::IntervalUnit::Second,
32284 "WEEK" => crate::expressions::IntervalUnit::Week,
32285 "MONTH" => crate::expressions::IntervalUnit::Month,
32286 "YEAR" => crate::expressions::IntervalUnit::Year,
32287 _ => return expr,
32288 };
32289 return Expression::Interval(Box::new(crate::expressions::Interval {
32290 this: Some(Expression::string(num)),
32291 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32292 unit: iu,
32293 use_plural: false,
32294 }),
32295 }));
32296 }
32297 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
32298 // INTERVAL '2 DAYS'
32299 let plural = if num != "1" && !unit_str.ends_with('S') {
32300 format!("{} {}S", num, unit)
32301 } else if unit_str.ends_with('S') {
32302 format!("{} {}", num, unit_str)
32303 } else {
32304 format!("{} {}", num, unit)
32305 };
32306 return Expression::Interval(Box::new(crate::expressions::Interval {
32307 this: Some(Expression::string(&plural)),
32308 unit: None,
32309 }));
32310 }
32311 _ => {
32312 // Spark/Databricks/Hive: INTERVAL '1' DAY
32313 let iu = match unit {
32314 "DAY" => crate::expressions::IntervalUnit::Day,
32315 "HOUR" => crate::expressions::IntervalUnit::Hour,
32316 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32317 "SECOND" => crate::expressions::IntervalUnit::Second,
32318 "WEEK" => crate::expressions::IntervalUnit::Week,
32319 "MONTH" => crate::expressions::IntervalUnit::Month,
32320 "YEAR" => crate::expressions::IntervalUnit::Year,
32321 _ => return expr,
32322 };
32323 return Expression::Interval(Box::new(crate::expressions::Interval {
32324 this: Some(Expression::string(num)),
32325 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32326 unit: iu,
32327 use_plural: false,
32328 }),
32329 }));
32330 }
32331 }
32332 }
32333 }
32334 // If it's already an INTERVAL expression, pass through
32335 expr
32336 }
32337
32338 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
32339 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
32340 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
32341 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
32342 fn rewrite_unnest_expansion(
32343 select: &crate::expressions::Select,
32344 target: DialectType,
32345 ) -> Option<crate::expressions::Select> {
32346 use crate::expressions::{
32347 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
32348 UnnestFunc,
32349 };
32350
32351 let index_offset: i64 = match target {
32352 DialectType::Presto | DialectType::Trino => 1,
32353 _ => 0, // BigQuery, Snowflake
32354 };
32355
32356 let if_func_name = match target {
32357 DialectType::Snowflake => "IFF",
32358 _ => "IF",
32359 };
32360
32361 let array_length_func = match target {
32362 DialectType::BigQuery => "ARRAY_LENGTH",
32363 DialectType::Presto | DialectType::Trino => "CARDINALITY",
32364 DialectType::Snowflake => "ARRAY_SIZE",
32365 _ => "ARRAY_LENGTH",
32366 };
32367
32368 let use_table_aliases = matches!(
32369 target,
32370 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
32371 );
32372 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
32373
32374 fn make_col(name: &str, table: Option<&str>) -> Expression {
32375 if let Some(tbl) = table {
32376 Expression::boxed_column(Column {
32377 name: Identifier::new(name.to_string()),
32378 table: Some(Identifier::new(tbl.to_string())),
32379 join_mark: false,
32380 trailing_comments: Vec::new(),
32381 span: None,
32382 inferred_type: None,
32383 })
32384 } else {
32385 Expression::Identifier(Identifier::new(name.to_string()))
32386 }
32387 }
32388
32389 fn make_join(this: Expression) -> Join {
32390 Join {
32391 this,
32392 on: None,
32393 using: Vec::new(),
32394 kind: JoinKind::Cross,
32395 use_inner_keyword: false,
32396 use_outer_keyword: false,
32397 deferred_condition: false,
32398 join_hint: None,
32399 match_condition: None,
32400 pivots: Vec::new(),
32401 comments: Vec::new(),
32402 nesting_group: 0,
32403 directed: false,
32404 }
32405 }
32406
32407 // Collect UNNEST info from SELECT expressions
32408 struct UnnestInfo {
32409 arr_expr: Expression,
32410 col_alias: String,
32411 pos_alias: String,
32412 source_alias: String,
32413 original_expr: Expression,
32414 has_outer_alias: Option<String>,
32415 }
32416
32417 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
32418 let mut col_counter = 0usize;
32419 let mut pos_counter = 1usize;
32420 let mut source_counter = 1usize;
32421
32422 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
32423 match expr {
32424 Expression::Unnest(u) => Some(u.this.clone()),
32425 Expression::Function(f)
32426 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
32427 {
32428 Some(f.args[0].clone())
32429 }
32430 Expression::Alias(a) => extract_unnest_arg(&a.this),
32431 Expression::Add(op)
32432 | Expression::Sub(op)
32433 | Expression::Mul(op)
32434 | Expression::Div(op) => {
32435 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
32436 }
32437 _ => None,
32438 }
32439 }
32440
32441 fn get_alias_name(expr: &Expression) -> Option<String> {
32442 if let Expression::Alias(a) = expr {
32443 Some(a.alias.name.clone())
32444 } else {
32445 None
32446 }
32447 }
32448
32449 for sel_expr in &select.expressions {
32450 if let Some(arr) = extract_unnest_arg(sel_expr) {
32451 col_counter += 1;
32452 pos_counter += 1;
32453 source_counter += 1;
32454
32455 let col_alias = if col_counter == 1 {
32456 "col".to_string()
32457 } else {
32458 format!("col_{}", col_counter)
32459 };
32460 let pos_alias = format!("pos_{}", pos_counter);
32461 let source_alias = format!("_u_{}", source_counter);
32462 let has_outer_alias = get_alias_name(sel_expr);
32463
32464 unnest_infos.push(UnnestInfo {
32465 arr_expr: arr,
32466 col_alias,
32467 pos_alias,
32468 source_alias,
32469 original_expr: sel_expr.clone(),
32470 has_outer_alias,
32471 });
32472 }
32473 }
32474
32475 if unnest_infos.is_empty() {
32476 return None;
32477 }
32478
32479 let series_alias = "pos".to_string();
32480 let series_source_alias = "_u".to_string();
32481 let tbl_ref = if use_table_aliases {
32482 Some(series_source_alias.as_str())
32483 } else {
32484 None
32485 };
32486
32487 // Build new SELECT expressions
32488 let mut new_select_exprs = Vec::new();
32489 for info in &unnest_infos {
32490 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
32491 let src_ref = if use_table_aliases {
32492 Some(info.source_alias.as_str())
32493 } else {
32494 None
32495 };
32496
32497 let pos_col = make_col(&series_alias, tbl_ref);
32498 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
32499 let col_ref = make_col(actual_col_name, src_ref);
32500
32501 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
32502 pos_col.clone(),
32503 unnest_pos_col.clone(),
32504 )));
32505 let mut if_args = vec![eq_cond, col_ref];
32506 if null_third_arg {
32507 if_args.push(Expression::Null(crate::expressions::Null));
32508 }
32509
32510 let if_expr =
32511 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
32512 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
32513
32514 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
32515 final_expr,
32516 Identifier::new(actual_col_name.clone()),
32517 ))));
32518 }
32519
32520 // Build array size expressions for GREATEST
32521 let size_exprs: Vec<Expression> = unnest_infos
32522 .iter()
32523 .map(|info| {
32524 Expression::Function(Box::new(Function::new(
32525 array_length_func.to_string(),
32526 vec![info.arr_expr.clone()],
32527 )))
32528 })
32529 .collect();
32530
32531 let greatest =
32532 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
32533
32534 let series_end = if index_offset == 0 {
32535 Expression::Sub(Box::new(BinaryOp::new(
32536 greatest,
32537 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32538 )))
32539 } else {
32540 greatest
32541 };
32542
32543 // Build the position array source
32544 let series_unnest_expr = match target {
32545 DialectType::BigQuery => {
32546 let gen_array = Expression::Function(Box::new(Function::new(
32547 "GENERATE_ARRAY".to_string(),
32548 vec![
32549 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
32550 series_end,
32551 ],
32552 )));
32553 Expression::Unnest(Box::new(UnnestFunc {
32554 this: gen_array,
32555 expressions: Vec::new(),
32556 with_ordinality: false,
32557 alias: None,
32558 offset_alias: None,
32559 }))
32560 }
32561 DialectType::Presto | DialectType::Trino => {
32562 let sequence = Expression::Function(Box::new(Function::new(
32563 "SEQUENCE".to_string(),
32564 vec![
32565 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32566 series_end,
32567 ],
32568 )));
32569 Expression::Unnest(Box::new(UnnestFunc {
32570 this: sequence,
32571 expressions: Vec::new(),
32572 with_ordinality: false,
32573 alias: None,
32574 offset_alias: None,
32575 }))
32576 }
32577 DialectType::Snowflake => {
32578 let range_end = Expression::Add(Box::new(BinaryOp::new(
32579 Expression::Paren(Box::new(crate::expressions::Paren {
32580 this: series_end,
32581 trailing_comments: Vec::new(),
32582 })),
32583 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32584 )));
32585 let gen_range = Expression::Function(Box::new(Function::new(
32586 "ARRAY_GENERATE_RANGE".to_string(),
32587 vec![
32588 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
32589 range_end,
32590 ],
32591 )));
32592 let flatten_arg =
32593 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
32594 name: Identifier::new("INPUT".to_string()),
32595 value: gen_range,
32596 separator: crate::expressions::NamedArgSeparator::DArrow,
32597 }));
32598 let flatten = Expression::Function(Box::new(Function::new(
32599 "FLATTEN".to_string(),
32600 vec![flatten_arg],
32601 )));
32602 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
32603 }
32604 _ => return None,
32605 };
32606
32607 // Build series alias expression
32608 let series_alias_expr = if use_table_aliases {
32609 let col_aliases = if matches!(target, DialectType::Snowflake) {
32610 vec![
32611 Identifier::new("seq".to_string()),
32612 Identifier::new("key".to_string()),
32613 Identifier::new("path".to_string()),
32614 Identifier::new("index".to_string()),
32615 Identifier::new(series_alias.clone()),
32616 Identifier::new("this".to_string()),
32617 ]
32618 } else {
32619 vec![Identifier::new(series_alias.clone())]
32620 };
32621 Expression::Alias(Box::new(Alias {
32622 this: series_unnest_expr,
32623 alias: Identifier::new(series_source_alias.clone()),
32624 column_aliases: col_aliases,
32625 alias_explicit_as: false,
32626 alias_keyword: None,
32627 pre_alias_comments: Vec::new(),
32628 trailing_comments: Vec::new(),
32629 inferred_type: None,
32630 }))
32631 } else {
32632 Expression::Alias(Box::new(Alias::new(
32633 series_unnest_expr,
32634 Identifier::new(series_alias.clone()),
32635 )))
32636 };
32637
32638 // Build CROSS JOINs for each UNNEST
32639 let mut joins = Vec::new();
32640 for info in &unnest_infos {
32641 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
32642
32643 let unnest_join_expr = match target {
32644 DialectType::BigQuery => {
32645 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
32646 let unnest = UnnestFunc {
32647 this: info.arr_expr.clone(),
32648 expressions: Vec::new(),
32649 with_ordinality: true,
32650 alias: Some(Identifier::new(actual_col_name.clone())),
32651 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
32652 };
32653 Expression::Unnest(Box::new(unnest))
32654 }
32655 DialectType::Presto | DialectType::Trino => {
32656 let unnest = UnnestFunc {
32657 this: info.arr_expr.clone(),
32658 expressions: Vec::new(),
32659 with_ordinality: true,
32660 alias: None,
32661 offset_alias: None,
32662 };
32663 Expression::Alias(Box::new(Alias {
32664 this: Expression::Unnest(Box::new(unnest)),
32665 alias: Identifier::new(info.source_alias.clone()),
32666 column_aliases: vec![
32667 Identifier::new(actual_col_name.clone()),
32668 Identifier::new(info.pos_alias.clone()),
32669 ],
32670 alias_explicit_as: false,
32671 alias_keyword: None,
32672 pre_alias_comments: Vec::new(),
32673 trailing_comments: Vec::new(),
32674 inferred_type: None,
32675 }))
32676 }
32677 DialectType::Snowflake => {
32678 let flatten_arg =
32679 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
32680 name: Identifier::new("INPUT".to_string()),
32681 value: info.arr_expr.clone(),
32682 separator: crate::expressions::NamedArgSeparator::DArrow,
32683 }));
32684 let flatten = Expression::Function(Box::new(Function::new(
32685 "FLATTEN".to_string(),
32686 vec![flatten_arg],
32687 )));
32688 let table_fn = Expression::Function(Box::new(Function::new(
32689 "TABLE".to_string(),
32690 vec![flatten],
32691 )));
32692 Expression::Alias(Box::new(Alias {
32693 this: table_fn,
32694 alias: Identifier::new(info.source_alias.clone()),
32695 column_aliases: vec![
32696 Identifier::new("seq".to_string()),
32697 Identifier::new("key".to_string()),
32698 Identifier::new("path".to_string()),
32699 Identifier::new(info.pos_alias.clone()),
32700 Identifier::new(actual_col_name.clone()),
32701 Identifier::new("this".to_string()),
32702 ],
32703 alias_explicit_as: false,
32704 alias_keyword: None,
32705 pre_alias_comments: Vec::new(),
32706 trailing_comments: Vec::new(),
32707 inferred_type: None,
32708 }))
32709 }
32710 _ => return None,
32711 };
32712
32713 joins.push(make_join(unnest_join_expr));
32714 }
32715
32716 // Build WHERE clause
32717 let mut where_conditions: Vec<Expression> = Vec::new();
32718 for info in &unnest_infos {
32719 let src_ref = if use_table_aliases {
32720 Some(info.source_alias.as_str())
32721 } else {
32722 None
32723 };
32724 let pos_col = make_col(&series_alias, tbl_ref);
32725 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
32726
32727 let arr_size = Expression::Function(Box::new(Function::new(
32728 array_length_func.to_string(),
32729 vec![info.arr_expr.clone()],
32730 )));
32731
32732 let size_ref = if index_offset == 0 {
32733 Expression::Paren(Box::new(crate::expressions::Paren {
32734 this: Expression::Sub(Box::new(BinaryOp::new(
32735 arr_size,
32736 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32737 ))),
32738 trailing_comments: Vec::new(),
32739 }))
32740 } else {
32741 arr_size
32742 };
32743
32744 let eq = Expression::Eq(Box::new(BinaryOp::new(
32745 pos_col.clone(),
32746 unnest_pos_col.clone(),
32747 )));
32748 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
32749 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
32750 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
32751 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
32752 this: and_cond,
32753 trailing_comments: Vec::new(),
32754 }));
32755 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
32756
32757 where_conditions.push(or_cond);
32758 }
32759
32760 let where_expr = if where_conditions.len() == 1 {
32761 // Single condition: no parens needed
32762 where_conditions.into_iter().next().unwrap()
32763 } else {
32764 // Multiple conditions: wrap each OR in parens, then combine with AND
32765 let wrap = |e: Expression| {
32766 Expression::Paren(Box::new(crate::expressions::Paren {
32767 this: e,
32768 trailing_comments: Vec::new(),
32769 }))
32770 };
32771 let mut iter = where_conditions.into_iter();
32772 let first = wrap(iter.next().unwrap());
32773 let second = wrap(iter.next().unwrap());
32774 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
32775 this: Expression::And(Box::new(BinaryOp::new(first, second))),
32776 trailing_comments: Vec::new(),
32777 }));
32778 for cond in iter {
32779 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
32780 }
32781 combined
32782 };
32783
32784 // Build the new SELECT
32785 let mut new_select = select.clone();
32786 new_select.expressions = new_select_exprs;
32787
32788 if new_select.from.is_some() {
32789 let mut all_joins = vec![make_join(series_alias_expr)];
32790 all_joins.extend(joins);
32791 new_select.joins.extend(all_joins);
32792 } else {
32793 new_select.from = Some(From {
32794 expressions: vec![series_alias_expr],
32795 });
32796 new_select.joins.extend(joins);
32797 }
32798
32799 if let Some(ref existing_where) = new_select.where_clause {
32800 let combined = Expression::And(Box::new(BinaryOp::new(
32801 existing_where.this.clone(),
32802 where_expr,
32803 )));
32804 new_select.where_clause = Some(crate::expressions::Where { this: combined });
32805 } else {
32806 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
32807 }
32808
32809 Some(new_select)
32810 }
32811
32812 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
32813 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
32814 match original {
32815 Expression::Unnest(_) => replacement.clone(),
32816 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
32817 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
32818 Expression::Add(op) => {
32819 let left = Self::replace_unnest_with_if(&op.left, replacement);
32820 let right = Self::replace_unnest_with_if(&op.right, replacement);
32821 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
32822 }
32823 Expression::Sub(op) => {
32824 let left = Self::replace_unnest_with_if(&op.left, replacement);
32825 let right = Self::replace_unnest_with_if(&op.right, replacement);
32826 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
32827 }
32828 Expression::Mul(op) => {
32829 let left = Self::replace_unnest_with_if(&op.left, replacement);
32830 let right = Self::replace_unnest_with_if(&op.right, replacement);
32831 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
32832 }
32833 Expression::Div(op) => {
32834 let left = Self::replace_unnest_with_if(&op.left, replacement);
32835 let right = Self::replace_unnest_with_if(&op.right, replacement);
32836 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
32837 }
32838 _ => original.clone(),
32839 }
32840 }
32841
/// Split a JSON path into its individual keys and indexes.
///
/// `$.y[0].z` yields `["y", "0", "z"]`. A leading `$.` or `$` is dropped, `.` separates
/// keys, and `[...]` contributes its content as one part with any single- or
/// double-quoted section unquoted. A bracket whose content is exactly `*` contributes
/// nothing. Unterminated brackets or quotes simply run to the end of the input.
fn decompose_json_path(path: &str) -> Vec<String> {
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);

    let mut segments = Vec::new();
    // Key currently being accumulated outside of brackets.
    let mut pending = String::new();
    let mut stream = body.chars();

    while let Some(ch) = stream.next() {
        match ch {
            '.' | '[' => {
                // Both separators terminate the key collected so far.
                if !pending.is_empty() {
                    segments.push(std::mem::take(&mut pending));
                }
                if ch == '[' {
                    let mut inner = String::new();
                    while let Some(c) = stream.next() {
                        match c {
                            ']' => break,
                            // Copy up to the matching quote; `take_while` also
                            // consumes the closing quote itself.
                            '"' | '\'' => {
                                inner.extend(stream.by_ref().take_while(|&q| q != c))
                            }
                            plain => inner.push(plain),
                        }
                    }
                    if inner != "*" {
                        segments.push(inner);
                    }
                }
            }
            plain => pending.push(plain),
        }
    }

    if !pending.is_empty() {
        segments.push(pending);
    }
    segments
}
32909
/// Drop the root marker from a JSON path and return the remainder as an owned string.
///
/// A leading `$.` is removed as a unit; otherwise a lone leading `$` is removed.
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`. Paths without a leading `$` are
/// returned as-is.
fn strip_json_dollar_prefix(path: &str) -> String {
    let remainder = match path.strip_prefix('$') {
        // After the `$`, also swallow the dot that directly follows it, if any.
        Some(after_root) => after_root.strip_prefix('.').unwrap_or(after_root),
        None => path,
    };
    remainder.to_owned()
}
32921
/// Remove `[*]` wildcards from a JSON path.
///
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`.
///
/// After the wildcards are removed, every `..` pair is replaced by a single `.` in one
/// left-to-right pass, and all trailing dots are trimmed.
fn strip_json_wildcards(path: &str) -> String {
    let without_wildcards = path.replace("[*]", "");
    let collapsed = without_wildcards.replace("..", ".");
    String::from(collapsed.trim_end_matches('.'))
}
32930
/// Rewrite quoted bracket accessors of a JSON path as quoted dot accessors.
///
/// `$["a b"]` -> `$."a b"`, `$['key']` -> `$."key"`.
///
/// * A bracket containing a single- or double-quoted section becomes `."content"`
///   (always double-quoted in the output).
/// * A bracket whose content is exactly `*` is emitted as `[*]`.
/// * Any other bracket (e.g. a numeric index) is emitted as `[content]`; a missing
///   closing `]` is added.
///
/// Characters outside brackets are copied through unchanged.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut out = String::new();
    let mut stream = path.chars();

    while let Some(ch) = stream.next() {
        if ch != '[' {
            out.push(ch);
            continue;
        }

        // Gather the bracket content up to (and consuming) the closing `]`.
        let mut inner = String::new();
        let mut saw_quote = false;
        while let Some(c) = stream.next() {
            match c {
                ']' => break,
                '"' | '\'' => {
                    saw_quote = true;
                    // Copies up to the matching quote and consumes that quote too.
                    inner.extend(stream.by_ref().take_while(|&q| q != c));
                }
                plain => inner.push(plain),
            }
        }

        if inner == "*" {
            // Keep wildcard as-is
            out.push_str("[*]");
        } else if saw_quote {
            out.push('.');
            out.push('"');
            out.push_str(&inner);
            out.push('"');
        } else {
            out.push('[');
            out.push_str(&inner);
            out.push(']');
        }
    }

    out
}
32986
/// Switch double-quoted bracket keys of a JSON path to single quotes.
///
/// `$["a b"]` -> `$['a b']`.
///
/// Only a `"` that directly follows `[` starts a rewritten key; its content is copied
/// verbatim up to the next `"`. Everything else, including the closing `]`, is copied
/// through unchanged. An unterminated key is still closed with `'`.
fn bracket_to_single_quotes(path: &str) -> String {
    let mut out = String::new();
    let mut stream = path.chars().peekable();

    while let Some(ch) = stream.next() {
        out.push(ch);
        if ch == '[' && stream.peek() == Some(&'"') {
            stream.next(); // opening "
            out.push('\'');
            // Copies the key and consumes the closing " as well.
            out.extend(stream.by_ref().take_while(|&c| c != '"'));
            out.push('\'');
        }
    }

    out
}
33013
33014 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
33015 /// or PostgreSQL #temp -> TEMPORARY.
33016 /// Also strips # from INSERT INTO #table for non-TSQL targets.
33017 fn transform_select_into(
33018 expr: Expression,
33019 _source: DialectType,
33020 target: DialectType,
33021 ) -> Expression {
33022 use crate::expressions::{CreateTable, Expression, TableRef};
33023
33024 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
33025 if let Expression::Insert(ref insert) = expr {
33026 if insert.table.name.name.starts_with('#')
33027 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
33028 {
33029 let mut new_insert = insert.clone();
33030 new_insert.table.name.name =
33031 insert.table.name.name.trim_start_matches('#').to_string();
33032 return Expression::Insert(new_insert);
33033 }
33034 return expr;
33035 }
33036
33037 if let Expression::Select(ref select) = expr {
33038 if let Some(ref into) = select.into {
33039 let table_name_raw = match &into.this {
33040 Expression::Table(tr) => tr.name.name.clone(),
33041 Expression::Identifier(id) => id.name.clone(),
33042 _ => String::new(),
33043 };
33044 let is_temp = table_name_raw.starts_with('#') || into.temporary;
33045 let clean_name = table_name_raw.trim_start_matches('#').to_string();
33046
33047 match target {
33048 DialectType::DuckDB | DialectType::Snowflake => {
33049 // SELECT INTO -> CREATE TABLE AS SELECT
33050 let mut new_select = select.clone();
33051 new_select.into = None;
33052 let ct = CreateTable {
33053 name: TableRef::new(clean_name),
33054 on_cluster: None,
33055 columns: Vec::new(),
33056 constraints: Vec::new(),
33057 if_not_exists: false,
33058 temporary: is_temp,
33059 or_replace: false,
33060 table_modifier: None,
33061 as_select: Some(Expression::Select(new_select)),
33062 as_select_parenthesized: false,
33063 on_commit: None,
33064 clone_source: None,
33065 clone_at_clause: None,
33066 shallow_clone: false,
33067 deep_clone: false,
33068 is_copy: false,
33069 leading_comments: Vec::new(),
33070 with_properties: Vec::new(),
33071 teradata_post_name_options: Vec::new(),
33072 with_data: None,
33073 with_statistics: None,
33074 teradata_indexes: Vec::new(),
33075 with_cte: None,
33076 properties: Vec::new(),
33077 partition_of: None,
33078 post_table_properties: Vec::new(),
33079 mysql_table_options: Vec::new(),
33080 inherits: Vec::new(),
33081 on_property: None,
33082 copy_grants: false,
33083 using_template: None,
33084 rollup: None,
33085 uuid: None,
33086 with_partition_columns: Vec::new(),
33087 with_connection: None,
33088 };
33089 return Expression::CreateTable(Box::new(ct));
33090 }
33091 DialectType::PostgreSQL | DialectType::Redshift => {
33092 // PostgreSQL: #foo -> INTO TEMPORARY foo
33093 if is_temp && !into.temporary {
33094 let mut new_select = select.clone();
33095 let mut new_into = into.clone();
33096 new_into.temporary = true;
33097 new_into.unlogged = false;
33098 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
33099 new_select.into = Some(new_into);
33100 Expression::Select(new_select)
33101 } else {
33102 expr
33103 }
33104 }
33105 _ => expr,
33106 }
33107 } else {
33108 expr
33109 }
33110 } else {
33111 expr
33112 }
33113 }
33114
33115 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
33116 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
33117 fn transform_create_table_properties(
33118 ct: &mut crate::expressions::CreateTable,
33119 _source: DialectType,
33120 target: DialectType,
33121 ) {
33122 use crate::expressions::{
33123 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
33124 Properties,
33125 };
33126
33127 // Helper to convert a raw property value string to the correct Expression
33128 let value_to_expr = |v: &str| -> Expression {
33129 let trimmed = v.trim();
33130 // Check if it's a quoted string (starts and ends with ')
33131 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
33132 Expression::Literal(Box::new(Literal::String(
33133 trimmed[1..trimmed.len() - 1].to_string(),
33134 )))
33135 }
33136 // Check if it's a number
33137 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
33138 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
33139 }
33140 // Check if it's ARRAY[...] or ARRAY(...)
33141 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
33142 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
33143 let inner = trimmed
33144 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
33145 .trim_start_matches('[')
33146 .trim_start_matches('(')
33147 .trim_end_matches(']')
33148 .trim_end_matches(')');
33149 let elements: Vec<Expression> = inner
33150 .split(',')
33151 .map(|e| {
33152 let elem = e.trim().trim_matches('\'');
33153 Expression::Literal(Box::new(Literal::String(elem.to_string())))
33154 })
33155 .collect();
33156 Expression::Function(Box::new(crate::expressions::Function::new(
33157 "ARRAY".to_string(),
33158 elements,
33159 )))
33160 }
33161 // Otherwise, just output as identifier (unquoted)
33162 else {
33163 Expression::Identifier(Identifier::new(trimmed.to_string()))
33164 }
33165 };
33166
33167 if ct.with_properties.is_empty() && ct.properties.is_empty() {
33168 return;
33169 }
33170
33171 // Handle Presto-style WITH properties
33172 if !ct.with_properties.is_empty() {
33173 // Extract FORMAT property and remaining properties
33174 let mut format_value: Option<String> = None;
33175 let mut partitioned_by: Option<String> = None;
33176 let mut other_props: Vec<(String, String)> = Vec::new();
33177
33178 for (key, value) in ct.with_properties.drain(..) {
33179 if key.eq_ignore_ascii_case("FORMAT") {
33180 // Strip surrounding quotes from value if present
33181 format_value = Some(value.trim_matches('\'').to_string());
33182 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
33183 partitioned_by = Some(value);
33184 } else {
33185 other_props.push((key, value));
33186 }
33187 }
33188
33189 match target {
33190 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33191 // Presto: keep WITH properties but lowercase 'format' key
33192 if let Some(fmt) = format_value {
33193 ct.with_properties
33194 .push(("format".to_string(), format!("'{}'", fmt)));
33195 }
33196 if let Some(part) = partitioned_by {
33197 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
33198 let trimmed = part.trim();
33199 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
33200 // Also handle ARRAY['...'] format - keep as-is
33201 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
33202 ct.with_properties
33203 .push(("PARTITIONED_BY".to_string(), part));
33204 } else {
33205 // Parse column names from the parenthesized list
33206 let cols: Vec<&str> = inner
33207 .split(',')
33208 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
33209 .collect();
33210 let array_val = format!(
33211 "ARRAY[{}]",
33212 cols.iter()
33213 .map(|c| format!("'{}'", c))
33214 .collect::<Vec<_>>()
33215 .join(", ")
33216 );
33217 ct.with_properties
33218 .push(("PARTITIONED_BY".to_string(), array_val));
33219 }
33220 }
33221 ct.with_properties.extend(other_props);
33222 }
33223 DialectType::Hive => {
33224 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
33225 if let Some(fmt) = format_value {
33226 ct.properties.push(Expression::FileFormatProperty(Box::new(
33227 FileFormatProperty {
33228 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
33229 expressions: vec![],
33230 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
33231 value: true,
33232 }))),
33233 },
33234 )));
33235 }
33236 if let Some(_part) = partitioned_by {
33237 // PARTITIONED_BY handling is complex - move columns to partitioned by
33238 // For now, the partition columns are extracted from the column list
33239 Self::apply_partitioned_by(ct, &_part, target);
33240 }
33241 if !other_props.is_empty() {
33242 let eq_exprs: Vec<Expression> = other_props
33243 .into_iter()
33244 .map(|(k, v)| {
33245 Expression::Eq(Box::new(BinaryOp::new(
33246 Expression::Literal(Box::new(Literal::String(k))),
33247 value_to_expr(&v),
33248 )))
33249 })
33250 .collect();
33251 ct.properties
33252 .push(Expression::Properties(Box::new(Properties {
33253 expressions: eq_exprs,
33254 })));
33255 }
33256 }
33257 DialectType::Spark | DialectType::Databricks => {
33258 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
33259 if let Some(fmt) = format_value {
33260 ct.properties.push(Expression::FileFormatProperty(Box::new(
33261 FileFormatProperty {
33262 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
33263 expressions: vec![],
33264 hive_format: None, // None means USING syntax
33265 },
33266 )));
33267 }
33268 if let Some(_part) = partitioned_by {
33269 Self::apply_partitioned_by(ct, &_part, target);
33270 }
33271 if !other_props.is_empty() {
33272 let eq_exprs: Vec<Expression> = other_props
33273 .into_iter()
33274 .map(|(k, v)| {
33275 Expression::Eq(Box::new(BinaryOp::new(
33276 Expression::Literal(Box::new(Literal::String(k))),
33277 value_to_expr(&v),
33278 )))
33279 })
33280 .collect();
33281 ct.properties
33282 .push(Expression::Properties(Box::new(Properties {
33283 expressions: eq_exprs,
33284 })));
33285 }
33286 }
33287 DialectType::DuckDB => {
33288 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
33289 // Keep nothing
33290 }
33291 _ => {
33292 // For other dialects, keep WITH properties as-is
33293 if let Some(fmt) = format_value {
33294 ct.with_properties
33295 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
33296 }
33297 if let Some(part) = partitioned_by {
33298 ct.with_properties
33299 .push(("PARTITIONED_BY".to_string(), part));
33300 }
33301 ct.with_properties.extend(other_props);
33302 }
33303 }
33304 }
33305
33306 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
33307 // and Hive STORED AS -> Presto WITH (format=...) conversion
33308 if !ct.properties.is_empty() {
33309 let is_presto_target = matches!(
33310 target,
33311 DialectType::Presto | DialectType::Trino | DialectType::Athena
33312 );
33313 let is_duckdb_target = matches!(target, DialectType::DuckDB);
33314
33315 if is_presto_target || is_duckdb_target {
33316 let mut new_properties = Vec::new();
33317 for prop in ct.properties.drain(..) {
33318 match &prop {
33319 Expression::FileFormatProperty(ffp) => {
33320 if is_presto_target {
33321 // Convert STORED AS/USING to WITH (format=...)
33322 if let Some(ref fmt_expr) = ffp.this {
33323 let fmt_str = match fmt_expr.as_ref() {
33324 Expression::Identifier(id) => id.name.clone(),
33325 Expression::Literal(lit)
33326 if matches!(lit.as_ref(), Literal::String(_)) =>
33327 {
33328 let Literal::String(s) = lit.as_ref() else {
33329 unreachable!()
33330 };
33331 s.clone()
33332 }
33333 _ => {
33334 new_properties.push(prop);
33335 continue;
33336 }
33337 };
33338 ct.with_properties
33339 .push(("format".to_string(), format!("'{}'", fmt_str)));
33340 }
33341 }
33342 // DuckDB: just strip file format properties
33343 }
33344 // Convert TBLPROPERTIES to WITH properties for Presto target
33345 Expression::Properties(props) if is_presto_target => {
33346 for expr in &props.expressions {
33347 if let Expression::Eq(eq) = expr {
33348 // Extract key and value from the Eq expression
33349 let key = match &eq.left {
33350 Expression::Literal(lit)
33351 if matches!(lit.as_ref(), Literal::String(_)) =>
33352 {
33353 let Literal::String(s) = lit.as_ref() else {
33354 unreachable!()
33355 };
33356 s.clone()
33357 }
33358 Expression::Identifier(id) => id.name.clone(),
33359 _ => continue,
33360 };
33361 let value = match &eq.right {
33362 Expression::Literal(lit)
33363 if matches!(lit.as_ref(), Literal::String(_)) =>
33364 {
33365 let Literal::String(s) = lit.as_ref() else {
33366 unreachable!()
33367 };
33368 format!("'{}'", s)
33369 }
33370 Expression::Literal(lit)
33371 if matches!(lit.as_ref(), Literal::Number(_)) =>
33372 {
33373 let Literal::Number(n) = lit.as_ref() else {
33374 unreachable!()
33375 };
33376 n.clone()
33377 }
33378 Expression::Identifier(id) => id.name.clone(),
33379 _ => continue,
33380 };
33381 ct.with_properties.push((key, value));
33382 }
33383 }
33384 }
33385 // Convert PartitionedByProperty for Presto target
33386 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
33387 // Check if it contains ColumnDef expressions (Hive-style with types)
33388 if let Expression::Tuple(ref tuple) = *pbp.this {
33389 let mut col_names: Vec<String> = Vec::new();
33390 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
33391 let mut has_col_defs = false;
33392 for expr in &tuple.expressions {
33393 if let Expression::ColumnDef(ref cd) = expr {
33394 has_col_defs = true;
33395 col_names.push(cd.name.name.clone());
33396 col_defs.push(*cd.clone());
33397 } else if let Expression::Column(ref col) = expr {
33398 col_names.push(col.name.name.clone());
33399 } else if let Expression::Identifier(ref id) = expr {
33400 col_names.push(id.name.clone());
33401 } else {
33402 // For function expressions like MONTHS(y), serialize to SQL
33403 let generic = Dialect::get(DialectType::Generic);
33404 if let Ok(sql) = generic.generate(expr) {
33405 col_names.push(sql);
33406 }
33407 }
33408 }
33409 if has_col_defs {
33410 // Merge partition column defs into the main column list
33411 for cd in col_defs {
33412 ct.columns.push(cd);
33413 }
33414 }
33415 if !col_names.is_empty() {
33416 // Add PARTITIONED_BY property
33417 let array_val = format!(
33418 "ARRAY[{}]",
33419 col_names
33420 .iter()
33421 .map(|n| format!("'{}'", n))
33422 .collect::<Vec<_>>()
33423 .join(", ")
33424 );
33425 ct.with_properties
33426 .push(("PARTITIONED_BY".to_string(), array_val));
33427 }
33428 }
33429 // Skip - don't keep in properties
33430 }
33431 _ => {
33432 if !is_duckdb_target {
33433 new_properties.push(prop);
33434 }
33435 }
33436 }
33437 }
33438 ct.properties = new_properties;
33439 } else {
33440 // For Hive/Spark targets, unquote format names in STORED AS
33441 for prop in &mut ct.properties {
33442 if let Expression::FileFormatProperty(ref mut ffp) = prop {
33443 if let Some(ref mut fmt_expr) = ffp.this {
33444 if let Expression::Literal(lit) = fmt_expr.as_ref() {
33445 if let Literal::String(s) = lit.as_ref() {
33446 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
33447 let unquoted = s.clone();
33448 *fmt_expr =
33449 Box::new(Expression::Identifier(Identifier::new(unquoted)));
33450 }
33451 }
33452 }
33453 }
33454 }
33455 }
33456 }
33457 }
33458
33459 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
33460 fn apply_partitioned_by(
33461 ct: &mut crate::expressions::CreateTable,
33462 partitioned_by_value: &str,
33463 target: DialectType,
33464 ) {
33465 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
33466
33467 // Parse the ARRAY['col1', 'col2'] value to extract column names
33468 let mut col_names: Vec<String> = Vec::new();
33469 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
33470 let inner = partitioned_by_value
33471 .trim()
33472 .trim_start_matches("ARRAY")
33473 .trim_start_matches('[')
33474 .trim_start_matches('(')
33475 .trim_end_matches(']')
33476 .trim_end_matches(')');
33477 for part in inner.split(',') {
33478 let col = part.trim().trim_matches('\'').trim_matches('"');
33479 if !col.is_empty() {
33480 col_names.push(col.to_string());
33481 }
33482 }
33483
33484 if col_names.is_empty() {
33485 return;
33486 }
33487
33488 if matches!(target, DialectType::Hive) {
33489 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
33490 let mut partition_col_defs = Vec::new();
33491 for col_name in &col_names {
33492 // Find and remove from columns
33493 if let Some(pos) = ct
33494 .columns
33495 .iter()
33496 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
33497 {
33498 let col_def = ct.columns.remove(pos);
33499 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
33500 }
33501 }
33502 if !partition_col_defs.is_empty() {
33503 ct.properties
33504 .push(Expression::PartitionedByProperty(Box::new(
33505 PartitionedByProperty {
33506 this: Box::new(Expression::Tuple(Box::new(Tuple {
33507 expressions: partition_col_defs,
33508 }))),
33509 },
33510 )));
33511 }
33512 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
33513 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
33514 // Use quoted identifiers to match the quoting style of the original column definitions
33515 let partition_exprs: Vec<Expression> = col_names
33516 .iter()
33517 .map(|name| {
33518 // Check if the column exists in the column list and use its quoting
33519 let is_quoted = ct
33520 .columns
33521 .iter()
33522 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
33523 let ident = if is_quoted {
33524 Identifier::quoted(name.clone())
33525 } else {
33526 Identifier::new(name.clone())
33527 };
33528 Expression::boxed_column(Column {
33529 name: ident,
33530 table: None,
33531 join_mark: false,
33532 trailing_comments: Vec::new(),
33533 span: None,
33534 inferred_type: None,
33535 })
33536 })
33537 .collect();
33538 ct.properties
33539 .push(Expression::PartitionedByProperty(Box::new(
33540 PartitionedByProperty {
33541 this: Box::new(Expression::Tuple(Box::new(Tuple {
33542 expressions: partition_exprs,
33543 }))),
33544 },
33545 )));
33546 }
33547 // DuckDB: strip partitioned_by entirely (already handled)
33548 }
33549
33550 /// Convert a DataType to Spark's type string format (using angle brackets)
33551 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
33552 use crate::expressions::DataType;
33553 match dt {
33554 DataType::Int { .. } => "INT".to_string(),
33555 DataType::BigInt { .. } => "BIGINT".to_string(),
33556 DataType::SmallInt { .. } => "SMALLINT".to_string(),
33557 DataType::TinyInt { .. } => "TINYINT".to_string(),
33558 DataType::Float { .. } => "FLOAT".to_string(),
33559 DataType::Double { .. } => "DOUBLE".to_string(),
33560 DataType::Decimal {
33561 precision: Some(p),
33562 scale: Some(s),
33563 } => format!("DECIMAL({}, {})", p, s),
33564 DataType::Decimal {
33565 precision: Some(p), ..
33566 } => format!("DECIMAL({})", p),
33567 DataType::Decimal { .. } => "DECIMAL".to_string(),
33568 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
33569 "STRING".to_string()
33570 }
33571 DataType::Char { .. } => "STRING".to_string(),
33572 DataType::Boolean => "BOOLEAN".to_string(),
33573 DataType::Date => "DATE".to_string(),
33574 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
33575 DataType::Json | DataType::JsonB => "STRING".to_string(),
33576 DataType::Binary { .. } => "BINARY".to_string(),
33577 DataType::Array { element_type, .. } => {
33578 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
33579 }
33580 DataType::Map {
33581 key_type,
33582 value_type,
33583 } => format!(
33584 "MAP<{}, {}>",
33585 Self::data_type_to_spark_string(key_type),
33586 Self::data_type_to_spark_string(value_type)
33587 ),
33588 DataType::Struct { fields, .. } => {
33589 let field_strs: Vec<String> = fields
33590 .iter()
33591 .map(|f| {
33592 if f.name.is_empty() {
33593 Self::data_type_to_spark_string(&f.data_type)
33594 } else {
33595 format!(
33596 "{}: {}",
33597 f.name,
33598 Self::data_type_to_spark_string(&f.data_type)
33599 )
33600 }
33601 })
33602 .collect();
33603 format!("STRUCT<{}>", field_strs.join(", "))
33604 }
33605 DataType::Custom { name } => name.clone(),
33606 _ => format!("{:?}", dt),
33607 }
33608 }
33609
33610 /// Extract value and unit from an Interval expression
33611 /// Returns (value_expression, IntervalUnit)
33612 fn extract_interval_parts(
33613 interval_expr: &Expression,
33614 ) -> Option<(Expression, crate::expressions::IntervalUnit)> {
33615 use crate::expressions::{DataType, IntervalUnit, IntervalUnitSpec, Literal};
33616
33617 fn unit_from_str(unit: &str) -> Option<IntervalUnit> {
33618 match unit.trim().to_ascii_uppercase().as_str() {
33619 "YEAR" | "YEARS" => Some(IntervalUnit::Year),
33620 "QUARTER" | "QUARTERS" => Some(IntervalUnit::Quarter),
33621 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" => Some(IntervalUnit::Month),
33622 "WEEK" | "WEEKS" | "ISOWEEK" => Some(IntervalUnit::Week),
33623 "DAY" | "DAYS" => Some(IntervalUnit::Day),
33624 "HOUR" | "HOURS" => Some(IntervalUnit::Hour),
33625 "MINUTE" | "MINUTES" => Some(IntervalUnit::Minute),
33626 "SECOND" | "SECONDS" => Some(IntervalUnit::Second),
33627 "MILLISECOND" | "MILLISECONDS" => Some(IntervalUnit::Millisecond),
33628 "MICROSECOND" | "MICROSECONDS" => Some(IntervalUnit::Microsecond),
33629 "NANOSECOND" | "NANOSECONDS" => Some(IntervalUnit::Nanosecond),
33630 _ => None,
33631 }
33632 }
33633
33634 fn parts_from_literal_string(s: &str) -> Option<(Expression, IntervalUnit)> {
33635 let mut parts = s.split_whitespace();
33636 let value = parts.next()?;
33637 let unit = unit_from_str(parts.next()?)?;
33638 Some((
33639 Expression::Literal(Box::new(Literal::String(value.to_string()))),
33640 unit,
33641 ))
33642 }
33643
33644 fn unit_from_spec(unit: &IntervalUnitSpec) -> Option<IntervalUnit> {
33645 match unit {
33646 IntervalUnitSpec::Simple { unit, .. } => Some(*unit),
33647 IntervalUnitSpec::Expr(expr) => match expr.as_ref() {
33648 Expression::Day(_) => Some(IntervalUnit::Day),
33649 Expression::Month(_) => Some(IntervalUnit::Month),
33650 Expression::Year(_) => Some(IntervalUnit::Year),
33651 Expression::Identifier(id) => unit_from_str(&id.name),
33652 Expression::Var(v) => unit_from_str(&v.this),
33653 Expression::Column(col) => unit_from_str(&col.name.name),
33654 _ => None,
33655 },
33656 _ => None,
33657 }
33658 }
33659
33660 match interval_expr {
33661 Expression::Interval(iv) => {
33662 let val = iv.this.clone().unwrap_or(Expression::number(0));
33663 if let Expression::Literal(lit) = &val {
33664 if let Literal::String(s) = lit.as_ref() {
33665 if let Some(parts) = parts_from_literal_string(s) {
33666 return Some(parts);
33667 }
33668 }
33669 }
33670 let unit = iv
33671 .unit
33672 .as_ref()
33673 .and_then(unit_from_spec)
33674 .unwrap_or(IntervalUnit::Day);
33675 Some((val, unit))
33676 }
33677 Expression::Cast(cast) if matches!(cast.to, DataType::Interval { .. }) => {
33678 if let Expression::Literal(lit) = &cast.this {
33679 if let Literal::String(s) = lit.as_ref() {
33680 if let Some(parts) = parts_from_literal_string(s) {
33681 return Some(parts);
33682 }
33683 }
33684 }
33685 let unit = match &cast.to {
33686 DataType::Interval {
33687 unit: Some(unit), ..
33688 } => unit_from_str(unit).unwrap_or(IntervalUnit::Day),
33689 _ => IntervalUnit::Day,
33690 };
33691 Some((cast.this.clone(), unit))
33692 }
33693 _ => None,
33694 }
33695 }
33696
33697 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
33698 match expr {
33699 Expression::Add(op) => {
33700 Self::extract_interval_parts(&op.right)?;
33701 Some(Self::build_tsql_dateadd_from_interval(
33702 op.left.clone(),
33703 &op.right,
33704 false,
33705 ))
33706 }
33707 Expression::Sub(op) => {
33708 Self::extract_interval_parts(&op.right)?;
33709 Some(Self::build_tsql_dateadd_from_interval(
33710 op.left.clone(),
33711 &op.right,
33712 true,
33713 ))
33714 }
33715 _ => None,
33716 }
33717 }
33718
33719 fn build_tsql_dateadd_from_interval(
33720 date: Expression,
33721 interval: &Expression,
33722 subtract: bool,
33723 ) -> Expression {
33724 let (value, unit) = Self::extract_interval_parts(interval)
33725 .unwrap_or_else(|| (interval.clone(), crate::expressions::IntervalUnit::Day));
33726 let unit = Self::interval_unit_to_string(&unit);
33727 let amount = Self::tsql_dateadd_amount(value, subtract);
33728
33729 Expression::Function(Box::new(Function::new(
33730 "DATEADD".to_string(),
33731 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
33732 )))
33733 }
33734
33735 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
33736 use crate::expressions::{Parameter, ParameterStyle, UnaryOp};
33737
33738 fn numeric_literal_value(value: &Expression) -> Option<&str> {
33739 match value {
33740 Expression::Literal(lit) => match lit.as_ref() {
33741 crate::expressions::Literal::Number(n)
33742 | crate::expressions::Literal::String(n) => Some(n.as_str()),
33743 _ => None,
33744 },
33745 _ => None,
33746 }
33747 }
33748
33749 fn colon_parameter(value: &Expression) -> Option<Expression> {
33750 let Expression::Literal(lit) = value else {
33751 return None;
33752 };
33753 let crate::expressions::Literal::String(s) = lit.as_ref() else {
33754 return None;
33755 };
33756 let name = s.strip_prefix(':')?;
33757 if name.is_empty()
33758 || !name
33759 .chars()
33760 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
33761 {
33762 return None;
33763 }
33764
33765 Some(Expression::Parameter(Box::new(Parameter {
33766 name: if name.chars().all(|ch| ch.is_ascii_digit()) {
33767 None
33768 } else {
33769 Some(name.to_string())
33770 },
33771 index: name.parse::<u32>().ok(),
33772 style: ParameterStyle::Colon,
33773 quoted: false,
33774 string_quoted: false,
33775 expression: None,
33776 })))
33777 }
33778
33779 let value = colon_parameter(&value).unwrap_or(value);
33780
33781 if let Some(n) = numeric_literal_value(&value) {
33782 if let Ok(parsed) = n.parse::<f64>() {
33783 let normalized = if negate { -parsed } else { parsed };
33784 let rendered = if normalized.fract() == 0.0 {
33785 format!("{}", normalized as i64)
33786 } else {
33787 normalized.to_string()
33788 };
33789 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
33790 rendered,
33791 )));
33792 }
33793 }
33794
33795 if !negate {
33796 return value;
33797 }
33798
33799 match value {
33800 Expression::Neg(op) => op.this,
33801 other => Expression::Neg(Box::new(UnaryOp {
33802 this: other,
33803 inferred_type: None,
33804 })),
33805 }
33806 }
33807
33808 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
33809 fn normalize_bigquery_function(
33810 e: Expression,
33811 source: DialectType,
33812 target: DialectType,
33813 ) -> Result<Expression> {
33814 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
33815
33816 let f = if let Expression::Function(f) = e {
33817 *f
33818 } else {
33819 return Ok(e);
33820 };
33821 let name = f.name.to_ascii_uppercase();
33822 let mut args = f.args;
33823
33824 /// Helper to extract unit string from an identifier, column, or literal expression
33825 fn get_unit_str(expr: &Expression) -> String {
33826 match expr {
33827 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
33828 Expression::Var(v) => v.this.to_ascii_uppercase(),
33829 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
33830 let Literal::String(s) = lit.as_ref() else {
33831 unreachable!()
33832 };
33833 s.to_ascii_uppercase()
33834 }
33835 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
33836 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
33837 Expression::Function(f) => {
33838 let base = f.name.to_ascii_uppercase();
33839 if !f.args.is_empty() {
33840 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
33841 let inner = get_unit_str(&f.args[0]);
33842 format!("{}({})", base, inner)
33843 } else {
33844 base
33845 }
33846 }
33847 _ => "DAY".to_string(),
33848 }
33849 }
33850
33851 /// Parse unit string to IntervalUnit
33852 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
33853 match s {
33854 "YEAR" => crate::expressions::IntervalUnit::Year,
33855 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
33856 "MONTH" => crate::expressions::IntervalUnit::Month,
33857 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
33858 "DAY" => crate::expressions::IntervalUnit::Day,
33859 "HOUR" => crate::expressions::IntervalUnit::Hour,
33860 "MINUTE" => crate::expressions::IntervalUnit::Minute,
33861 "SECOND" => crate::expressions::IntervalUnit::Second,
33862 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
33863 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
33864 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
33865 _ => crate::expressions::IntervalUnit::Day,
33866 }
33867 }
33868
33869 match name.as_str() {
33870 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
33871 // (BigQuery: result = date1 - date2, Standard: result = end - start)
33872 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
33873 let date1 = args.remove(0);
33874 let date2 = args.remove(0);
33875 let unit_expr = args.remove(0);
33876 let unit_str = get_unit_str(&unit_expr);
33877
33878 if matches!(target, DialectType::BigQuery) {
33879 // BigQuery -> BigQuery: just uppercase the unit
33880 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
33881 return Ok(Expression::Function(Box::new(Function::new(
33882 f.name,
33883 vec![date1, date2, unit],
33884 ))));
33885 }
33886
33887 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
33888 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
33889 if matches!(target, DialectType::Snowflake) {
33890 return Ok(Expression::TimestampDiff(Box::new(
33891 crate::expressions::TimestampDiff {
33892 this: Box::new(date2),
33893 expression: Box::new(date1),
33894 unit: Some(unit_str),
33895 },
33896 )));
33897 }
33898
33899 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
33900 if matches!(target, DialectType::DuckDB) {
33901 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
33902 // CAST to TIME
33903 let cast_fn = |e: Expression| -> Expression {
33904 match e {
33905 Expression::Literal(lit)
33906 if matches!(lit.as_ref(), Literal::String(_)) =>
33907 {
33908 let Literal::String(s) = lit.as_ref() else {
33909 unreachable!()
33910 };
33911 Expression::Cast(Box::new(Cast {
33912 this: Expression::Literal(Box::new(Literal::String(
33913 s.clone(),
33914 ))),
33915 to: DataType::Custom {
33916 name: "TIME".to_string(),
33917 },
33918 trailing_comments: vec![],
33919 double_colon_syntax: false,
33920 format: None,
33921 default: None,
33922 inferred_type: None,
33923 }))
33924 }
33925 other => other,
33926 }
33927 };
33928 (cast_fn(date1), cast_fn(date2))
33929 } else if name == "DATETIME_DIFF" {
33930 // CAST to TIMESTAMP
33931 (
33932 Self::ensure_cast_timestamp(date1),
33933 Self::ensure_cast_timestamp(date2),
33934 )
33935 } else {
33936 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
33937 (
33938 Self::ensure_cast_timestamptz(date1),
33939 Self::ensure_cast_timestamptz(date2),
33940 )
33941 };
33942 return Ok(Expression::Function(Box::new(Function::new(
33943 "DATE_DIFF".to_string(),
33944 vec![
33945 Expression::Literal(Box::new(Literal::String(unit_str))),
33946 cast_d2,
33947 cast_d1,
33948 ],
33949 ))));
33950 }
33951
33952 // Convert to standard TIMESTAMPDIFF(unit, start, end)
33953 let unit = Expression::Identifier(Identifier::new(unit_str));
33954 Ok(Expression::Function(Box::new(Function::new(
33955 "TIMESTAMPDIFF".to_string(),
33956 vec![unit, date2, date1],
33957 ))))
33958 }
33959
33960 // DATEDIFF(unit, start, end) -> target-specific form
33961 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
33962 "DATEDIFF" if args.len() == 3 => {
33963 let arg0 = args.remove(0);
33964 let arg1 = args.remove(0);
33965 let arg2 = args.remove(0);
33966 let unit_str = get_unit_str(&arg0);
33967
33968 // Redshift DATEDIFF(unit, start, end) order: result = end - start
33969 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
33970 // TSQL DATEDIFF(unit, start, end) order: result = end - start
33971
33972 if matches!(target, DialectType::Snowflake) {
33973 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
33974 let unit = Expression::Identifier(Identifier::new(unit_str));
33975 return Ok(Expression::Function(Box::new(Function::new(
33976 "DATEDIFF".to_string(),
33977 vec![unit, arg1, arg2],
33978 ))));
33979 }
33980
33981 if matches!(target, DialectType::DuckDB) {
33982 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
33983 let cast_d1 = Self::ensure_cast_timestamp(arg1);
33984 let cast_d2 = Self::ensure_cast_timestamp(arg2);
33985 return Ok(Expression::Function(Box::new(Function::new(
33986 "DATE_DIFF".to_string(),
33987 vec![
33988 Expression::Literal(Box::new(Literal::String(unit_str))),
33989 cast_d1,
33990 cast_d2,
33991 ],
33992 ))));
33993 }
33994
33995 if matches!(target, DialectType::BigQuery) {
33996 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
33997 let cast_d1 = Self::ensure_cast_datetime(arg1);
33998 let cast_d2 = Self::ensure_cast_datetime(arg2);
33999 let unit = Expression::Identifier(Identifier::new(unit_str));
34000 return Ok(Expression::Function(Box::new(Function::new(
34001 "DATE_DIFF".to_string(),
34002 vec![cast_d2, cast_d1, unit],
34003 ))));
34004 }
34005
34006 if matches!(target, DialectType::Spark | DialectType::Databricks) {
34007 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
34008 let unit = Expression::Identifier(Identifier::new(unit_str));
34009 return Ok(Expression::Function(Box::new(Function::new(
34010 "DATEDIFF".to_string(),
34011 vec![unit, arg1, arg2],
34012 ))));
34013 }
34014
34015 if matches!(target, DialectType::Hive) {
34016 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
34017 match unit_str.as_str() {
34018 "MONTH" => {
34019 return Ok(Expression::Function(Box::new(Function::new(
34020 "CAST".to_string(),
34021 vec![Expression::Function(Box::new(Function::new(
34022 "MONTHS_BETWEEN".to_string(),
34023 vec![arg2, arg1],
34024 )))],
34025 ))));
34026 }
34027 "WEEK" => {
34028 return Ok(Expression::Cast(Box::new(Cast {
34029 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
34030 Expression::Function(Box::new(Function::new(
34031 "DATEDIFF".to_string(),
34032 vec![arg2, arg1],
34033 ))),
34034 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
34035 ))),
34036 to: DataType::Int {
34037 length: None,
34038 integer_spelling: false,
34039 },
34040 trailing_comments: vec![],
34041 double_colon_syntax: false,
34042 format: None,
34043 default: None,
34044 inferred_type: None,
34045 })));
34046 }
34047 _ => {
34048 // Default: DATEDIFF(end, start) for DAY
34049 return Ok(Expression::Function(Box::new(Function::new(
34050 "DATEDIFF".to_string(),
34051 vec![arg2, arg1],
34052 ))));
34053 }
34054 }
34055 }
34056
34057 if matches!(
34058 target,
34059 DialectType::Presto | DialectType::Trino | DialectType::Athena
34060 ) {
34061 // Presto/Trino: DATE_DIFF('UNIT', start, end)
34062 return Ok(Expression::Function(Box::new(Function::new(
34063 "DATE_DIFF".to_string(),
34064 vec![
34065 Expression::Literal(Box::new(Literal::String(unit_str))),
34066 arg1,
34067 arg2,
34068 ],
34069 ))));
34070 }
34071
34072 if matches!(target, DialectType::TSQL) {
34073 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
34074 let cast_d2 = Self::ensure_cast_datetime2(arg2);
34075 let unit = Expression::Identifier(Identifier::new(unit_str));
34076 return Ok(Expression::Function(Box::new(Function::new(
34077 "DATEDIFF".to_string(),
34078 vec![unit, arg1, cast_d2],
34079 ))));
34080 }
34081
34082 if matches!(target, DialectType::PostgreSQL) {
34083 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
34084 // For now, use DATEDIFF (passthrough) with uppercased unit
34085 let unit = Expression::Identifier(Identifier::new(unit_str));
34086 return Ok(Expression::Function(Box::new(Function::new(
34087 "DATEDIFF".to_string(),
34088 vec![unit, arg1, arg2],
34089 ))));
34090 }
34091
34092 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
34093 let unit = Expression::Identifier(Identifier::new(unit_str));
34094 Ok(Expression::Function(Box::new(Function::new(
34095 "DATEDIFF".to_string(),
34096 vec![unit, arg1, arg2],
34097 ))))
34098 }
34099
34100 // DATE_DIFF(date1, date2, unit) -> standard form
34101 "DATE_DIFF" if args.len() == 3 => {
34102 let date1 = args.remove(0);
34103 let date2 = args.remove(0);
34104 let unit_expr = args.remove(0);
34105 let unit_str = get_unit_str(&unit_expr);
34106
34107 if matches!(target, DialectType::BigQuery) {
34108 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
34109 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
34110 "WEEK".to_string()
34111 } else {
34112 unit_str
34113 };
34114 let norm_d1 = Self::date_literal_to_cast(date1);
34115 let norm_d2 = Self::date_literal_to_cast(date2);
34116 let unit = Expression::Identifier(Identifier::new(norm_unit));
34117 return Ok(Expression::Function(Box::new(Function::new(
34118 f.name,
34119 vec![norm_d1, norm_d2, unit],
34120 ))));
34121 }
34122
34123 if matches!(target, DialectType::MySQL) {
34124 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
34125 let norm_d1 = Self::date_literal_to_cast(date1);
34126 let norm_d2 = Self::date_literal_to_cast(date2);
34127 return Ok(Expression::Function(Box::new(Function::new(
34128 "DATEDIFF".to_string(),
34129 vec![norm_d1, norm_d2],
34130 ))));
34131 }
34132
34133 if matches!(target, DialectType::StarRocks) {
34134 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
34135 let norm_d1 = Self::date_literal_to_cast(date1);
34136 let norm_d2 = Self::date_literal_to_cast(date2);
34137 return Ok(Expression::Function(Box::new(Function::new(
34138 "DATE_DIFF".to_string(),
34139 vec![
34140 Expression::Literal(Box::new(Literal::String(unit_str))),
34141 norm_d1,
34142 norm_d2,
34143 ],
34144 ))));
34145 }
34146
34147 if matches!(target, DialectType::DuckDB) {
34148 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
34149 let norm_d1 = Self::ensure_cast_date(date1);
34150 let norm_d2 = Self::ensure_cast_date(date2);
34151
34152 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
34153 let is_week_variant = unit_str == "WEEK"
34154 || unit_str.starts_with("WEEK(")
34155 || unit_str == "ISOWEEK";
34156 if is_week_variant {
34157 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
34158 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
34159 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
34160 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
34161 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
34162 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
34163 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
34164 Some("1") // Shift Sunday to Monday alignment
34165 } else if unit_str == "WEEK(SATURDAY)" {
34166 Some("-5")
34167 } else if unit_str == "WEEK(TUESDAY)" {
34168 Some("-1")
34169 } else if unit_str == "WEEK(WEDNESDAY)" {
34170 Some("-2")
34171 } else if unit_str == "WEEK(THURSDAY)" {
34172 Some("-3")
34173 } else if unit_str == "WEEK(FRIDAY)" {
34174 Some("-4")
34175 } else {
34176 Some("1") // default to Sunday
34177 };
34178
34179 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
34180 let shifted = if let Some(off) = offset {
34181 let interval =
34182 Expression::Interval(Box::new(crate::expressions::Interval {
34183 this: Some(Expression::Literal(Box::new(Literal::String(
34184 off.to_string(),
34185 )))),
34186 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34187 unit: crate::expressions::IntervalUnit::Day,
34188 use_plural: false,
34189 }),
34190 }));
34191 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
34192 date, interval,
34193 )))
34194 } else {
34195 date
34196 };
34197 Expression::Function(Box::new(Function::new(
34198 "DATE_TRUNC".to_string(),
34199 vec![
34200 Expression::Literal(Box::new(Literal::String(
34201 "WEEK".to_string(),
34202 ))),
34203 shifted,
34204 ],
34205 )))
34206 };
34207
34208 let trunc_d2 = make_trunc(norm_d2, day_offset);
34209 let trunc_d1 = make_trunc(norm_d1, day_offset);
34210 return Ok(Expression::Function(Box::new(Function::new(
34211 "DATE_DIFF".to_string(),
34212 vec![
34213 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
34214 trunc_d2,
34215 trunc_d1,
34216 ],
34217 ))));
34218 }
34219
34220 return Ok(Expression::Function(Box::new(Function::new(
34221 "DATE_DIFF".to_string(),
34222 vec![
34223 Expression::Literal(Box::new(Literal::String(unit_str))),
34224 norm_d2,
34225 norm_d1,
34226 ],
34227 ))));
34228 }
34229
34230 // Default: DATEDIFF(unit, date2, date1)
34231 let unit = Expression::Identifier(Identifier::new(unit_str));
34232 Ok(Expression::Function(Box::new(Function::new(
34233 "DATEDIFF".to_string(),
34234 vec![unit, date2, date1],
34235 ))))
34236 }
34237
34238 // TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
34239 "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
34240 let ts = args.remove(0);
34241 let interval_expr = args.remove(0);
34242 let (val, unit) =
34243 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
34244 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
34245 });
34246
34247 match target {
34248 DialectType::Snowflake => {
34249 // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
34250 // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
34251 // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
34252 let unit_str = Self::interval_unit_to_string(&unit);
34253 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
34254 Ok(Expression::TimestampAdd(Box::new(
34255 crate::expressions::TimestampAdd {
34256 this: Box::new(val),
34257 expression: Box::new(cast_ts),
34258 unit: Some(unit_str.to_string()),
34259 },
34260 )))
34261 }
34262 DialectType::Spark | DialectType::Databricks => {
34263 if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
34264 // Spark DATETIME_ADD: ts + INTERVAL val UNIT
34265 let interval =
34266 Expression::Interval(Box::new(crate::expressions::Interval {
34267 this: Some(val),
34268 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34269 unit,
34270 use_plural: false,
34271 }),
34272 }));
34273 Ok(Expression::Add(Box::new(
34274 crate::expressions::BinaryOp::new(ts, interval),
34275 )))
34276 } else if name == "DATETIME_ADD"
34277 && matches!(target, DialectType::Databricks)
34278 {
34279 // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
34280 let unit_str = Self::interval_unit_to_string(&unit);
34281 Ok(Expression::Function(Box::new(Function::new(
34282 "TIMESTAMPADD".to_string(),
34283 vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
34284 ))))
34285 } else {
34286 // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
34287 let unit_str = Self::interval_unit_to_string(&unit);
34288 let cast_ts =
34289 if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
34290 Self::maybe_cast_ts(ts)
34291 } else {
34292 ts
34293 };
34294 Ok(Expression::Function(Box::new(Function::new(
34295 "DATE_ADD".to_string(),
34296 vec![
34297 Expression::Identifier(Identifier::new(unit_str)),
34298 val,
34299 cast_ts,
34300 ],
34301 ))))
34302 }
34303 }
34304 DialectType::MySQL => {
34305 // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
34306 let mysql_ts = if name.starts_with("TIMESTAMP") {
34307 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
34308 match &ts {
34309 Expression::Function(ref inner_f)
34310 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
34311 {
34312 // Already wrapped, keep as-is
34313 ts
34314 }
34315 _ => {
34316 // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
34317 let unwrapped = match ts {
34318 Expression::Literal(lit)
34319 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
34320 {
34321 let Literal::Timestamp(s) = lit.as_ref() else {
34322 unreachable!()
34323 };
34324 Expression::Literal(Box::new(Literal::String(
34325 s.clone(),
34326 )))
34327 }
34328 other => other,
34329 };
34330 Expression::Function(Box::new(Function::new(
34331 "TIMESTAMP".to_string(),
34332 vec![unwrapped],
34333 )))
34334 }
34335 }
34336 } else {
34337 ts
34338 };
34339 Ok(Expression::DateAdd(Box::new(
34340 crate::expressions::DateAddFunc {
34341 this: mysql_ts,
34342 interval: val,
34343 unit,
34344 },
34345 )))
34346 }
34347 _ => {
34348 // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
34349 let cast_ts = if matches!(target, DialectType::DuckDB) {
34350 if name == "DATETIME_ADD" {
34351 Self::ensure_cast_timestamp(ts)
34352 } else if name.starts_with("TIMESTAMP") {
34353 Self::maybe_cast_ts_to_tz(ts, &name)
34354 } else {
34355 ts
34356 }
34357 } else {
34358 ts
34359 };
34360 Ok(Expression::DateAdd(Box::new(
34361 crate::expressions::DateAddFunc {
34362 this: cast_ts,
34363 interval: val,
34364 unit,
34365 },
34366 )))
34367 }
34368 }
34369 }
34370
34371 // TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
34372 "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
34373 let ts = args.remove(0);
34374 let interval_expr = args.remove(0);
34375 let (val, unit) =
34376 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
34377 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
34378 });
34379
34380 match target {
34381 DialectType::Snowflake => {
34382 // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
34383 let unit_str = Self::interval_unit_to_string(&unit);
34384 let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
34385 let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
34386 val,
34387 Expression::Neg(Box::new(crate::expressions::UnaryOp {
34388 this: Expression::number(1),
34389 inferred_type: None,
34390 })),
34391 )));
34392 Ok(Expression::TimestampAdd(Box::new(
34393 crate::expressions::TimestampAdd {
34394 this: Box::new(neg_val),
34395 expression: Box::new(cast_ts),
34396 unit: Some(unit_str.to_string()),
34397 },
34398 )))
34399 }
34400 DialectType::Spark | DialectType::Databricks => {
34401 if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
34402 || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
34403 {
34404 // Spark: ts - INTERVAL val UNIT
34405 let cast_ts = if name.starts_with("TIMESTAMP") {
34406 Self::maybe_cast_ts(ts)
34407 } else {
34408 ts
34409 };
34410 let interval =
34411 Expression::Interval(Box::new(crate::expressions::Interval {
34412 this: Some(val),
34413 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34414 unit,
34415 use_plural: false,
34416 }),
34417 }));
34418 Ok(Expression::Sub(Box::new(
34419 crate::expressions::BinaryOp::new(cast_ts, interval),
34420 )))
34421 } else {
34422 // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
34423 let unit_str = Self::interval_unit_to_string(&unit);
34424 let neg_val =
34425 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
34426 val,
34427 Expression::Neg(Box::new(crate::expressions::UnaryOp {
34428 this: Expression::number(1),
34429 inferred_type: None,
34430 })),
34431 )));
34432 Ok(Expression::Function(Box::new(Function::new(
34433 "TIMESTAMPADD".to_string(),
34434 vec![
34435 Expression::Identifier(Identifier::new(unit_str)),
34436 neg_val,
34437 ts,
34438 ],
34439 ))))
34440 }
34441 }
34442 DialectType::MySQL => {
34443 let mysql_ts = if name.starts_with("TIMESTAMP") {
34444 // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
34445 match &ts {
34446 Expression::Function(ref inner_f)
34447 if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
34448 {
34449 // Already wrapped, keep as-is
34450 ts
34451 }
34452 _ => {
34453 let unwrapped = match ts {
34454 Expression::Literal(lit)
34455 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
34456 {
34457 let Literal::Timestamp(s) = lit.as_ref() else {
34458 unreachable!()
34459 };
34460 Expression::Literal(Box::new(Literal::String(
34461 s.clone(),
34462 )))
34463 }
34464 other => other,
34465 };
34466 Expression::Function(Box::new(Function::new(
34467 "TIMESTAMP".to_string(),
34468 vec![unwrapped],
34469 )))
34470 }
34471 }
34472 } else {
34473 ts
34474 };
34475 Ok(Expression::DateSub(Box::new(
34476 crate::expressions::DateAddFunc {
34477 this: mysql_ts,
34478 interval: val,
34479 unit,
34480 },
34481 )))
34482 }
34483 _ => {
34484 let cast_ts = if matches!(target, DialectType::DuckDB) {
34485 if name == "DATETIME_SUB" {
34486 Self::ensure_cast_timestamp(ts)
34487 } else if name.starts_with("TIMESTAMP") {
34488 Self::maybe_cast_ts_to_tz(ts, &name)
34489 } else {
34490 ts
34491 }
34492 } else {
34493 ts
34494 };
34495 Ok(Expression::DateSub(Box::new(
34496 crate::expressions::DateAddFunc {
34497 this: cast_ts,
34498 interval: val,
34499 unit,
34500 },
34501 )))
34502 }
34503 }
34504 }
34505
34506 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
34507 "DATE_SUB" if args.len() == 2 => {
34508 let date = args.remove(0);
34509 let interval_expr = args.remove(0);
34510 let (val, unit) =
34511 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
34512 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
34513 });
34514
34515 match target {
34516 DialectType::Databricks | DialectType::Spark => {
34517 // Databricks/Spark: DATE_ADD(date, -val)
34518 // Use DateAdd expression with negative val so it generates correctly
34519 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
34520 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
34521 // Instead, we directly output as a simple negated DateSub
34522 Ok(Expression::DateSub(Box::new(
34523 crate::expressions::DateAddFunc {
34524 this: date,
34525 interval: val,
34526 unit,
34527 },
34528 )))
34529 }
34530 DialectType::DuckDB => {
34531 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
34532 let cast_date = Self::ensure_cast_date(date);
34533 let interval =
34534 Expression::Interval(Box::new(crate::expressions::Interval {
34535 this: Some(val),
34536 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34537 unit,
34538 use_plural: false,
34539 }),
34540 }));
34541 Ok(Expression::Sub(Box::new(
34542 crate::expressions::BinaryOp::new(cast_date, interval),
34543 )))
34544 }
34545 DialectType::Snowflake => {
34546 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
34547 // Just ensure the date is cast properly
34548 let cast_date = Self::ensure_cast_date(date);
34549 Ok(Expression::DateSub(Box::new(
34550 crate::expressions::DateAddFunc {
34551 this: cast_date,
34552 interval: val,
34553 unit,
34554 },
34555 )))
34556 }
34557 DialectType::PostgreSQL => {
34558 // PostgreSQL: date - INTERVAL 'val UNIT'
34559 let unit_str = Self::interval_unit_to_string(&unit);
34560 let interval =
34561 Expression::Interval(Box::new(crate::expressions::Interval {
34562 this: Some(Expression::Literal(Box::new(Literal::String(
34563 format!("{} {}", Self::expr_to_string(&val), unit_str),
34564 )))),
34565 unit: None,
34566 }));
34567 Ok(Expression::Sub(Box::new(
34568 crate::expressions::BinaryOp::new(date, interval),
34569 )))
34570 }
34571 _ => Ok(Expression::DateSub(Box::new(
34572 crate::expressions::DateAddFunc {
34573 this: date,
34574 interval: val,
34575 unit,
34576 },
34577 ))),
34578 }
34579 }
34580
34581 // DATEADD(unit, val, date) -> target-specific form
34582 // Used by: Redshift, Snowflake, TSQL, ClickHouse
34583 "DATEADD" if args.len() == 3 => {
34584 let arg0 = args.remove(0);
34585 let arg1 = args.remove(0);
34586 let arg2 = args.remove(0);
34587 let unit_str = get_unit_str(&arg0);
34588
34589 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
34590 // Keep DATEADD(UNIT, val, date) with uppercased unit
34591 let unit = Expression::Identifier(Identifier::new(unit_str));
34592 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
34593 let date = if matches!(target, DialectType::TSQL)
34594 && !matches!(
34595 source,
34596 DialectType::Spark | DialectType::Databricks | DialectType::Hive
34597 ) {
34598 Self::ensure_cast_datetime2(arg2)
34599 } else {
34600 arg2
34601 };
34602 return Ok(Expression::Function(Box::new(Function::new(
34603 "DATEADD".to_string(),
34604 vec![unit, arg1, date],
34605 ))));
34606 }
34607
34608 if matches!(target, DialectType::DuckDB) {
34609 // DuckDB: date + INTERVAL 'val' UNIT
34610 let iu = parse_interval_unit(&unit_str);
34611 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34612 this: Some(arg1),
34613 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34614 unit: iu,
34615 use_plural: false,
34616 }),
34617 }));
34618 let cast_date = Self::ensure_cast_timestamp(arg2);
34619 return Ok(Expression::Add(Box::new(
34620 crate::expressions::BinaryOp::new(cast_date, interval),
34621 )));
34622 }
34623
34624 if matches!(target, DialectType::BigQuery) {
34625 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
34626 let iu = parse_interval_unit(&unit_str);
34627 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34628 this: Some(arg1),
34629 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34630 unit: iu,
34631 use_plural: false,
34632 }),
34633 }));
34634 return Ok(Expression::Function(Box::new(Function::new(
34635 "DATE_ADD".to_string(),
34636 vec![arg2, interval],
34637 ))));
34638 }
34639
34640 if matches!(target, DialectType::Databricks) {
34641 // Databricks: keep DATEADD(UNIT, val, date) format
34642 let unit = Expression::Identifier(Identifier::new(unit_str));
34643 return Ok(Expression::Function(Box::new(Function::new(
34644 "DATEADD".to_string(),
34645 vec![unit, arg1, arg2],
34646 ))));
34647 }
34648
34649 if matches!(target, DialectType::Spark) {
34650 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
34651 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
34652 if let Expression::Literal(lit) = &expr {
34653 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
34654 if let Ok(val) = n.parse::<i64>() {
34655 return Expression::Literal(Box::new(
34656 crate::expressions::Literal::Number(
34657 (val * factor).to_string(),
34658 ),
34659 ));
34660 }
34661 }
34662 }
34663 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
34664 expr,
34665 Expression::Literal(Box::new(crate::expressions::Literal::Number(
34666 factor.to_string(),
34667 ))),
34668 )))
34669 }
34670 match unit_str.as_str() {
34671 "YEAR" => {
34672 let months = multiply_expr_dateadd(arg1, 12);
34673 return Ok(Expression::Function(Box::new(Function::new(
34674 "ADD_MONTHS".to_string(),
34675 vec![arg2, months],
34676 ))));
34677 }
34678 "QUARTER" => {
34679 let months = multiply_expr_dateadd(arg1, 3);
34680 return Ok(Expression::Function(Box::new(Function::new(
34681 "ADD_MONTHS".to_string(),
34682 vec![arg2, months],
34683 ))));
34684 }
34685 "MONTH" => {
34686 return Ok(Expression::Function(Box::new(Function::new(
34687 "ADD_MONTHS".to_string(),
34688 vec![arg2, arg1],
34689 ))));
34690 }
34691 "WEEK" => {
34692 let days = multiply_expr_dateadd(arg1, 7);
34693 return Ok(Expression::Function(Box::new(Function::new(
34694 "DATE_ADD".to_string(),
34695 vec![arg2, days],
34696 ))));
34697 }
34698 "DAY" => {
34699 return Ok(Expression::Function(Box::new(Function::new(
34700 "DATE_ADD".to_string(),
34701 vec![arg2, arg1],
34702 ))));
34703 }
34704 _ => {
34705 let unit = Expression::Identifier(Identifier::new(unit_str));
34706 return Ok(Expression::Function(Box::new(Function::new(
34707 "DATE_ADD".to_string(),
34708 vec![unit, arg1, arg2],
34709 ))));
34710 }
34711 }
34712 }
34713
34714 if matches!(target, DialectType::Hive) {
34715 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
34716 match unit_str.as_str() {
34717 "DAY" => {
34718 return Ok(Expression::Function(Box::new(Function::new(
34719 "DATE_ADD".to_string(),
34720 vec![arg2, arg1],
34721 ))));
34722 }
34723 "MONTH" => {
34724 return Ok(Expression::Function(Box::new(Function::new(
34725 "ADD_MONTHS".to_string(),
34726 vec![arg2, arg1],
34727 ))));
34728 }
34729 _ => {
34730 let iu = parse_interval_unit(&unit_str);
34731 let interval =
34732 Expression::Interval(Box::new(crate::expressions::Interval {
34733 this: Some(arg1),
34734 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34735 unit: iu,
34736 use_plural: false,
34737 }),
34738 }));
34739 return Ok(Expression::Add(Box::new(
34740 crate::expressions::BinaryOp::new(arg2, interval),
34741 )));
34742 }
34743 }
34744 }
34745
34746 if matches!(target, DialectType::PostgreSQL) {
34747 // PostgreSQL: date + INTERVAL 'val UNIT'
34748 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34749 this: Some(Expression::Literal(Box::new(Literal::String(format!(
34750 "{} {}",
34751 Self::expr_to_string(&arg1),
34752 unit_str
34753 ))))),
34754 unit: None,
34755 }));
34756 return Ok(Expression::Add(Box::new(
34757 crate::expressions::BinaryOp::new(arg2, interval),
34758 )));
34759 }
34760
34761 if matches!(
34762 target,
34763 DialectType::Presto | DialectType::Trino | DialectType::Athena
34764 ) {
34765 // Presto/Trino: DATE_ADD('UNIT', val, date)
34766 return Ok(Expression::Function(Box::new(Function::new(
34767 "DATE_ADD".to_string(),
34768 vec![
34769 Expression::Literal(Box::new(Literal::String(unit_str))),
34770 arg1,
34771 arg2,
34772 ],
34773 ))));
34774 }
34775
34776 if matches!(target, DialectType::ClickHouse) {
34777 // ClickHouse: DATE_ADD(UNIT, val, date)
34778 let unit = Expression::Identifier(Identifier::new(unit_str));
34779 return Ok(Expression::Function(Box::new(Function::new(
34780 "DATE_ADD".to_string(),
34781 vec![unit, arg1, arg2],
34782 ))));
34783 }
34784
34785 // Default: keep DATEADD with uppercased unit
34786 let unit = Expression::Identifier(Identifier::new(unit_str));
34787 Ok(Expression::Function(Box::new(Function::new(
34788 "DATEADD".to_string(),
34789 vec![unit, arg1, arg2],
34790 ))))
34791 }
34792
34793 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
34794 "DATE_ADD" if args.len() == 3 => {
34795 let arg0 = args.remove(0);
34796 let arg1 = args.remove(0);
34797 let arg2 = args.remove(0);
34798 let unit_str = get_unit_str(&arg0);
34799
34800 if matches!(
34801 target,
34802 DialectType::Presto | DialectType::Trino | DialectType::Athena
34803 ) {
34804 // Presto/Trino: DATE_ADD('UNIT', val, date)
34805 return Ok(Expression::Function(Box::new(Function::new(
34806 "DATE_ADD".to_string(),
34807 vec![
34808 Expression::Literal(Box::new(Literal::String(unit_str))),
34809 arg1,
34810 arg2,
34811 ],
34812 ))));
34813 }
34814
34815 if matches!(
34816 target,
34817 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
34818 ) {
34819 // DATEADD(UNIT, val, date)
34820 let unit = Expression::Identifier(Identifier::new(unit_str));
34821 let date = if matches!(target, DialectType::TSQL) {
34822 Self::ensure_cast_datetime2(arg2)
34823 } else {
34824 arg2
34825 };
34826 return Ok(Expression::Function(Box::new(Function::new(
34827 "DATEADD".to_string(),
34828 vec![unit, arg1, date],
34829 ))));
34830 }
34831
34832 if matches!(target, DialectType::DuckDB) {
34833 // DuckDB: date + INTERVAL val UNIT
34834 let iu = parse_interval_unit(&unit_str);
34835 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34836 this: Some(arg1),
34837 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34838 unit: iu,
34839 use_plural: false,
34840 }),
34841 }));
34842 return Ok(Expression::Add(Box::new(
34843 crate::expressions::BinaryOp::new(arg2, interval),
34844 )));
34845 }
34846
34847 if matches!(target, DialectType::Spark | DialectType::Databricks) {
34848 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
34849 let unit = Expression::Identifier(Identifier::new(unit_str));
34850 return Ok(Expression::Function(Box::new(Function::new(
34851 "DATE_ADD".to_string(),
34852 vec![unit, arg1, arg2],
34853 ))));
34854 }
34855
34856 // Default: DATE_ADD(UNIT, val, date)
34857 let unit = Expression::Identifier(Identifier::new(unit_str));
34858 Ok(Expression::Function(Box::new(Function::new(
34859 "DATE_ADD".to_string(),
34860 vec![unit, arg1, arg2],
34861 ))))
34862 }
34863
34864 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
34865 "DATE_ADD" if args.len() == 2 => {
34866 let date = args.remove(0);
34867 let interval_expr = args.remove(0);
34868 let (val, unit) =
34869 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
34870 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
34871 });
34872 let unit_str = Self::interval_unit_to_string(&unit);
34873
34874 match target {
34875 DialectType::DuckDB => {
34876 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
34877 let cast_date = Self::ensure_cast_date(date);
34878 let quoted_val = Self::quote_interval_val(&val);
34879 let interval =
34880 Expression::Interval(Box::new(crate::expressions::Interval {
34881 this: Some(quoted_val),
34882 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34883 unit,
34884 use_plural: false,
34885 }),
34886 }));
34887 Ok(Expression::Add(Box::new(
34888 crate::expressions::BinaryOp::new(cast_date, interval),
34889 )))
34890 }
34891 DialectType::PostgreSQL => {
34892 // PostgreSQL: date + INTERVAL 'val UNIT'
34893 let interval =
34894 Expression::Interval(Box::new(crate::expressions::Interval {
34895 this: Some(Expression::Literal(Box::new(Literal::String(
34896 format!("{} {}", Self::expr_to_string(&val), unit_str),
34897 )))),
34898 unit: None,
34899 }));
34900 Ok(Expression::Add(Box::new(
34901 crate::expressions::BinaryOp::new(date, interval),
34902 )))
34903 }
34904 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
34905 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
34906 let val_str = Self::expr_to_string(&val);
34907 Ok(Expression::Function(Box::new(Function::new(
34908 "DATE_ADD".to_string(),
34909 vec![
34910 Expression::Literal(Box::new(Literal::String(
34911 unit_str.to_string(),
34912 ))),
34913 Expression::Cast(Box::new(Cast {
34914 this: Expression::Literal(Box::new(Literal::String(val_str))),
34915 to: DataType::BigInt { length: None },
34916 trailing_comments: vec![],
34917 double_colon_syntax: false,
34918 format: None,
34919 default: None,
34920 inferred_type: None,
34921 })),
34922 date,
34923 ],
34924 ))))
34925 }
34926 DialectType::Spark | DialectType::Hive => {
34927 // Spark/Hive: DATE_ADD(date, val) for DAY
34928 match unit_str {
34929 "DAY" => Ok(Expression::Function(Box::new(Function::new(
34930 "DATE_ADD".to_string(),
34931 vec![date, val],
34932 )))),
34933 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
34934 "ADD_MONTHS".to_string(),
34935 vec![date, val],
34936 )))),
34937 _ => {
34938 let iu = parse_interval_unit(&unit_str);
34939 let interval =
34940 Expression::Interval(Box::new(crate::expressions::Interval {
34941 this: Some(val),
34942 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34943 unit: iu,
34944 use_plural: false,
34945 }),
34946 }));
34947 Ok(Expression::Function(Box::new(Function::new(
34948 "DATE_ADD".to_string(),
34949 vec![date, interval],
34950 ))))
34951 }
34952 }
34953 }
34954 DialectType::Snowflake => {
34955 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
34956 let cast_date = Self::ensure_cast_date(date);
34957 let val_str = Self::expr_to_string(&val);
34958 Ok(Expression::Function(Box::new(Function::new(
34959 "DATEADD".to_string(),
34960 vec![
34961 Expression::Identifier(Identifier::new(unit_str)),
34962 Expression::Literal(Box::new(Literal::String(val_str))),
34963 cast_date,
34964 ],
34965 ))))
34966 }
34967 DialectType::TSQL | DialectType::Fabric => {
34968 let cast_date = Self::ensure_cast_datetime2(date);
34969 Ok(Expression::Function(Box::new(Function::new(
34970 "DATEADD".to_string(),
34971 vec![
34972 Expression::Identifier(Identifier::new(unit_str)),
34973 val,
34974 cast_date,
34975 ],
34976 ))))
34977 }
34978 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
34979 "DATEADD".to_string(),
34980 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
34981 )))),
34982 DialectType::MySQL => {
34983 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
34984 let quoted_val = Self::quote_interval_val(&val);
34985 let iu = parse_interval_unit(&unit_str);
34986 let interval =
34987 Expression::Interval(Box::new(crate::expressions::Interval {
34988 this: Some(quoted_val),
34989 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34990 unit: iu,
34991 use_plural: false,
34992 }),
34993 }));
34994 Ok(Expression::Function(Box::new(Function::new(
34995 "DATE_ADD".to_string(),
34996 vec![date, interval],
34997 ))))
34998 }
34999 DialectType::BigQuery => {
35000 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
35001 let quoted_val = Self::quote_interval_val(&val);
35002 let iu = parse_interval_unit(&unit_str);
35003 let interval =
35004 Expression::Interval(Box::new(crate::expressions::Interval {
35005 this: Some(quoted_val),
35006 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35007 unit: iu,
35008 use_plural: false,
35009 }),
35010 }));
35011 Ok(Expression::Function(Box::new(Function::new(
35012 "DATE_ADD".to_string(),
35013 vec![date, interval],
35014 ))))
35015 }
35016 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
35017 "DATEADD".to_string(),
35018 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
35019 )))),
35020 _ => {
35021 // Default: keep as DATE_ADD with decomposed interval
35022 Ok(Expression::DateAdd(Box::new(
35023 crate::expressions::DateAddFunc {
35024 this: date,
35025 interval: val,
35026 unit,
35027 },
35028 )))
35029 }
35030 }
35031 }
35032
35033 // ADD_MONTHS(date, val) -> target-specific form
35034 "ADD_MONTHS" if args.len() == 2 => {
35035 let date = args.remove(0);
35036 let val = args.remove(0);
35037
35038 if matches!(target, DialectType::TSQL) {
35039 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
35040 let cast_date = Self::ensure_cast_datetime2(date);
35041 return Ok(Expression::Function(Box::new(Function::new(
35042 "DATEADD".to_string(),
35043 vec![
35044 Expression::Identifier(Identifier::new("MONTH")),
35045 val,
35046 cast_date,
35047 ],
35048 ))));
35049 }
35050
35051 if matches!(target, DialectType::DuckDB) {
35052 // DuckDB: date + INTERVAL val MONTH
35053 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
35054 this: Some(val),
35055 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35056 unit: crate::expressions::IntervalUnit::Month,
35057 use_plural: false,
35058 }),
35059 }));
35060 return Ok(Expression::Add(Box::new(
35061 crate::expressions::BinaryOp::new(date, interval),
35062 )));
35063 }
35064
35065 if matches!(target, DialectType::Snowflake) {
35066 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
35067 if matches!(source, DialectType::Snowflake) {
35068 return Ok(Expression::Function(Box::new(Function::new(
35069 "ADD_MONTHS".to_string(),
35070 vec![date, val],
35071 ))));
35072 }
35073 return Ok(Expression::Function(Box::new(Function::new(
35074 "DATEADD".to_string(),
35075 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
35076 ))));
35077 }
35078
35079 if matches!(target, DialectType::Spark | DialectType::Databricks) {
35080 // Spark: ADD_MONTHS(date, val) - keep as is
35081 return Ok(Expression::Function(Box::new(Function::new(
35082 "ADD_MONTHS".to_string(),
35083 vec![date, val],
35084 ))));
35085 }
35086
35087 if matches!(target, DialectType::Hive) {
35088 return Ok(Expression::Function(Box::new(Function::new(
35089 "ADD_MONTHS".to_string(),
35090 vec![date, val],
35091 ))));
35092 }
35093
35094 if matches!(
35095 target,
35096 DialectType::Presto | DialectType::Trino | DialectType::Athena
35097 ) {
35098 // Presto: DATE_ADD('MONTH', val, date)
35099 return Ok(Expression::Function(Box::new(Function::new(
35100 "DATE_ADD".to_string(),
35101 vec![
35102 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
35103 val,
35104 date,
35105 ],
35106 ))));
35107 }
35108
35109 // Default: keep ADD_MONTHS
35110 Ok(Expression::Function(Box::new(Function::new(
35111 "ADD_MONTHS".to_string(),
35112 vec![date, val],
35113 ))))
35114 }
35115
35116 // SAFE_DIVIDE(x, y) -> target-specific form directly
35117 "SAFE_DIVIDE" if args.len() == 2 => {
35118 let x = args.remove(0);
35119 let y = args.remove(0);
35120 // Wrap x and y in parens if they're complex expressions
35121 let y_ref = match &y {
35122 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
35123 y.clone()
35124 }
35125 _ => Expression::Paren(Box::new(Paren {
35126 this: y.clone(),
35127 trailing_comments: vec![],
35128 })),
35129 };
35130 let x_ref = match &x {
35131 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
35132 x.clone()
35133 }
35134 _ => Expression::Paren(Box::new(Paren {
35135 this: x.clone(),
35136 trailing_comments: vec![],
35137 })),
35138 };
35139 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
35140 y_ref.clone(),
35141 Expression::number(0),
35142 )));
35143 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
35144 x_ref.clone(),
35145 y_ref.clone(),
35146 )));
35147
35148 match target {
35149 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
35150 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
35151 )),
35152 DialectType::DuckDB | DialectType::PostgreSQL => {
35153 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
35154 let result_div = if matches!(target, DialectType::PostgreSQL) {
35155 let cast_x = Expression::Cast(Box::new(Cast {
35156 this: x_ref,
35157 to: DataType::Custom {
35158 name: "DOUBLE PRECISION".to_string(),
35159 },
35160 trailing_comments: vec![],
35161 double_colon_syntax: false,
35162 format: None,
35163 default: None,
35164 inferred_type: None,
35165 }));
35166 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
35167 cast_x, y_ref,
35168 )))
35169 } else {
35170 div_expr
35171 };
35172 Ok(Expression::Case(Box::new(crate::expressions::Case {
35173 operand: None,
35174 whens: vec![(condition, result_div)],
35175 else_: Some(Expression::Null(crate::expressions::Null)),
35176 comments: Vec::new(),
35177 inferred_type: None,
35178 })))
35179 }
35180 DialectType::Snowflake => {
35181 // IFF(y <> 0, x / y, NULL)
35182 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35183 condition,
35184 true_value: div_expr,
35185 false_value: Some(Expression::Null(crate::expressions::Null)),
35186 original_name: Some("IFF".to_string()),
35187 inferred_type: None,
35188 })))
35189 }
35190 DialectType::Presto | DialectType::Trino => {
35191 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
35192 let cast_x = Expression::Cast(Box::new(Cast {
35193 this: x_ref,
35194 to: DataType::Double {
35195 precision: None,
35196 scale: None,
35197 },
35198 trailing_comments: vec![],
35199 double_colon_syntax: false,
35200 format: None,
35201 default: None,
35202 inferred_type: None,
35203 }));
35204 let cast_div = Expression::Div(Box::new(
35205 crate::expressions::BinaryOp::new(cast_x, y_ref),
35206 ));
35207 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35208 condition,
35209 true_value: cast_div,
35210 false_value: Some(Expression::Null(crate::expressions::Null)),
35211 original_name: None,
35212 inferred_type: None,
35213 })))
35214 }
35215 _ => {
35216 // IF(y <> 0, x / y, NULL)
35217 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35218 condition,
35219 true_value: div_expr,
35220 false_value: Some(Expression::Null(crate::expressions::Null)),
35221 original_name: None,
35222 inferred_type: None,
35223 })))
35224 }
35225 }
35226 }
35227
35228 // GENERATE_UUID() -> UUID() with CAST to string
35229 "GENERATE_UUID" => {
35230 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
35231 this: None,
35232 name: None,
35233 is_string: None,
35234 }));
35235 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
35236 let cast_type = match target {
35237 DialectType::DuckDB => Some(DataType::Text),
35238 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
35239 length: None,
35240 parenthesized_length: false,
35241 }),
35242 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35243 Some(DataType::String { length: None })
35244 }
35245 _ => None,
35246 };
35247 if let Some(dt) = cast_type {
35248 Ok(Expression::Cast(Box::new(Cast {
35249 this: uuid_expr,
35250 to: dt,
35251 trailing_comments: vec![],
35252 double_colon_syntax: false,
35253 format: None,
35254 default: None,
35255 inferred_type: None,
35256 })))
35257 } else {
35258 Ok(uuid_expr)
35259 }
35260 }
35261
35262 // COUNTIF(x) -> CountIf expression
35263 "COUNTIF" if args.len() == 1 => {
35264 let arg = args.remove(0);
35265 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
35266 this: arg,
35267 distinct: false,
35268 filter: None,
35269 order_by: vec![],
35270 name: None,
35271 ignore_nulls: None,
35272 having_max: None,
35273 limit: None,
35274 inferred_type: None,
35275 })))
35276 }
35277
35278 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
35279 "EDIT_DISTANCE" => {
35280 // Strip named arguments (max_distance => N) and pass as positional
35281 let mut positional_args: Vec<Expression> = vec![];
35282 for arg in args {
35283 match arg {
35284 Expression::NamedArgument(na) => {
35285 positional_args.push(na.value);
35286 }
35287 other => positional_args.push(other),
35288 }
35289 }
35290 if positional_args.len() >= 2 {
35291 let col1 = positional_args.remove(0);
35292 let col2 = positional_args.remove(0);
35293 let levenshtein = crate::expressions::BinaryFunc {
35294 this: col1,
35295 expression: col2,
35296 original_name: None,
35297 inferred_type: None,
35298 };
35299 // A third positional argument is taken as max_distance and emitted through a plain function call (or a CASE for DuckDB) instead of the Levenshtein node
35300 if !positional_args.is_empty() {
35301 let max_dist = positional_args.remove(0);
35302 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
35303 if matches!(target, DialectType::DuckDB) {
35304 let lev = Expression::Function(Box::new(Function::new(
35305 "LEVENSHTEIN".to_string(),
35306 vec![levenshtein.this, levenshtein.expression],
35307 )));
35308 let lev_is_null =
35309 Expression::IsNull(Box::new(crate::expressions::IsNull {
35310 this: lev.clone(),
35311 not: false,
35312 postfix_form: false,
35313 }));
35314 let max_is_null =
35315 Expression::IsNull(Box::new(crate::expressions::IsNull {
35316 this: max_dist.clone(),
35317 not: false,
35318 postfix_form: false,
35319 }));
35320 let null_check =
35321 Expression::Or(Box::new(crate::expressions::BinaryOp {
35322 left: lev_is_null,
35323 right: max_is_null,
35324 left_comments: Vec::new(),
35325 operator_comments: Vec::new(),
35326 trailing_comments: Vec::new(),
35327 inferred_type: None,
35328 }));
35329 let least =
35330 Expression::Least(Box::new(crate::expressions::VarArgFunc {
35331 expressions: vec![lev, max_dist],
35332 original_name: None,
35333 inferred_type: None,
35334 }));
35335 return Ok(Expression::Case(Box::new(crate::expressions::Case {
35336 operand: None,
35337 whens: vec![(
35338 null_check,
35339 Expression::Null(crate::expressions::Null),
35340 )],
35341 else_: Some(least),
35342 comments: Vec::new(),
35343 inferred_type: None,
35344 })));
35345 }
35346 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
35347 all_args.extend(positional_args);
35348 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
35349 let func_name = if matches!(target, DialectType::PostgreSQL) {
35350 "LEVENSHTEIN_LESS_EQUAL"
35351 } else {
35352 "LEVENSHTEIN"
35353 };
35354 return Ok(Expression::Function(Box::new(Function::new(
35355 func_name.to_string(),
35356 all_args,
35357 ))));
35358 }
35359 Ok(Expression::Levenshtein(Box::new(levenshtein)))
35360 } else {
35361 Ok(Expression::Function(Box::new(Function::new(
35362 "EDIT_DISTANCE".to_string(),
35363 positional_args,
35364 ))))
35365 }
35366 }
35367
35368 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
35369 "TIMESTAMP_SECONDS" if args.len() == 1 => {
35370 let arg = args.remove(0);
35371 Ok(Expression::UnixToTime(Box::new(
35372 crate::expressions::UnixToTime {
35373 this: Box::new(arg),
35374 scale: Some(0),
35375 zone: None,
35376 hours: None,
35377 minutes: None,
35378 format: None,
35379 target_type: None,
35380 },
35381 )))
35382 }
35383
35384 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
35385 "TIMESTAMP_MILLIS" if args.len() == 1 => {
35386 let arg = args.remove(0);
35387 Ok(Expression::UnixToTime(Box::new(
35388 crate::expressions::UnixToTime {
35389 this: Box::new(arg),
35390 scale: Some(3),
35391 zone: None,
35392 hours: None,
35393 minutes: None,
35394 format: None,
35395 target_type: None,
35396 },
35397 )))
35398 }
35399
35400 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
35401 "TIMESTAMP_MICROS" if args.len() == 1 => {
35402 let arg = args.remove(0);
35403 Ok(Expression::UnixToTime(Box::new(
35404 crate::expressions::UnixToTime {
35405 this: Box::new(arg),
35406 scale: Some(6),
35407 zone: None,
35408 hours: None,
35409 minutes: None,
35410 format: None,
35411 target_type: None,
35412 },
35413 )))
35414 }
35415
35416 // DIV(x, y) -> IntDiv expression
35417 "DIV" if args.len() == 2 => {
35418 let x = args.remove(0);
35419 let y = args.remove(0);
35420 Ok(Expression::IntDiv(Box::new(
35421 crate::expressions::BinaryFunc {
35422 this: x,
35423 expression: y,
35424 original_name: None,
35425 inferred_type: None,
35426 },
35427 )))
35428 }
35429
35430 // TO_HEX(x) -> target-specific form
35431 "TO_HEX" if args.len() == 1 => {
35432 let arg = args.remove(0);
35433 // Check if inner function already returns hex string in certain targets
35434 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
35435 if matches!(target, DialectType::BigQuery) {
35436 // BQ->BQ: keep as TO_HEX
35437 Ok(Expression::Function(Box::new(Function::new(
35438 "TO_HEX".to_string(),
35439 vec![arg],
35440 ))))
35441 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
35442 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
35443 Ok(arg)
35444 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
35445 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
35446 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
35447 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
35448 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
35449 if let Expression::Function(ref inner_f) = arg {
35450 let inner_args = inner_f.args.clone();
35451 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
35452 "SHA1" => Expression::Function(Box::new(Function::new(
35453 "SHA1_BINARY".to_string(),
35454 inner_args,
35455 ))),
35456 "MD5" => Expression::Function(Box::new(Function::new(
35457 "MD5_BINARY".to_string(),
35458 inner_args,
35459 ))),
35460 "SHA256" => {
35461 let mut a = inner_args;
35462 a.push(Expression::number(256));
35463 Expression::Function(Box::new(Function::new(
35464 "SHA2_BINARY".to_string(),
35465 a,
35466 )))
35467 }
35468 "SHA512" => {
35469 let mut a = inner_args;
35470 a.push(Expression::number(512));
35471 Expression::Function(Box::new(Function::new(
35472 "SHA2_BINARY".to_string(),
35473 a,
35474 )))
35475 }
35476 _ => arg.clone(),
35477 };
35478 Ok(Expression::Function(Box::new(Function::new(
35479 "TO_CHAR".to_string(),
35480 vec![binary_func],
35481 ))))
35482 } else {
35483 let inner = Expression::Function(Box::new(Function::new(
35484 "HEX".to_string(),
35485 vec![arg],
35486 )));
35487 Ok(Expression::Lower(Box::new(
35488 crate::expressions::UnaryFunc::new(inner),
35489 )))
35490 }
35491 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
35492 let inner = Expression::Function(Box::new(Function::new(
35493 "TO_HEX".to_string(),
35494 vec![arg],
35495 )));
35496 Ok(Expression::Lower(Box::new(
35497 crate::expressions::UnaryFunc::new(inner),
35498 )))
35499 } else {
35500 let inner =
35501 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
35502 Ok(Expression::Lower(Box::new(
35503 crate::expressions::UnaryFunc::new(inner),
35504 )))
35505 }
35506 }
35507
35508 // LAST_DAY(date, unit) -> LAST_DAY(date): this arm drops the unit for every target (no PostgreSQL-specific rewrite happens here)
35509 "LAST_DAY" if args.len() == 2 => {
35510 let date = args.remove(0);
35511 let _unit = args.remove(0); // Strip the unit (MONTH is default)
35512 Ok(Expression::Function(Box::new(Function::new(
35513 "LAST_DAY".to_string(),
35514 vec![date],
35515 ))))
35516 }
35517
35518 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
35519 "GENERATE_ARRAY" => {
35520 let start = args.get(0).cloned();
35521 let end = args.get(1).cloned();
35522 let step = args.get(2).cloned();
35523 Ok(Expression::GenerateSeries(Box::new(
35524 crate::expressions::GenerateSeries {
35525 start: start.map(Box::new),
35526 end: end.map(Box::new),
35527 step: step.map(Box::new),
35528 is_end_exclusive: None,
35529 },
35530 )))
35531 }
35532
35533 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
35534 "GENERATE_TIMESTAMP_ARRAY" => {
35535 let start = args.get(0).cloned();
35536 let end = args.get(1).cloned();
35537 let step = args.get(2).cloned();
35538
35539 if matches!(target, DialectType::DuckDB) {
35540 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
35541 // Only cast string literals - leave columns/expressions as-is
35542 let maybe_cast_ts = |expr: Expression| -> Expression {
35543 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35544 {
35545 Expression::Cast(Box::new(Cast {
35546 this: expr,
35547 to: DataType::Timestamp {
35548 precision: None,
35549 timezone: false,
35550 },
35551 trailing_comments: vec![],
35552 double_colon_syntax: false,
35553 format: None,
35554 default: None,
35555 inferred_type: None,
35556 }))
35557 } else {
35558 expr
35559 }
35560 };
35561 let cast_start = start.map(maybe_cast_ts);
35562 let cast_end = end.map(maybe_cast_ts);
35563 Ok(Expression::GenerateSeries(Box::new(
35564 crate::expressions::GenerateSeries {
35565 start: cast_start.map(Box::new),
35566 end: cast_end.map(Box::new),
35567 step: step.map(Box::new),
35568 is_end_exclusive: None,
35569 },
35570 )))
35571 } else {
35572 Ok(Expression::GenerateSeries(Box::new(
35573 crate::expressions::GenerateSeries {
35574 start: start.map(Box::new),
35575 end: end.map(Box::new),
35576 step: step.map(Box::new),
35577 is_end_exclusive: None,
35578 },
35579 )))
35580 }
35581 }
35582
35583 // TO_JSON(x) -> target-specific (from Spark/Hive)
35584 "TO_JSON" => {
35585 match target {
35586 DialectType::Presto | DialectType::Trino => {
35587 // JSON_FORMAT(CAST(x AS JSON))
35588 let arg = args
35589 .into_iter()
35590 .next()
35591 .unwrap_or(Expression::Null(crate::expressions::Null));
35592 let cast_json = Expression::Cast(Box::new(Cast {
35593 this: arg,
35594 to: DataType::Custom {
35595 name: "JSON".to_string(),
35596 },
35597 trailing_comments: vec![],
35598 double_colon_syntax: false,
35599 format: None,
35600 default: None,
35601 inferred_type: None,
35602 }));
35603 Ok(Expression::Function(Box::new(Function::new(
35604 "JSON_FORMAT".to_string(),
35605 vec![cast_json],
35606 ))))
35607 }
35608 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
35609 "TO_JSON_STRING".to_string(),
35610 args,
35611 )))),
35612 DialectType::DuckDB => {
35613 // CAST(TO_JSON(x) AS TEXT)
35614 let arg = args
35615 .into_iter()
35616 .next()
35617 .unwrap_or(Expression::Null(crate::expressions::Null));
35618 let to_json = Expression::Function(Box::new(Function::new(
35619 "TO_JSON".to_string(),
35620 vec![arg],
35621 )));
35622 Ok(Expression::Cast(Box::new(Cast {
35623 this: to_json,
35624 to: DataType::Text,
35625 trailing_comments: vec![],
35626 double_colon_syntax: false,
35627 format: None,
35628 default: None,
35629 inferred_type: None,
35630 })))
35631 }
35632 _ => Ok(Expression::Function(Box::new(Function::new(
35633 "TO_JSON".to_string(),
35634 args,
35635 )))),
35636 }
35637 }
35638
35639 // TO_JSON_STRING(x) -> target-specific
35640 "TO_JSON_STRING" => {
35641 match target {
35642 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
35643 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
35644 ),
35645 DialectType::Presto | DialectType::Trino => {
35646 // JSON_FORMAT(CAST(x AS JSON))
35647 let arg = args
35648 .into_iter()
35649 .next()
35650 .unwrap_or(Expression::Null(crate::expressions::Null));
35651 let cast_json = Expression::Cast(Box::new(Cast {
35652 this: arg,
35653 to: DataType::Custom {
35654 name: "JSON".to_string(),
35655 },
35656 trailing_comments: vec![],
35657 double_colon_syntax: false,
35658 format: None,
35659 default: None,
35660 inferred_type: None,
35661 }));
35662 Ok(Expression::Function(Box::new(Function::new(
35663 "JSON_FORMAT".to_string(),
35664 vec![cast_json],
35665 ))))
35666 }
35667 DialectType::DuckDB => {
35668 // CAST(TO_JSON(x) AS TEXT)
35669 let arg = args
35670 .into_iter()
35671 .next()
35672 .unwrap_or(Expression::Null(crate::expressions::Null));
35673 let to_json = Expression::Function(Box::new(Function::new(
35674 "TO_JSON".to_string(),
35675 vec![arg],
35676 )));
35677 Ok(Expression::Cast(Box::new(Cast {
35678 this: to_json,
35679 to: DataType::Text,
35680 trailing_comments: vec![],
35681 double_colon_syntax: false,
35682 format: None,
35683 default: None,
35684 inferred_type: None,
35685 })))
35686 }
35687 DialectType::Snowflake => {
35688 // TO_JSON(x)
35689 Ok(Expression::Function(Box::new(Function::new(
35690 "TO_JSON".to_string(),
35691 args,
35692 ))))
35693 }
35694 _ => Ok(Expression::Function(Box::new(Function::new(
35695 "TO_JSON_STRING".to_string(),
35696 args,
35697 )))),
35698 }
35699 }
35700
35701 // SAFE_ADD(x, y) -> SafeAdd expression
35702 "SAFE_ADD" if args.len() == 2 => {
35703 let x = args.remove(0);
35704 let y = args.remove(0);
35705 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
35706 this: Box::new(x),
35707 expression: Box::new(y),
35708 })))
35709 }
35710
35711 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
35712 "SAFE_SUBTRACT" if args.len() == 2 => {
35713 let x = args.remove(0);
35714 let y = args.remove(0);
35715 Ok(Expression::SafeSubtract(Box::new(
35716 crate::expressions::SafeSubtract {
35717 this: Box::new(x),
35718 expression: Box::new(y),
35719 },
35720 )))
35721 }
35722
35723 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
35724 "SAFE_MULTIPLY" if args.len() == 2 => {
35725 let x = args.remove(0);
35726 let y = args.remove(0);
35727 Ok(Expression::SafeMultiply(Box::new(
35728 crate::expressions::SafeMultiply {
35729 this: Box::new(x),
35730 expression: Box::new(y),
35731 },
35732 )))
35733 }
35734
35735 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
35736 "REGEXP_CONTAINS" if args.len() == 2 => {
35737 let str_expr = args.remove(0);
35738 let pattern = args.remove(0);
35739 Ok(Expression::RegexpLike(Box::new(
35740 crate::expressions::RegexpFunc {
35741 this: str_expr,
35742 pattern,
35743 flags: None,
35744 },
35745 )))
35746 }
35747
35748 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
35749 "CONTAINS_SUBSTR" if args.len() == 2 => {
35750 let a = args.remove(0);
35751 let b = args.remove(0);
35752 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
35753 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
35754 Ok(Expression::Function(Box::new(Function::new(
35755 "CONTAINS".to_string(),
35756 vec![lower_a, lower_b],
35757 ))))
35758 }
35759
35760 // INT64(x) -> CAST(x AS BIGINT)
35761 "INT64" if args.len() == 1 => {
35762 let arg = args.remove(0);
35763 Ok(Expression::Cast(Box::new(Cast {
35764 this: arg,
35765 to: DataType::BigInt { length: None },
35766 trailing_comments: vec![],
35767 double_colon_syntax: false,
35768 format: None,
35769 default: None,
35770 inferred_type: None,
35771 })))
35772 }
35773
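35774 // INSTR(str, substr[, ...]) -> CHARINDEX(substr, str) for Snowflake, INSTR(str, substr) otherwise; NOTE(review): arguments after the first two are discarded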
35775 "INSTR" if args.len() >= 2 => {
35776 let str_expr = args.remove(0);
35777 let substr = args.remove(0);
35778 if matches!(target, DialectType::Snowflake) {
35779 // CHARINDEX(substr, str)
35780 Ok(Expression::Function(Box::new(Function::new(
35781 "CHARINDEX".to_string(),
35782 vec![substr, str_expr],
35783 ))))
35784 } else if matches!(target, DialectType::BigQuery) {
35785 // Keep as INSTR
35786 Ok(Expression::Function(Box::new(Function::new(
35787 "INSTR".to_string(),
35788 vec![str_expr, substr],
35789 ))))
35790 } else {
35791 // Default: keep as INSTR
35792 Ok(Expression::Function(Box::new(Function::new(
35793 "INSTR".to_string(),
35794 vec![str_expr, substr],
35795 ))))
35796 }
35797 }
35798
35799 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
35800 "DATE_TRUNC" if args.len() == 2 => {
35801 let expr = args.remove(0);
35802 let unit_expr = args.remove(0);
35803 let unit_str = get_unit_str(&unit_expr);
35804
35805 match target {
35806 DialectType::DuckDB
35807 | DialectType::Snowflake
35808 | DialectType::PostgreSQL
35809 | DialectType::Presto
35810 | DialectType::Trino
35811 | DialectType::Databricks
35812 | DialectType::Spark
35813 | DialectType::Redshift
35814 | DialectType::ClickHouse
35815 | DialectType::TSQL => {
35816 // Standard: DATE_TRUNC('UNIT', expr)
35817 Ok(Expression::Function(Box::new(Function::new(
35818 "DATE_TRUNC".to_string(),
35819 vec![
35820 Expression::Literal(Box::new(Literal::String(unit_str))),
35821 expr,
35822 ],
35823 ))))
35824 }
35825 _ => {
35826 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
35827 Ok(Expression::Function(Box::new(Function::new(
35828 "DATE_TRUNC".to_string(),
35829 vec![expr, unit_expr],
35830 ))))
35831 }
35832 }
35833 }
35834
35835 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
35836 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
35837 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
35838 let ts = args.remove(0);
35839 let unit_expr = args.remove(0);
35840 let tz = if !args.is_empty() {
35841 Some(args.remove(0))
35842 } else {
35843 None
35844 };
35845 let unit_str = get_unit_str(&unit_expr);
35846
35847 match target {
35848 DialectType::DuckDB => {
35849 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
35850 // With timezone and a DAY/WEEK/MONTH/QUARTER/YEAR unit: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
35851 // With timezone and any other unit, or with no timezone at all: plain DATE_TRUNC('UNIT', ts)
35852 let is_coarse = matches!(
35853 unit_str.as_str(),
35854 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
35855 );
35856 // For DATETIME_TRUNC, cast string args to TIMESTAMP
35857 let cast_ts = if name == "DATETIME_TRUNC" {
35858 match ts {
35859 Expression::Literal(ref lit)
35860 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
35861 {
35862 Expression::Cast(Box::new(Cast {
35863 this: ts,
35864 to: DataType::Timestamp {
35865 precision: None,
35866 timezone: false,
35867 },
35868 trailing_comments: vec![],
35869 double_colon_syntax: false,
35870 format: None,
35871 default: None,
35872 inferred_type: None,
35873 }))
35874 }
35875 _ => Self::maybe_cast_ts_to_tz(ts, &name),
35876 }
35877 } else {
35878 Self::maybe_cast_ts_to_tz(ts, &name)
35879 };
35880
35881 if let Some(tz_arg) = tz {
35882 if is_coarse {
35883 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
35884 let at_tz = Expression::AtTimeZone(Box::new(
35885 crate::expressions::AtTimeZone {
35886 this: cast_ts,
35887 zone: tz_arg.clone(),
35888 },
35889 ));
35890 let date_trunc = Expression::Function(Box::new(Function::new(
35891 "DATE_TRUNC".to_string(),
35892 vec![
35893 Expression::Literal(Box::new(Literal::String(unit_str))),
35894 at_tz,
35895 ],
35896 )));
35897 Ok(Expression::AtTimeZone(Box::new(
35898 crate::expressions::AtTimeZone {
35899 this: date_trunc,
35900 zone: tz_arg,
35901 },
35902 )))
35903 } else {
35904 // Any other unit (e.g. HOUR/MINUTE): the timezone argument is dropped, just DATE_TRUNC('UNIT', ts)
35905 Ok(Expression::Function(Box::new(Function::new(
35906 "DATE_TRUNC".to_string(),
35907 vec![
35908 Expression::Literal(Box::new(Literal::String(unit_str))),
35909 cast_ts,
35910 ],
35911 ))))
35912 }
35913 } else {
35914 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
35915 Ok(Expression::Function(Box::new(Function::new(
35916 "DATE_TRUNC".to_string(),
35917 vec![
35918 Expression::Literal(Box::new(Literal::String(unit_str))),
35919 cast_ts,
35920 ],
35921 ))))
35922 }
35923 }
35924 DialectType::Databricks | DialectType::Spark => {
35925 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
35926 Ok(Expression::Function(Box::new(Function::new(
35927 "DATE_TRUNC".to_string(),
35928 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
35929 ))))
35930 }
35931 _ => {
35932 // Default: emit TIMESTAMP_TRUNC('UNIT', ts, [tz]); DATETIME_TRUNC is also emitted under this name
35933 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
35934 let mut date_trunc_args = vec![unit, ts];
35935 if let Some(tz_arg) = tz {
35936 date_trunc_args.push(tz_arg);
35937 }
35938 Ok(Expression::Function(Box::new(Function::new(
35939 "TIMESTAMP_TRUNC".to_string(),
35940 date_trunc_args,
35941 ))))
35942 }
35943 }
35944 }
35945
35946 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
35947 "TIME" => {
35948 if args.len() == 3 {
35949 // TIME(h, m, s) constructor
35950 match target {
35951 DialectType::TSQL => {
35952 // TIMEFROMPARTS(h, m, s, 0, 0)
35953 args.push(Expression::number(0));
35954 args.push(Expression::number(0));
35955 Ok(Expression::Function(Box::new(Function::new(
35956 "TIMEFROMPARTS".to_string(),
35957 args,
35958 ))))
35959 }
35960 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
35961 "MAKETIME".to_string(),
35962 args,
35963 )))),
35964 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
35965 Function::new("MAKE_TIME".to_string(), args),
35966 ))),
35967 _ => Ok(Expression::Function(Box::new(Function::new(
35968 "TIME".to_string(),
35969 args,
35970 )))),
35971 }
35972 } else if args.len() == 1 {
35973 let arg = args.remove(0);
35974 if matches!(target, DialectType::Spark) {
35975 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
35976 Ok(Expression::Cast(Box::new(Cast {
35977 this: arg,
35978 to: DataType::Timestamp {
35979 timezone: false,
35980 precision: None,
35981 },
35982 trailing_comments: vec![],
35983 double_colon_syntax: false,
35984 format: None,
35985 default: None,
35986 inferred_type: None,
35987 })))
35988 } else {
35989 // Most targets: CAST(x AS TIME)
35990 Ok(Expression::Cast(Box::new(Cast {
35991 this: arg,
35992 to: DataType::Time {
35993 precision: None,
35994 timezone: false,
35995 },
35996 trailing_comments: vec![],
35997 double_colon_syntax: false,
35998 format: None,
35999 default: None,
36000 inferred_type: None,
36001 })))
36002 }
36003 } else if args.len() == 2 {
36004 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
36005 let expr = args.remove(0);
36006 let tz = args.remove(0);
36007 let cast_tstz = Expression::Cast(Box::new(Cast {
36008 this: expr,
36009 to: DataType::Timestamp {
36010 timezone: true,
36011 precision: None,
36012 },
36013 trailing_comments: vec![],
36014 double_colon_syntax: false,
36015 format: None,
36016 default: None,
36017 inferred_type: None,
36018 }));
36019 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36020 this: cast_tstz,
36021 zone: tz,
36022 }));
36023 Ok(Expression::Cast(Box::new(Cast {
36024 this: at_tz,
36025 to: DataType::Time {
36026 precision: None,
36027 timezone: false,
36028 },
36029 trailing_comments: vec![],
36030 double_colon_syntax: false,
36031 format: None,
36032 default: None,
36033 inferred_type: None,
36034 })))
36035 } else {
36036 Ok(Expression::Function(Box::new(Function::new(
36037 "TIME".to_string(),
36038 args,
36039 ))))
36040 }
36041 }
36042
36043 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
36044 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
36045 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
36046 // DATETIME(y, m, d, h, min, s) -> target-specific
36047 "DATETIME" => {
36048 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
36049 if matches!(target, DialectType::BigQuery) {
36050 if args.len() == 2 {
36051 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
36052 if has_time_literal {
36053 let first = args.remove(0);
36054 let second = args.remove(0);
36055 let time_as_cast = match second {
36056 Expression::Literal(lit)
36057 if matches!(lit.as_ref(), Literal::Time(_)) =>
36058 {
36059 let Literal::Time(s) = lit.as_ref() else {
36060 unreachable!()
36061 };
36062 Expression::Cast(Box::new(Cast {
36063 this: Expression::Literal(Box::new(Literal::String(
36064 s.clone(),
36065 ))),
36066 to: DataType::Time {
36067 precision: None,
36068 timezone: false,
36069 },
36070 trailing_comments: vec![],
36071 double_colon_syntax: false,
36072 format: None,
36073 default: None,
36074 inferred_type: None,
36075 }))
36076 }
36077 other => other,
36078 };
36079 return Ok(Expression::Function(Box::new(Function::new(
36080 "DATETIME".to_string(),
36081 vec![first, time_as_cast],
36082 ))));
36083 }
36084 }
36085 return Ok(Expression::Function(Box::new(Function::new(
36086 "DATETIME".to_string(),
36087 args,
36088 ))));
36089 }
36090
36091 if args.len() == 1 {
36092 let arg = args.remove(0);
36093 Ok(Expression::Cast(Box::new(Cast {
36094 this: arg,
36095 to: DataType::Timestamp {
36096 timezone: false,
36097 precision: None,
36098 },
36099 trailing_comments: vec![],
36100 double_colon_syntax: false,
36101 format: None,
36102 default: None,
36103 inferred_type: None,
36104 })))
36105 } else if args.len() == 2 {
36106 let first = args.remove(0);
36107 let second = args.remove(0);
36108 // Check if second arg is a TIME literal
36109 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
36110 if is_time_literal {
36111 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
36112 let cast_date = Expression::Cast(Box::new(Cast {
36113 this: first,
36114 to: DataType::Date,
36115 trailing_comments: vec![],
36116 double_colon_syntax: false,
36117 format: None,
36118 default: None,
36119 inferred_type: None,
36120 }));
36121 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
36122 let time_as_string = match second {
36123 Expression::Literal(lit)
36124 if matches!(lit.as_ref(), Literal::Time(_)) =>
36125 {
36126 let Literal::Time(s) = lit.as_ref() else {
36127 unreachable!()
36128 };
36129 Expression::Literal(Box::new(Literal::String(s.clone())))
36130 }
36131 other => other,
36132 };
36133 let cast_time = Expression::Cast(Box::new(Cast {
36134 this: time_as_string,
36135 to: DataType::Time {
36136 precision: None,
36137 timezone: false,
36138 },
36139 trailing_comments: vec![],
36140 double_colon_syntax: false,
36141 format: None,
36142 default: None,
36143 inferred_type: None,
36144 }));
36145 let add_expr =
36146 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
36147 Ok(Expression::Cast(Box::new(Cast {
36148 this: add_expr,
36149 to: DataType::Timestamp {
36150 timezone: false,
36151 precision: None,
36152 },
36153 trailing_comments: vec![],
36154 double_colon_syntax: false,
36155 format: None,
36156 default: None,
36157 inferred_type: None,
36158 })))
36159 } else {
36160 // DATETIME('string', 'timezone')
36161 let cast_tstz = Expression::Cast(Box::new(Cast {
36162 this: first,
36163 to: DataType::Timestamp {
36164 timezone: true,
36165 precision: None,
36166 },
36167 trailing_comments: vec![],
36168 double_colon_syntax: false,
36169 format: None,
36170 default: None,
36171 inferred_type: None,
36172 }));
36173 let at_tz =
36174 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36175 this: cast_tstz,
36176 zone: second,
36177 }));
36178 Ok(Expression::Cast(Box::new(Cast {
36179 this: at_tz,
36180 to: DataType::Timestamp {
36181 timezone: false,
36182 precision: None,
36183 },
36184 trailing_comments: vec![],
36185 double_colon_syntax: false,
36186 format: None,
36187 default: None,
36188 inferred_type: None,
36189 })))
36190 }
36191 } else if args.len() >= 3 {
36192 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
36193 // Other targets keep the DATETIME(...) call unchanged
36194 if matches!(target, DialectType::Snowflake) {
36195 Ok(Expression::Function(Box::new(Function::new(
36196 "TIMESTAMP_FROM_PARTS".to_string(),
36197 args,
36198 ))))
36199 } else {
36200 Ok(Expression::Function(Box::new(Function::new(
36201 "DATETIME".to_string(),
36202 args,
36203 ))))
36204 }
36205 } else {
36206 Ok(Expression::Function(Box::new(Function::new(
36207 "DATETIME".to_string(),
36208 args,
36209 ))))
36210 }
36211 }
36212
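36213 // TIMESTAMP(x) -> cast to the timezone-aware timestamp type for every target (e.g. CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto)
36214 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz; Snowflake gets CONVERT_TIMEZONE(tz, CAST(x AS TIMESTAMP))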
36215 "TIMESTAMP" => {
36216 if args.len() == 1 {
36217 let arg = args.remove(0);
36218 Ok(Expression::Cast(Box::new(Cast {
36219 this: arg,
36220 to: DataType::Timestamp {
36221 timezone: true,
36222 precision: None,
36223 },
36224 trailing_comments: vec![],
36225 double_colon_syntax: false,
36226 format: None,
36227 default: None,
36228 inferred_type: None,
36229 })))
36230 } else if args.len() == 2 {
36231 let arg = args.remove(0);
36232 let tz = args.remove(0);
36233 let cast_ts = Expression::Cast(Box::new(Cast {
36234 this: arg,
36235 to: DataType::Timestamp {
36236 timezone: false,
36237 precision: None,
36238 },
36239 trailing_comments: vec![],
36240 double_colon_syntax: false,
36241 format: None,
36242 default: None,
36243 inferred_type: None,
36244 }));
36245 if matches!(target, DialectType::Snowflake) {
36246 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
36247 Ok(Expression::Function(Box::new(Function::new(
36248 "CONVERT_TIMEZONE".to_string(),
36249 vec![tz, cast_ts],
36250 ))))
36251 } else {
36252 Ok(Expression::AtTimeZone(Box::new(
36253 crate::expressions::AtTimeZone {
36254 this: cast_ts,
36255 zone: tz,
36256 },
36257 )))
36258 }
36259 } else {
36260 Ok(Expression::Function(Box::new(Function::new(
36261 "TIMESTAMP".to_string(),
36262 args,
36263 ))))
36264 }
36265 }
36266
36267 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
36268 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
36269 "STRING" => {
36270 if args.len() == 1 {
36271 let arg = args.remove(0);
36272 let cast_type = match target {
36273 DialectType::DuckDB => DataType::Text,
36274 _ => DataType::VarChar {
36275 length: None,
36276 parenthesized_length: false,
36277 },
36278 };
36279 Ok(Expression::Cast(Box::new(Cast {
36280 this: arg,
36281 to: cast_type,
36282 trailing_comments: vec![],
36283 double_colon_syntax: false,
36284 format: None,
36285 default: None,
36286 inferred_type: None,
36287 })))
36288 } else if args.len() == 2 {
36289 let arg = args.remove(0);
36290 let tz = args.remove(0);
36291 let cast_type = match target {
36292 DialectType::DuckDB => DataType::Text,
36293 _ => DataType::VarChar {
36294 length: None,
36295 parenthesized_length: false,
36296 },
36297 };
36298 if matches!(target, DialectType::Snowflake) {
36299 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
36300 let convert_tz = Expression::Function(Box::new(Function::new(
36301 "CONVERT_TIMEZONE".to_string(),
36302 vec![
36303 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
36304 tz,
36305 arg,
36306 ],
36307 )));
36308 Ok(Expression::Cast(Box::new(Cast {
36309 this: convert_tz,
36310 to: cast_type,
36311 trailing_comments: vec![],
36312 double_colon_syntax: false,
36313 format: None,
36314 default: None,
36315 inferred_type: None,
36316 })))
36317 } else {
36318 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
36319 let cast_ts = Expression::Cast(Box::new(Cast {
36320 this: arg,
36321 to: DataType::Timestamp {
36322 timezone: false,
36323 precision: None,
36324 },
36325 trailing_comments: vec![],
36326 double_colon_syntax: false,
36327 format: None,
36328 default: None,
36329 inferred_type: None,
36330 }));
36331 let at_utc =
36332 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36333 this: cast_ts,
36334 zone: Expression::Literal(Box::new(Literal::String(
36335 "UTC".to_string(),
36336 ))),
36337 }));
36338 let at_tz =
36339 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36340 this: at_utc,
36341 zone: tz,
36342 }));
36343 Ok(Expression::Cast(Box::new(Cast {
36344 this: at_tz,
36345 to: cast_type,
36346 trailing_comments: vec![],
36347 double_colon_syntax: false,
36348 format: None,
36349 default: None,
36350 inferred_type: None,
36351 })))
36352 }
36353 } else {
36354 Ok(Expression::Function(Box::new(Function::new(
36355 "STRING".to_string(),
36356 args,
36357 ))))
36358 }
36359 }
36360
36361 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
36362 "UNIX_SECONDS" if args.len() == 1 => {
36363 let ts = args.remove(0);
36364 match target {
36365 DialectType::DuckDB => {
36366 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
36367 let cast_ts = Self::ensure_cast_timestamptz(ts);
36368 let epoch = Expression::Function(Box::new(Function::new(
36369 "EPOCH".to_string(),
36370 vec![cast_ts],
36371 )));
36372 Ok(Expression::Cast(Box::new(Cast {
36373 this: epoch,
36374 to: DataType::BigInt { length: None },
36375 trailing_comments: vec![],
36376 double_colon_syntax: false,
36377 format: None,
36378 default: None,
36379 inferred_type: None,
36380 })))
36381 }
36382 DialectType::Snowflake => {
36383 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
36384 let epoch = Expression::Cast(Box::new(Cast {
36385 this: Expression::Literal(Box::new(Literal::String(
36386 "1970-01-01 00:00:00+00".to_string(),
36387 ))),
36388 to: DataType::Timestamp {
36389 timezone: true,
36390 precision: None,
36391 },
36392 trailing_comments: vec![],
36393 double_colon_syntax: false,
36394 format: None,
36395 default: None,
36396 inferred_type: None,
36397 }));
36398 Ok(Expression::TimestampDiff(Box::new(
36399 crate::expressions::TimestampDiff {
36400 this: Box::new(epoch),
36401 expression: Box::new(ts),
36402 unit: Some("SECONDS".to_string()),
36403 },
36404 )))
36405 }
36406 _ => Ok(Expression::Function(Box::new(Function::new(
36407 "UNIX_SECONDS".to_string(),
36408 vec![ts],
36409 )))),
36410 }
36411 }
36412
36413 "UNIX_MILLIS" if args.len() == 1 => {
36414 let ts = args.remove(0);
36415 match target {
36416 DialectType::DuckDB => {
36417 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
36418 let cast_ts = Self::ensure_cast_timestamptz(ts);
36419 Ok(Expression::Function(Box::new(Function::new(
36420 "EPOCH_MS".to_string(),
36421 vec![cast_ts],
36422 ))))
36423 }
36424 _ => Ok(Expression::Function(Box::new(Function::new(
36425 "UNIX_MILLIS".to_string(),
36426 vec![ts],
36427 )))),
36428 }
36429 }
36430
36431 "UNIX_MICROS" if args.len() == 1 => {
36432 let ts = args.remove(0);
36433 match target {
36434 DialectType::DuckDB => {
36435 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
36436 let cast_ts = Self::ensure_cast_timestamptz(ts);
36437 Ok(Expression::Function(Box::new(Function::new(
36438 "EPOCH_US".to_string(),
36439 vec![cast_ts],
36440 ))))
36441 }
36442 _ => Ok(Expression::Function(Box::new(Function::new(
36443 "UNIX_MICROS".to_string(),
36444 vec![ts],
36445 )))),
36446 }
36447 }
36448
36449 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
36450 "ARRAY_CONCAT" | "LIST_CONCAT" => {
36451 match target {
36452 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36453 // CONCAT(arr1, arr2, ...)
36454 Ok(Expression::Function(Box::new(Function::new(
36455 "CONCAT".to_string(),
36456 args,
36457 ))))
36458 }
36459 DialectType::Presto | DialectType::Trino => {
36460 // CONCAT(arr1, arr2, ...)
36461 Ok(Expression::Function(Box::new(Function::new(
36462 "CONCAT".to_string(),
36463 args,
36464 ))))
36465 }
36466 DialectType::Snowflake => {
36467 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
36468 if args.len() == 1 {
36469 // ARRAY_CAT requires 2 args, add empty array as []
36470 let empty_arr = Expression::ArrayFunc(Box::new(
36471 crate::expressions::ArrayConstructor {
36472 expressions: vec![],
36473 bracket_notation: true,
36474 use_list_keyword: false,
36475 },
36476 ));
36477 let mut new_args = args;
36478 new_args.push(empty_arr);
36479 Ok(Expression::Function(Box::new(Function::new(
36480 "ARRAY_CAT".to_string(),
36481 new_args,
36482 ))))
36483 } else if args.is_empty() {
36484 Ok(Expression::Function(Box::new(Function::new(
36485 "ARRAY_CAT".to_string(),
36486 args,
36487 ))))
36488 } else {
36489 let mut it = args.into_iter().rev();
36490 let mut result = it.next().unwrap();
36491 for arr in it {
36492 result = Expression::Function(Box::new(Function::new(
36493 "ARRAY_CAT".to_string(),
36494 vec![arr, result],
36495 )));
36496 }
36497 Ok(result)
36498 }
36499 }
36500 DialectType::PostgreSQL => {
36501 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
36502 if args.len() <= 1 {
36503 Ok(Expression::Function(Box::new(Function::new(
36504 "ARRAY_CAT".to_string(),
36505 args,
36506 ))))
36507 } else {
36508 let mut it = args.into_iter().rev();
36509 let mut result = it.next().unwrap();
36510 for arr in it {
36511 result = Expression::Function(Box::new(Function::new(
36512 "ARRAY_CAT".to_string(),
36513 vec![arr, result],
36514 )));
36515 }
36516 Ok(result)
36517 }
36518 }
36519 DialectType::Redshift => {
36520 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
36521 if args.len() <= 2 {
36522 Ok(Expression::Function(Box::new(Function::new(
36523 "ARRAY_CONCAT".to_string(),
36524 args,
36525 ))))
36526 } else {
36527 let mut it = args.into_iter().rev();
36528 let mut result = it.next().unwrap();
36529 for arr in it {
36530 result = Expression::Function(Box::new(Function::new(
36531 "ARRAY_CONCAT".to_string(),
36532 vec![arr, result],
36533 )));
36534 }
36535 Ok(result)
36536 }
36537 }
36538 DialectType::DuckDB => {
36539 // LIST_CONCAT supports multiple args natively in DuckDB
36540 Ok(Expression::Function(Box::new(Function::new(
36541 "LIST_CONCAT".to_string(),
36542 args,
36543 ))))
36544 }
36545 _ => Ok(Expression::Function(Box::new(Function::new(
36546 "ARRAY_CONCAT".to_string(),
36547 args,
36548 )))),
36549 }
36550 }
36551
36552 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
36553 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
36554 let arg = args.remove(0);
36555 match target {
36556 DialectType::Snowflake => {
36557 let array_agg =
36558 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
36559 this: arg,
36560 distinct: false,
36561 filter: None,
36562 order_by: vec![],
36563 name: None,
36564 ignore_nulls: None,
36565 having_max: None,
36566 limit: None,
36567 inferred_type: None,
36568 }));
36569 Ok(Expression::Function(Box::new(Function::new(
36570 "ARRAY_FLATTEN".to_string(),
36571 vec![array_agg],
36572 ))))
36573 }
36574 _ => Ok(Expression::Function(Box::new(Function::new(
36575 "ARRAY_CONCAT_AGG".to_string(),
36576 vec![arg],
36577 )))),
36578 }
36579 }
36580
36581 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
36582 "MD5" if args.len() == 1 => {
36583 let arg = args.remove(0);
36584 match target {
36585 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36586 // UNHEX(MD5(x))
36587 let md5 = Expression::Function(Box::new(Function::new(
36588 "MD5".to_string(),
36589 vec![arg],
36590 )));
36591 Ok(Expression::Function(Box::new(Function::new(
36592 "UNHEX".to_string(),
36593 vec![md5],
36594 ))))
36595 }
36596 DialectType::Snowflake => {
36597 // MD5_BINARY(x)
36598 Ok(Expression::Function(Box::new(Function::new(
36599 "MD5_BINARY".to_string(),
36600 vec![arg],
36601 ))))
36602 }
36603 _ => Ok(Expression::Function(Box::new(Function::new(
36604 "MD5".to_string(),
36605 vec![arg],
36606 )))),
36607 }
36608 }
36609
36610 "SHA1" if args.len() == 1 => {
36611 let arg = args.remove(0);
36612 match target {
36613 DialectType::DuckDB => {
36614 // UNHEX(SHA1(x))
36615 let sha1 = Expression::Function(Box::new(Function::new(
36616 "SHA1".to_string(),
36617 vec![arg],
36618 )));
36619 Ok(Expression::Function(Box::new(Function::new(
36620 "UNHEX".to_string(),
36621 vec![sha1],
36622 ))))
36623 }
36624 _ => Ok(Expression::Function(Box::new(Function::new(
36625 "SHA1".to_string(),
36626 vec![arg],
36627 )))),
36628 }
36629 }
36630
36631 "SHA256" if args.len() == 1 => {
36632 let arg = args.remove(0);
36633 match target {
36634 DialectType::DuckDB => {
36635 // UNHEX(SHA256(x))
36636 let sha = Expression::Function(Box::new(Function::new(
36637 "SHA256".to_string(),
36638 vec![arg],
36639 )));
36640 Ok(Expression::Function(Box::new(Function::new(
36641 "UNHEX".to_string(),
36642 vec![sha],
36643 ))))
36644 }
36645 DialectType::Snowflake => {
36646 // SHA2_BINARY(x, 256)
36647 Ok(Expression::Function(Box::new(Function::new(
36648 "SHA2_BINARY".to_string(),
36649 vec![arg, Expression::number(256)],
36650 ))))
36651 }
36652 DialectType::Redshift | DialectType::Spark => {
36653 // SHA2(x, 256)
36654 Ok(Expression::Function(Box::new(Function::new(
36655 "SHA2".to_string(),
36656 vec![arg, Expression::number(256)],
36657 ))))
36658 }
36659 _ => Ok(Expression::Function(Box::new(Function::new(
36660 "SHA256".to_string(),
36661 vec![arg],
36662 )))),
36663 }
36664 }
36665
36666 "SHA512" if args.len() == 1 => {
36667 let arg = args.remove(0);
36668 match target {
36669 DialectType::Snowflake => {
36670 // SHA2_BINARY(x, 512)
36671 Ok(Expression::Function(Box::new(Function::new(
36672 "SHA2_BINARY".to_string(),
36673 vec![arg, Expression::number(512)],
36674 ))))
36675 }
36676 DialectType::Redshift | DialectType::Spark => {
36677 // SHA2(x, 512)
36678 Ok(Expression::Function(Box::new(Function::new(
36679 "SHA2".to_string(),
36680 vec![arg, Expression::number(512)],
36681 ))))
36682 }
36683 _ => Ok(Expression::Function(Box::new(Function::new(
36684 "SHA512".to_string(),
36685 vec![arg],
36686 )))),
36687 }
36688 }
36689
36690 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
36691 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
36692 let str_expr = args.remove(0);
36693 let pattern = args.remove(0);
36694
36695 // Check if pattern contains capturing groups (parentheses)
36696 let has_groups = match &pattern {
36697 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36698 let Literal::String(s) = lit.as_ref() else {
36699 unreachable!()
36700 };
36701 s.contains('(') && s.contains(')')
36702 }
36703 _ => false,
36704 };
36705
36706 match target {
36707 DialectType::DuckDB => {
36708 let group = if has_groups {
36709 Expression::number(1)
36710 } else {
36711 Expression::number(0)
36712 };
36713 Ok(Expression::Function(Box::new(Function::new(
36714 "REGEXP_EXTRACT_ALL".to_string(),
36715 vec![str_expr, pattern, group],
36716 ))))
36717 }
36718 DialectType::Spark | DialectType::Databricks => {
36719 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
36720 if has_groups {
36721 Ok(Expression::Function(Box::new(Function::new(
36722 "REGEXP_EXTRACT_ALL".to_string(),
36723 vec![str_expr, pattern],
36724 ))))
36725 } else {
36726 Ok(Expression::Function(Box::new(Function::new(
36727 "REGEXP_EXTRACT_ALL".to_string(),
36728 vec![str_expr, pattern, Expression::number(0)],
36729 ))))
36730 }
36731 }
36732 DialectType::Presto | DialectType::Trino => {
36733 if has_groups {
36734 Ok(Expression::Function(Box::new(Function::new(
36735 "REGEXP_EXTRACT_ALL".to_string(),
36736 vec![str_expr, pattern, Expression::number(1)],
36737 ))))
36738 } else {
36739 Ok(Expression::Function(Box::new(Function::new(
36740 "REGEXP_EXTRACT_ALL".to_string(),
36741 vec![str_expr, pattern],
36742 ))))
36743 }
36744 }
36745 DialectType::Snowflake => {
36746 if has_groups {
36747 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
36748 Ok(Expression::Function(Box::new(Function::new(
36749 "REGEXP_EXTRACT_ALL".to_string(),
36750 vec![
36751 str_expr,
36752 pattern,
36753 Expression::number(1),
36754 Expression::number(1),
36755 Expression::Literal(Box::new(Literal::String("c".to_string()))),
36756 Expression::number(1),
36757 ],
36758 ))))
36759 } else {
36760 Ok(Expression::Function(Box::new(Function::new(
36761 "REGEXP_EXTRACT_ALL".to_string(),
36762 vec![str_expr, pattern],
36763 ))))
36764 }
36765 }
36766 _ => Ok(Expression::Function(Box::new(Function::new(
36767 "REGEXP_EXTRACT_ALL".to_string(),
36768 vec![str_expr, pattern],
36769 )))),
36770 }
36771 }
36772
36773 // MOD(x, y) -> x % y for dialects that prefer or require the infix operator.
36774 "MOD" if args.len() == 2 => {
36775 match target {
36776 DialectType::PostgreSQL
36777 | DialectType::DuckDB
36778 | DialectType::Presto
36779 | DialectType::Trino
36780 | DialectType::Athena
36781 | DialectType::Snowflake
36782 | DialectType::TSQL
36783 | DialectType::Fabric => {
36784 let x = args.remove(0);
36785 let y = args.remove(0);
36786 // Wrap complex expressions in parens to preserve precedence
36787 let needs_paren = |e: &Expression| {
36788 matches!(
36789 e,
36790 Expression::Add(_)
36791 | Expression::Sub(_)
36792 | Expression::Mul(_)
36793 | Expression::Div(_)
36794 | Expression::Mod(_)
36795 | Expression::ModFunc(_)
36796 )
36797 };
36798 let x = if needs_paren(&x) {
36799 Expression::Paren(Box::new(crate::expressions::Paren {
36800 this: x,
36801 trailing_comments: vec![],
36802 }))
36803 } else {
36804 x
36805 };
36806 let y = if needs_paren(&y) {
36807 Expression::Paren(Box::new(crate::expressions::Paren {
36808 this: y,
36809 trailing_comments: vec![],
36810 }))
36811 } else {
36812 y
36813 };
36814 Ok(Expression::Mod(Box::new(
36815 crate::expressions::BinaryOp::new(x, y),
36816 )))
36817 }
36818 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
36819 // Hive/Spark: a % b
36820 let x = args.remove(0);
36821 let y = args.remove(0);
36822 let needs_paren = |e: &Expression| {
36823 matches!(
36824 e,
36825 Expression::Add(_)
36826 | Expression::Sub(_)
36827 | Expression::Mul(_)
36828 | Expression::Div(_)
36829 | Expression::Mod(_)
36830 | Expression::ModFunc(_)
36831 )
36832 };
36833 let x = if needs_paren(&x) {
36834 Expression::Paren(Box::new(crate::expressions::Paren {
36835 this: x,
36836 trailing_comments: vec![],
36837 }))
36838 } else {
36839 x
36840 };
36841 let y = if needs_paren(&y) {
36842 Expression::Paren(Box::new(crate::expressions::Paren {
36843 this: y,
36844 trailing_comments: vec![],
36845 }))
36846 } else {
36847 y
36848 };
36849 Ok(Expression::Mod(Box::new(
36850 crate::expressions::BinaryOp::new(x, y),
36851 )))
36852 }
36853 _ => Ok(Expression::Function(Box::new(Function::new(
36854 "MOD".to_string(),
36855 args,
36856 )))),
36857 }
36858 }
36859
36860 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
36861 "ARRAY_FILTER" if args.len() == 2 => {
36862 let name = match target {
36863 DialectType::DuckDB => "LIST_FILTER",
36864 DialectType::StarRocks => "ARRAY_FILTER",
36865 _ => "FILTER",
36866 };
36867 Ok(Expression::Function(Box::new(Function::new(
36868 name.to_string(),
36869 args,
36870 ))))
36871 }
36872 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
36873 "FILTER" if args.len() == 2 => {
36874 let name = match target {
36875 DialectType::DuckDB => "LIST_FILTER",
36876 DialectType::StarRocks => "ARRAY_FILTER",
36877 _ => "FILTER",
36878 };
36879 Ok(Expression::Function(Box::new(Function::new(
36880 name.to_string(),
36881 args,
36882 ))))
36883 }
36884 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
36885 "REDUCE" if args.len() >= 3 => {
36886 let name = match target {
36887 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
36888 _ => "REDUCE",
36889 };
36890 Ok(Expression::Function(Box::new(Function::new(
36891 name.to_string(),
36892 args,
36893 ))))
36894 }
36895 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
36896 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
36897 Function::new("ARRAY_REVERSE".to_string(), args),
36898 ))),
36899
36900 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
36901 "CONCAT" if args.len() > 2 => match target {
36902 DialectType::DuckDB => {
36903 let mut it = args.into_iter();
36904 let mut result = it.next().unwrap();
36905 for arg in it {
36906 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
36907 this: Box::new(result),
36908 expression: Box::new(arg),
36909 safe: None,
36910 }));
36911 }
36912 Ok(result)
36913 }
36914 _ => Ok(Expression::Function(Box::new(Function::new(
36915 "CONCAT".to_string(),
36916 args,
36917 )))),
36918 },
36919
36920 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
36921 "GENERATE_DATE_ARRAY" => {
36922 if matches!(target, DialectType::BigQuery) {
36923 // BQ->BQ: add default interval if not present
36924 if args.len() == 2 {
36925 let start = args.remove(0);
36926 let end = args.remove(0);
36927 let default_interval =
36928 Expression::Interval(Box::new(crate::expressions::Interval {
36929 this: Some(Expression::Literal(Box::new(Literal::String(
36930 "1".to_string(),
36931 )))),
36932 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
36933 unit: crate::expressions::IntervalUnit::Day,
36934 use_plural: false,
36935 }),
36936 }));
36937 Ok(Expression::Function(Box::new(Function::new(
36938 "GENERATE_DATE_ARRAY".to_string(),
36939 vec![start, end, default_interval],
36940 ))))
36941 } else {
36942 Ok(Expression::Function(Box::new(Function::new(
36943 "GENERATE_DATE_ARRAY".to_string(),
36944 args,
36945 ))))
36946 }
36947 } else if matches!(target, DialectType::DuckDB) {
36948 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
36949 let start = args.get(0).cloned();
36950 let end = args.get(1).cloned();
36951 let step = args.get(2).cloned().or_else(|| {
36952 Some(Expression::Interval(Box::new(
36953 crate::expressions::Interval {
36954 this: Some(Expression::Literal(Box::new(Literal::String(
36955 "1".to_string(),
36956 )))),
36957 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
36958 unit: crate::expressions::IntervalUnit::Day,
36959 use_plural: false,
36960 }),
36961 },
36962 )))
36963 });
36964
36965 // Wrap start/end in CAST(... AS DATE) only for string literals
36966 let maybe_cast_date = |expr: Expression| -> Expression {
36967 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
36968 {
36969 Expression::Cast(Box::new(Cast {
36970 this: expr,
36971 to: DataType::Date,
36972 trailing_comments: vec![],
36973 double_colon_syntax: false,
36974 format: None,
36975 default: None,
36976 inferred_type: None,
36977 }))
36978 } else {
36979 expr
36980 }
36981 };
36982 let cast_start = start.map(maybe_cast_date);
36983 let cast_end = end.map(maybe_cast_date);
36984
36985 let gen_series =
36986 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
36987 start: cast_start.map(Box::new),
36988 end: cast_end.map(Box::new),
36989 step: step.map(Box::new),
36990 is_end_exclusive: None,
36991 }));
36992
36993 // Wrap in CAST(... AS DATE[])
36994 Ok(Expression::Cast(Box::new(Cast {
36995 this: gen_series,
36996 to: DataType::Array {
36997 element_type: Box::new(DataType::Date),
36998 dimension: None,
36999 },
37000 trailing_comments: vec![],
37001 double_colon_syntax: false,
37002 format: None,
37003 default: None,
37004 inferred_type: None,
37005 })))
37006 } else if matches!(target, DialectType::Snowflake) {
37007 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
37008 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
37009 if args.len() == 2 {
37010 let start = args.remove(0);
37011 let end = args.remove(0);
37012 let default_interval =
37013 Expression::Interval(Box::new(crate::expressions::Interval {
37014 this: Some(Expression::Literal(Box::new(Literal::String(
37015 "1".to_string(),
37016 )))),
37017 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
37018 unit: crate::expressions::IntervalUnit::Day,
37019 use_plural: false,
37020 }),
37021 }));
37022 Ok(Expression::Function(Box::new(Function::new(
37023 "GENERATE_DATE_ARRAY".to_string(),
37024 vec![start, end, default_interval],
37025 ))))
37026 } else {
37027 Ok(Expression::Function(Box::new(Function::new(
37028 "GENERATE_DATE_ARRAY".to_string(),
37029 args,
37030 ))))
37031 }
37032 } else {
37033 // Convert to GenerateSeries for other targets
37034 let start = args.get(0).cloned();
37035 let end = args.get(1).cloned();
37036 let step = args.get(2).cloned().or_else(|| {
37037 Some(Expression::Interval(Box::new(
37038 crate::expressions::Interval {
37039 this: Some(Expression::Literal(Box::new(Literal::String(
37040 "1".to_string(),
37041 )))),
37042 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
37043 unit: crate::expressions::IntervalUnit::Day,
37044 use_plural: false,
37045 }),
37046 },
37047 )))
37048 });
37049 Ok(Expression::GenerateSeries(Box::new(
37050 crate::expressions::GenerateSeries {
37051 start: start.map(Box::new),
37052 end: end.map(Box::new),
37053 step: step.map(Box::new),
37054 is_end_exclusive: None,
37055 },
37056 )))
37057 }
37058 }
37059
37060 // PARSE_DATE(format, str) -> target-specific
37061 "PARSE_DATE" if args.len() == 2 => {
37062 let format = args.remove(0);
37063 let str_expr = args.remove(0);
37064 match target {
37065 DialectType::DuckDB => {
37066 // CAST(STRPTIME(str, duck_format) AS DATE)
37067 let duck_format = Self::bq_format_to_duckdb(&format);
37068 let strptime = Expression::Function(Box::new(Function::new(
37069 "STRPTIME".to_string(),
37070 vec![str_expr, duck_format],
37071 )));
37072 Ok(Expression::Cast(Box::new(Cast {
37073 this: strptime,
37074 to: DataType::Date,
37075 trailing_comments: vec![],
37076 double_colon_syntax: false,
37077 format: None,
37078 default: None,
37079 inferred_type: None,
37080 })))
37081 }
37082 DialectType::Snowflake => {
37083 // _POLYGLOT_DATE(str, snowflake_format)
37084 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
37085 let sf_format = Self::bq_format_to_snowflake(&format);
37086 Ok(Expression::Function(Box::new(Function::new(
37087 "_POLYGLOT_DATE".to_string(),
37088 vec![str_expr, sf_format],
37089 ))))
37090 }
37091 _ => Ok(Expression::Function(Box::new(Function::new(
37092 "PARSE_DATE".to_string(),
37093 vec![format, str_expr],
37094 )))),
37095 }
37096 }
37097
37098 // PARSE_TIMESTAMP(format, str) -> target-specific
37099 "PARSE_TIMESTAMP" if args.len() >= 2 => {
37100 let format = args.remove(0);
37101 let str_expr = args.remove(0);
37102 let tz = if !args.is_empty() {
37103 Some(args.remove(0))
37104 } else {
37105 None
37106 };
37107 match target {
37108 DialectType::DuckDB => {
37109 let duck_format = Self::bq_format_to_duckdb(&format);
37110 let strptime = Expression::Function(Box::new(Function::new(
37111 "STRPTIME".to_string(),
37112 vec![str_expr, duck_format],
37113 )));
37114 Ok(strptime)
37115 }
37116 _ => {
37117 let mut result_args = vec![format, str_expr];
37118 if let Some(tz_arg) = tz {
37119 result_args.push(tz_arg);
37120 }
37121 Ok(Expression::Function(Box::new(Function::new(
37122 "PARSE_TIMESTAMP".to_string(),
37123 result_args,
37124 ))))
37125 }
37126 }
37127 }
37128
37129 // FORMAT_DATE(format, date) -> target-specific
37130 "FORMAT_DATE" if args.len() == 2 => {
37131 let format = args.remove(0);
37132 let date_expr = args.remove(0);
37133 match target {
37134 DialectType::DuckDB => {
37135 // STRFTIME(CAST(date AS DATE), format)
37136 let cast_date = Expression::Cast(Box::new(Cast {
37137 this: date_expr,
37138 to: DataType::Date,
37139 trailing_comments: vec![],
37140 double_colon_syntax: false,
37141 format: None,
37142 default: None,
37143 inferred_type: None,
37144 }));
37145 Ok(Expression::Function(Box::new(Function::new(
37146 "STRFTIME".to_string(),
37147 vec![cast_date, format],
37148 ))))
37149 }
37150 _ => Ok(Expression::Function(Box::new(Function::new(
37151 "FORMAT_DATE".to_string(),
37152 vec![format, date_expr],
37153 )))),
37154 }
37155 }
37156
37157 // FORMAT_DATETIME(format, datetime) -> target-specific
37158 "FORMAT_DATETIME" if args.len() == 2 => {
37159 let format = args.remove(0);
37160 let dt_expr = args.remove(0);
37161
37162 if matches!(target, DialectType::BigQuery) {
37163 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
37164 let norm_format = Self::bq_format_normalize_bq(&format);
37165 // Also strip DATETIME keyword from typed literals
37166 let norm_dt = match dt_expr {
37167 Expression::Literal(lit)
37168 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
37169 {
37170 let Literal::Timestamp(s) = lit.as_ref() else {
37171 unreachable!()
37172 };
37173 Expression::Cast(Box::new(Cast {
37174 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37175 to: DataType::Custom {
37176 name: "DATETIME".to_string(),
37177 },
37178 trailing_comments: vec![],
37179 double_colon_syntax: false,
37180 format: None,
37181 default: None,
37182 inferred_type: None,
37183 }))
37184 }
37185 other => other,
37186 };
37187 return Ok(Expression::Function(Box::new(Function::new(
37188 "FORMAT_DATETIME".to_string(),
37189 vec![norm_format, norm_dt],
37190 ))));
37191 }
37192
37193 match target {
37194 DialectType::DuckDB => {
37195 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
37196 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
37197 let duck_format = Self::bq_format_to_duckdb(&format);
37198 Ok(Expression::Function(Box::new(Function::new(
37199 "STRFTIME".to_string(),
37200 vec![cast_dt, duck_format],
37201 ))))
37202 }
37203 _ => Ok(Expression::Function(Box::new(Function::new(
37204 "FORMAT_DATETIME".to_string(),
37205 vec![format, dt_expr],
37206 )))),
37207 }
37208 }
37209
37210 // FORMAT_TIMESTAMP(format, ts) -> target-specific
37211 "FORMAT_TIMESTAMP" if args.len() == 2 => {
37212 let format = args.remove(0);
37213 let ts_expr = args.remove(0);
37214 match target {
37215 DialectType::DuckDB => {
37216 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
37217 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
37218 let cast_ts = Expression::Cast(Box::new(Cast {
37219 this: cast_tstz,
37220 to: DataType::Timestamp {
37221 timezone: false,
37222 precision: None,
37223 },
37224 trailing_comments: vec![],
37225 double_colon_syntax: false,
37226 format: None,
37227 default: None,
37228 inferred_type: None,
37229 }));
37230 Ok(Expression::Function(Box::new(Function::new(
37231 "STRFTIME".to_string(),
37232 vec![cast_ts, format],
37233 ))))
37234 }
37235 DialectType::Snowflake => {
37236 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
37237 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
37238 let cast_ts = Expression::Cast(Box::new(Cast {
37239 this: cast_tstz,
37240 to: DataType::Timestamp {
37241 timezone: false,
37242 precision: None,
37243 },
37244 trailing_comments: vec![],
37245 double_colon_syntax: false,
37246 format: None,
37247 default: None,
37248 inferred_type: None,
37249 }));
37250 let sf_format = Self::bq_format_to_snowflake(&format);
37251 Ok(Expression::Function(Box::new(Function::new(
37252 "TO_CHAR".to_string(),
37253 vec![cast_ts, sf_format],
37254 ))))
37255 }
37256 _ => Ok(Expression::Function(Box::new(Function::new(
37257 "FORMAT_TIMESTAMP".to_string(),
37258 vec![format, ts_expr],
37259 )))),
37260 }
37261 }
37262
37263 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
37264 "UNIX_DATE" if args.len() == 1 => {
37265 let date = args.remove(0);
37266 match target {
37267 DialectType::DuckDB => {
37268 let epoch = Expression::Cast(Box::new(Cast {
37269 this: Expression::Literal(Box::new(Literal::String(
37270 "1970-01-01".to_string(),
37271 ))),
37272 to: DataType::Date,
37273 trailing_comments: vec![],
37274 double_colon_syntax: false,
37275 format: None,
37276 default: None,
37277 inferred_type: None,
37278 }));
37279 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
37280 // Need to convert DATE literal to CAST
37281 let norm_date = Self::date_literal_to_cast(date);
37282 Ok(Expression::Function(Box::new(Function::new(
37283 "DATE_DIFF".to_string(),
37284 vec![
37285 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
37286 epoch,
37287 norm_date,
37288 ],
37289 ))))
37290 }
37291 _ => Ok(Expression::Function(Box::new(Function::new(
37292 "UNIX_DATE".to_string(),
37293 vec![date],
37294 )))),
37295 }
37296 }
37297
37298 // UNIX_SECONDS(ts) -> target-specific
37299 "UNIX_SECONDS" if args.len() == 1 => {
37300 let ts = args.remove(0);
37301 match target {
37302 DialectType::DuckDB => {
37303 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
37304 let norm_ts = Self::ts_literal_to_cast_tz(ts);
37305 let epoch = Expression::Function(Box::new(Function::new(
37306 "EPOCH".to_string(),
37307 vec![norm_ts],
37308 )));
37309 Ok(Expression::Cast(Box::new(Cast {
37310 this: epoch,
37311 to: DataType::BigInt { length: None },
37312 trailing_comments: vec![],
37313 double_colon_syntax: false,
37314 format: None,
37315 default: None,
37316 inferred_type: None,
37317 })))
37318 }
37319 DialectType::Snowflake => {
37320 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
37321 let epoch = Expression::Cast(Box::new(Cast {
37322 this: Expression::Literal(Box::new(Literal::String(
37323 "1970-01-01 00:00:00+00".to_string(),
37324 ))),
37325 to: DataType::Timestamp {
37326 timezone: true,
37327 precision: None,
37328 },
37329 trailing_comments: vec![],
37330 double_colon_syntax: false,
37331 format: None,
37332 default: None,
37333 inferred_type: None,
37334 }));
37335 Ok(Expression::Function(Box::new(Function::new(
37336 "TIMESTAMPDIFF".to_string(),
37337 vec![
37338 Expression::Identifier(Identifier::new("SECONDS".to_string())),
37339 epoch,
37340 ts,
37341 ],
37342 ))))
37343 }
37344 _ => Ok(Expression::Function(Box::new(Function::new(
37345 "UNIX_SECONDS".to_string(),
37346 vec![ts],
37347 )))),
37348 }
37349 }
37350
37351 // UNIX_MILLIS(ts) -> target-specific
37352 "UNIX_MILLIS" if args.len() == 1 => {
37353 let ts = args.remove(0);
37354 match target {
37355 DialectType::DuckDB => {
37356 let norm_ts = Self::ts_literal_to_cast_tz(ts);
37357 Ok(Expression::Function(Box::new(Function::new(
37358 "EPOCH_MS".to_string(),
37359 vec![norm_ts],
37360 ))))
37361 }
37362 _ => Ok(Expression::Function(Box::new(Function::new(
37363 "UNIX_MILLIS".to_string(),
37364 vec![ts],
37365 )))),
37366 }
37367 }
37368
37369 // UNIX_MICROS(ts) -> target-specific
37370 "UNIX_MICROS" if args.len() == 1 => {
37371 let ts = args.remove(0);
37372 match target {
37373 DialectType::DuckDB => {
37374 let norm_ts = Self::ts_literal_to_cast_tz(ts);
37375 Ok(Expression::Function(Box::new(Function::new(
37376 "EPOCH_US".to_string(),
37377 vec![norm_ts],
37378 ))))
37379 }
37380 _ => Ok(Expression::Function(Box::new(Function::new(
37381 "UNIX_MICROS".to_string(),
37382 vec![ts],
37383 )))),
37384 }
37385 }
37386
37387 // INSTR(str, substr) -> target-specific
37388 "INSTR" => {
37389 if matches!(target, DialectType::BigQuery) {
37390 // BQ->BQ: keep as INSTR
37391 Ok(Expression::Function(Box::new(Function::new(
37392 "INSTR".to_string(),
37393 args,
37394 ))))
37395 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
37396 // Snowflake: CHARINDEX(substr, str) - swap args
37397 let str_expr = args.remove(0);
37398 let substr = args.remove(0);
37399 Ok(Expression::Function(Box::new(Function::new(
37400 "CHARINDEX".to_string(),
37401 vec![substr, str_expr],
37402 ))))
37403 } else {
37404 // Keep as INSTR for other targets
37405 Ok(Expression::Function(Box::new(Function::new(
37406 "INSTR".to_string(),
37407 args,
37408 ))))
37409 }
37410 }
37411
37412 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
37413 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
37414 if matches!(target, DialectType::BigQuery) {
37415 // BQ->BQ: always output with parens (function form), keep any timezone arg
37416 Ok(Expression::Function(Box::new(Function::new(name, args))))
37417 } else if name == "CURRENT_DATE" && args.len() == 1 {
37418 // CURRENT_DATE('UTC') - has timezone arg
37419 let tz_arg = args.remove(0);
37420 match target {
37421 DialectType::DuckDB => {
37422 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
37423 let ct = Expression::CurrentTimestamp(
37424 crate::expressions::CurrentTimestamp {
37425 precision: None,
37426 sysdate: false,
37427 },
37428 );
37429 let at_tz =
37430 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
37431 this: ct,
37432 zone: tz_arg,
37433 }));
37434 Ok(Expression::Cast(Box::new(Cast {
37435 this: at_tz,
37436 to: DataType::Date,
37437 trailing_comments: vec![],
37438 double_colon_syntax: false,
37439 format: None,
37440 default: None,
37441 inferred_type: None,
37442 })))
37443 }
37444 DialectType::Snowflake => {
37445 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
37446 let ct = Expression::Function(Box::new(Function::new(
37447 "CURRENT_TIMESTAMP".to_string(),
37448 vec![],
37449 )));
37450 let convert = Expression::Function(Box::new(Function::new(
37451 "CONVERT_TIMEZONE".to_string(),
37452 vec![tz_arg, ct],
37453 )));
37454 Ok(Expression::Cast(Box::new(Cast {
37455 this: convert,
37456 to: DataType::Date,
37457 trailing_comments: vec![],
37458 double_colon_syntax: false,
37459 format: None,
37460 default: None,
37461 inferred_type: None,
37462 })))
37463 }
37464 _ => {
37465 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
37466 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
37467 Ok(Expression::AtTimeZone(Box::new(
37468 crate::expressions::AtTimeZone {
37469 this: cd,
37470 zone: tz_arg,
37471 },
37472 )))
37473 }
37474 }
37475 } else if (name == "CURRENT_TIMESTAMP"
37476 || name == "CURRENT_TIME"
37477 || name == "CURRENT_DATE")
37478 && args.is_empty()
37479 && matches!(
37480 target,
37481 DialectType::PostgreSQL
37482 | DialectType::DuckDB
37483 | DialectType::Presto
37484 | DialectType::Trino
37485 )
37486 {
37487 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
37488 if name == "CURRENT_TIMESTAMP" {
37489 Ok(Expression::CurrentTimestamp(
37490 crate::expressions::CurrentTimestamp {
37491 precision: None,
37492 sysdate: false,
37493 },
37494 ))
37495 } else if name == "CURRENT_DATE" {
37496 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
37497 } else {
37498 // CURRENT_TIME
37499 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
37500 precision: None,
37501 }))
37502 }
37503 } else {
37504 // All other targets: keep as function (with parens)
37505 Ok(Expression::Function(Box::new(Function::new(name, args))))
37506 }
37507 }
37508
37509 // JSON_QUERY(json, path) -> target-specific
37510 "JSON_QUERY" if args.len() == 2 => {
37511 match target {
37512 DialectType::DuckDB | DialectType::SQLite => {
37513 // json -> path syntax
37514 let json_expr = args.remove(0);
37515 let path = args.remove(0);
37516 Ok(Expression::JsonExtract(Box::new(
37517 crate::expressions::JsonExtractFunc {
37518 this: json_expr,
37519 path,
37520 returning: None,
37521 arrow_syntax: true,
37522 hash_arrow_syntax: false,
37523 wrapper_option: None,
37524 quotes_option: None,
37525 on_scalar_string: false,
37526 on_error: None,
37527 },
37528 )))
37529 }
37530 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
37531 Ok(Expression::Function(Box::new(Function::new(
37532 "GET_JSON_OBJECT".to_string(),
37533 args,
37534 ))))
37535 }
37536 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
37537 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
37538 )),
37539 _ => Ok(Expression::Function(Box::new(Function::new(
37540 "JSON_QUERY".to_string(),
37541 args,
37542 )))),
37543 }
37544 }
37545
37546 // JSON_VALUE_ARRAY(json, path) -> target-specific
37547 "JSON_VALUE_ARRAY" if args.len() == 2 => {
37548 match target {
37549 DialectType::DuckDB => {
37550 // CAST(json -> path AS TEXT[])
37551 let json_expr = args.remove(0);
37552 let path = args.remove(0);
37553 let arrow = Expression::JsonExtract(Box::new(
37554 crate::expressions::JsonExtractFunc {
37555 this: json_expr,
37556 path,
37557 returning: None,
37558 arrow_syntax: true,
37559 hash_arrow_syntax: false,
37560 wrapper_option: None,
37561 quotes_option: None,
37562 on_scalar_string: false,
37563 on_error: None,
37564 },
37565 ));
37566 Ok(Expression::Cast(Box::new(Cast {
37567 this: arrow,
37568 to: DataType::Array {
37569 element_type: Box::new(DataType::Text),
37570 dimension: None,
37571 },
37572 trailing_comments: vec![],
37573 double_colon_syntax: false,
37574 format: None,
37575 default: None,
37576 inferred_type: None,
37577 })))
37578 }
37579 DialectType::Snowflake => {
37580 let json_expr = args.remove(0);
37581 let path_expr = args.remove(0);
37582 // Convert JSON path from $.path to just path
37583 let sf_path = if let Expression::Literal(ref lit) = path_expr {
37584 if let Literal::String(ref s) = lit.as_ref() {
37585 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
37586 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
37587 } else {
37588 path_expr.clone()
37589 }
37590 } else {
37591 path_expr
37592 };
37593 let parse_json = Expression::Function(Box::new(Function::new(
37594 "PARSE_JSON".to_string(),
37595 vec![json_expr],
37596 )));
37597 let get_path = Expression::Function(Box::new(Function::new(
37598 "GET_PATH".to_string(),
37599 vec![parse_json, sf_path],
37600 )));
37601 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
37602 let cast_expr = Expression::Cast(Box::new(Cast {
37603 this: Expression::Identifier(Identifier::new("x")),
37604 to: DataType::VarChar {
37605 length: None,
37606 parenthesized_length: false,
37607 },
37608 trailing_comments: vec![],
37609 double_colon_syntax: false,
37610 format: None,
37611 default: None,
37612 inferred_type: None,
37613 }));
37614 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
37615 parameters: vec![Identifier::new("x")],
37616 body: cast_expr,
37617 colon: false,
37618 parameter_types: vec![],
37619 }));
37620 Ok(Expression::Function(Box::new(Function::new(
37621 "TRANSFORM".to_string(),
37622 vec![get_path, lambda],
37623 ))))
37624 }
37625 _ => Ok(Expression::Function(Box::new(Function::new(
37626 "JSON_VALUE_ARRAY".to_string(),
37627 args,
37628 )))),
37629 }
37630 }
37631
37632 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
37633 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
37634 // This is different from Hive/Spark where 3rd arg is "group_index"
37635 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
37636 match target {
37637 DialectType::DuckDB
37638 | DialectType::Presto
37639 | DialectType::Trino
37640 | DialectType::Athena => {
37641 if args.len() == 2 {
37642 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
37643 args.push(Expression::number(1));
37644 Ok(Expression::Function(Box::new(Function::new(
37645 "REGEXP_EXTRACT".to_string(),
37646 args,
37647 ))))
37648 } else if args.len() == 3 {
37649 let val = args.remove(0);
37650 let regex = args.remove(0);
37651 let position = args.remove(0);
37652 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37653 if is_pos_1 {
37654 Ok(Expression::Function(Box::new(Function::new(
37655 "REGEXP_EXTRACT".to_string(),
37656 vec![val, regex, Expression::number(1)],
37657 ))))
37658 } else {
37659 let substring_expr = Expression::Function(Box::new(Function::new(
37660 "SUBSTRING".to_string(),
37661 vec![val, position],
37662 )));
37663 let nullif_expr = Expression::Function(Box::new(Function::new(
37664 "NULLIF".to_string(),
37665 vec![
37666 substring_expr,
37667 Expression::Literal(Box::new(Literal::String(
37668 String::new(),
37669 ))),
37670 ],
37671 )));
37672 Ok(Expression::Function(Box::new(Function::new(
37673 "REGEXP_EXTRACT".to_string(),
37674 vec![nullif_expr, regex, Expression::number(1)],
37675 ))))
37676 }
37677 } else if args.len() == 4 {
37678 let val = args.remove(0);
37679 let regex = args.remove(0);
37680 let position = args.remove(0);
37681 let occurrence = args.remove(0);
37682 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37683 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37684 if is_pos_1 && is_occ_1 {
37685 Ok(Expression::Function(Box::new(Function::new(
37686 "REGEXP_EXTRACT".to_string(),
37687 vec![val, regex, Expression::number(1)],
37688 ))))
37689 } else {
37690 let subject = if is_pos_1 {
37691 val
37692 } else {
37693 let substring_expr = Expression::Function(Box::new(
37694 Function::new("SUBSTRING".to_string(), vec![val, position]),
37695 ));
37696 Expression::Function(Box::new(Function::new(
37697 "NULLIF".to_string(),
37698 vec![
37699 substring_expr,
37700 Expression::Literal(Box::new(Literal::String(
37701 String::new(),
37702 ))),
37703 ],
37704 )))
37705 };
37706 let extract_all = Expression::Function(Box::new(Function::new(
37707 "REGEXP_EXTRACT_ALL".to_string(),
37708 vec![subject, regex, Expression::number(1)],
37709 )));
37710 Ok(Expression::Function(Box::new(Function::new(
37711 "ARRAY_EXTRACT".to_string(),
37712 vec![extract_all, occurrence],
37713 ))))
37714 }
37715 } else {
37716 Ok(Expression::Function(Box::new(Function {
37717 name: f.name,
37718 args,
37719 distinct: f.distinct,
37720 trailing_comments: f.trailing_comments,
37721 use_bracket_syntax: f.use_bracket_syntax,
37722 no_parens: f.no_parens,
37723 quoted: f.quoted,
37724 span: None,
37725 inferred_type: None,
37726 })))
37727 }
37728 }
37729 DialectType::Snowflake => {
37730 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
37731 Ok(Expression::Function(Box::new(Function::new(
37732 "REGEXP_SUBSTR".to_string(),
37733 args,
37734 ))))
37735 }
37736 _ => {
37737 // For other targets (Hive/Spark/BigQuery): pass through as-is
37738 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
37739 Ok(Expression::Function(Box::new(Function {
37740 name: f.name,
37741 args,
37742 distinct: f.distinct,
37743 trailing_comments: f.trailing_comments,
37744 use_bracket_syntax: f.use_bracket_syntax,
37745 no_parens: f.no_parens,
37746 quoted: f.quoted,
37747 span: None,
37748 inferred_type: None,
37749 })))
37750 }
37751 }
37752 }
37753
37754 // BigQuery STRUCT(args) -> target-specific struct expression
37755 "STRUCT" => {
37756 // Convert Function args to Struct fields
37757 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
37758 for (i, arg) in args.into_iter().enumerate() {
37759 match arg {
37760 Expression::Alias(a) => {
37761 // Named field: expr AS name
37762 fields.push((Some(a.alias.name.clone()), a.this));
37763 }
37764 other => {
37765 // Unnamed field: for Spark/Hive, keep as None
37766 // For Snowflake, auto-name as _N
37767 // For DuckDB, use column name for column refs, _N for others
37768 if matches!(target, DialectType::Snowflake) {
37769 fields.push((Some(format!("_{}", i)), other));
37770 } else if matches!(target, DialectType::DuckDB) {
37771 let auto_name = match &other {
37772 Expression::Column(col) => col.name.name.clone(),
37773 _ => format!("_{}", i),
37774 };
37775 fields.push((Some(auto_name), other));
37776 } else {
37777 fields.push((None, other));
37778 }
37779 }
37780 }
37781 }
37782
37783 match target {
37784 DialectType::Snowflake => {
37785 // OBJECT_CONSTRUCT('name', value, ...)
37786 let mut oc_args = Vec::new();
37787 for (name, val) in &fields {
37788 if let Some(n) = name {
37789 oc_args.push(Expression::Literal(Box::new(Literal::String(
37790 n.clone(),
37791 ))));
37792 oc_args.push(val.clone());
37793 } else {
37794 oc_args.push(val.clone());
37795 }
37796 }
37797 Ok(Expression::Function(Box::new(Function::new(
37798 "OBJECT_CONSTRUCT".to_string(),
37799 oc_args,
37800 ))))
37801 }
37802 DialectType::DuckDB => {
37803 // {'name': value, ...}
37804 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37805 fields,
37806 })))
37807 }
37808 DialectType::Hive => {
37809 // STRUCT(val1, val2, ...) - strip aliases
37810 let hive_fields: Vec<(Option<String>, Expression)> =
37811 fields.into_iter().map(|(_, v)| (None, v)).collect();
37812 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37813 fields: hive_fields,
37814 })))
37815 }
37816 DialectType::Spark | DialectType::Databricks => {
37817 // Use Expression::Struct to bypass Spark target transform auto-naming
37818 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37819 fields,
37820 })))
37821 }
37822 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
37823 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
37824 let all_named =
37825 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
37826 let all_types_inferable = all_named
37827 && fields
37828 .iter()
37829 .all(|(_, val)| Self::can_infer_presto_type(val));
37830 let row_args: Vec<Expression> =
37831 fields.iter().map(|(_, v)| v.clone()).collect();
37832 let row_expr = Expression::Function(Box::new(Function::new(
37833 "ROW".to_string(),
37834 row_args,
37835 )));
37836 if all_named && all_types_inferable {
37837 // Build ROW type with inferred types
37838 let mut row_type_fields = Vec::new();
37839 for (name, val) in &fields {
37840 if let Some(n) = name {
37841 let type_str = Self::infer_sql_type_for_presto(val);
37842 row_type_fields.push(crate::expressions::StructField::new(
37843 n.clone(),
37844 crate::expressions::DataType::Custom { name: type_str },
37845 ));
37846 }
37847 }
37848 let row_type = crate::expressions::DataType::Struct {
37849 fields: row_type_fields,
37850 nested: true,
37851 };
37852 Ok(Expression::Cast(Box::new(Cast {
37853 this: row_expr,
37854 to: row_type,
37855 trailing_comments: Vec::new(),
37856 double_colon_syntax: false,
37857 format: None,
37858 default: None,
37859 inferred_type: None,
37860 })))
37861 } else {
37862 Ok(row_expr)
37863 }
37864 }
37865 _ => {
37866 // Default: keep as STRUCT function with original args
37867 let mut new_args = Vec::new();
37868 for (name, val) in fields {
37869 if let Some(n) = name {
37870 new_args.push(Expression::Alias(Box::new(
37871 crate::expressions::Alias::new(val, Identifier::new(n)),
37872 )));
37873 } else {
37874 new_args.push(val);
37875 }
37876 }
37877 Ok(Expression::Function(Box::new(Function::new(
37878 "STRUCT".to_string(),
37879 new_args,
37880 ))))
37881 }
37882 }
37883 }
37884
37885 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
37886 "ROUND" if args.len() == 3 => {
37887 let x = args.remove(0);
37888 let n = args.remove(0);
37889 let mode = args.remove(0);
37890 // Check if mode is 'ROUND_HALF_EVEN'
37891 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
37892 if is_half_even && matches!(target, DialectType::DuckDB) {
37893 Ok(Expression::Function(Box::new(Function::new(
37894 "ROUND_EVEN".to_string(),
37895 vec![x, n],
37896 ))))
37897 } else {
37898 // Pass through with all args
37899 Ok(Expression::Function(Box::new(Function::new(
37900 "ROUND".to_string(),
37901 vec![x, n, mode],
37902 ))))
37903 }
37904 }
37905
37906 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
37907 "MAKE_INTERVAL" => {
37908 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
37909 // The positional args are: year, month
37910 // Named args are: day =>, minute =>, etc.
37911 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
37912 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
37913 // For BigQuery->BigQuery: reorder named args (day before minute)
37914 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
37915 let mut parts: Vec<(String, String)> = Vec::new();
37916 let mut pos_idx = 0;
37917 let pos_units = ["year", "month"];
37918 for arg in &args {
37919 if let Expression::NamedArgument(na) = arg {
37920 // Named arg like minute => 5
37921 let unit = na.name.name.clone();
37922 if let Expression::Literal(lit) = &na.value {
37923 if let Literal::Number(n) = lit.as_ref() {
37924 parts.push((unit, n.clone()));
37925 }
37926 }
37927 } else if pos_idx < pos_units.len() {
37928 if let Expression::Literal(lit) = arg {
37929 if let Literal::Number(n) = lit.as_ref() {
37930 parts.push((pos_units[pos_idx].to_string(), n.clone()));
37931 }
37932 }
37933 pos_idx += 1;
37934 }
37935 }
37936 // Don't sort - preserve original argument order
37937 let separator = if matches!(target, DialectType::Snowflake) {
37938 ", "
37939 } else {
37940 " "
37941 };
37942 let interval_str = parts
37943 .iter()
37944 .map(|(u, v)| format!("{} {}", v, u))
37945 .collect::<Vec<_>>()
37946 .join(separator);
37947 Ok(Expression::Interval(Box::new(
37948 crate::expressions::Interval {
37949 this: Some(Expression::Literal(Box::new(Literal::String(
37950 interval_str,
37951 )))),
37952 unit: None,
37953 },
37954 )))
37955 } else if matches!(target, DialectType::BigQuery) {
37956 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
37957 let mut positional = Vec::new();
37958 let mut named: Vec<(
37959 String,
37960 Expression,
37961 crate::expressions::NamedArgSeparator,
37962 )> = Vec::new();
37963 let _pos_units = ["year", "month"];
37964 let mut _pos_idx = 0;
37965 for arg in args {
37966 if let Expression::NamedArgument(na) = arg {
37967 named.push((na.name.name.clone(), na.value, na.separator));
37968 } else {
37969 positional.push(arg);
37970 _pos_idx += 1;
37971 }
37972 }
37973 // Sort named args by: day, hour, minute, second
37974 let unit_order = |u: &str| -> usize {
37975 match u.to_ascii_lowercase().as_str() {
37976 "day" => 0,
37977 "hour" => 1,
37978 "minute" => 2,
37979 "second" => 3,
37980 _ => 4,
37981 }
37982 };
37983 named.sort_by_key(|(u, _, _)| unit_order(u));
37984 let mut result_args = positional;
37985 for (name, value, sep) in named {
37986 result_args.push(Expression::NamedArgument(Box::new(
37987 crate::expressions::NamedArgument {
37988 name: Identifier::new(&name),
37989 value,
37990 separator: sep,
37991 },
37992 )));
37993 }
37994 Ok(Expression::Function(Box::new(Function::new(
37995 "MAKE_INTERVAL".to_string(),
37996 result_args,
37997 ))))
37998 } else {
37999 Ok(Expression::Function(Box::new(Function::new(
38000 "MAKE_INTERVAL".to_string(),
38001 args,
38002 ))))
38003 }
38004 }
38005
38006 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
38007 "ARRAY_TO_STRING" if args.len() == 3 => {
38008 let arr = args.remove(0);
38009 let sep = args.remove(0);
38010 let null_text = args.remove(0);
38011 match target {
38012 DialectType::DuckDB => {
38013 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
38014 let _lambda_param =
38015 Expression::Identifier(crate::expressions::Identifier::new("x"));
38016 let coalesce =
38017 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
38018 original_name: None,
38019 expressions: vec![
38020 Expression::Identifier(crate::expressions::Identifier::new(
38021 "x",
38022 )),
38023 null_text,
38024 ],
38025 inferred_type: None,
38026 }));
38027 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
38028 parameters: vec![crate::expressions::Identifier::new("x")],
38029 body: coalesce,
38030 colon: false,
38031 parameter_types: vec![],
38032 }));
38033 let list_transform = Expression::Function(Box::new(Function::new(
38034 "LIST_TRANSFORM".to_string(),
38035 vec![arr, lambda],
38036 )));
38037 Ok(Expression::Function(Box::new(Function::new(
38038 "ARRAY_TO_STRING".to_string(),
38039 vec![list_transform, sep],
38040 ))))
38041 }
38042 _ => Ok(Expression::Function(Box::new(Function::new(
38043 "ARRAY_TO_STRING".to_string(),
38044 vec![arr, sep, null_text],
38045 )))),
38046 }
38047 }
38048
38049 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
38050 "LENGTH" if args.len() == 1 => {
38051 let arg = args.remove(0);
38052 match target {
38053 DialectType::DuckDB => {
38054 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
38055 let typeof_func = Expression::Function(Box::new(Function::new(
38056 "TYPEOF".to_string(),
38057 vec![arg.clone()],
38058 )));
38059 let blob_cast = Expression::Cast(Box::new(Cast {
38060 this: arg.clone(),
38061 to: DataType::VarBinary { length: None },
38062 trailing_comments: vec![],
38063 double_colon_syntax: false,
38064 format: None,
38065 default: None,
38066 inferred_type: None,
38067 }));
38068 let octet_length = Expression::Function(Box::new(Function::new(
38069 "OCTET_LENGTH".to_string(),
38070 vec![blob_cast],
38071 )));
38072 let text_cast = Expression::Cast(Box::new(Cast {
38073 this: arg,
38074 to: DataType::Text,
38075 trailing_comments: vec![],
38076 double_colon_syntax: false,
38077 format: None,
38078 default: None,
38079 inferred_type: None,
38080 }));
38081 let length_text = Expression::Function(Box::new(Function::new(
38082 "LENGTH".to_string(),
38083 vec![text_cast],
38084 )));
38085 Ok(Expression::Case(Box::new(crate::expressions::Case {
38086 operand: Some(typeof_func),
38087 whens: vec![(
38088 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
38089 octet_length,
38090 )],
38091 else_: Some(length_text),
38092 comments: Vec::new(),
38093 inferred_type: None,
38094 })))
38095 }
38096 _ => Ok(Expression::Function(Box::new(Function::new(
38097 "LENGTH".to_string(),
38098 vec![arg],
38099 )))),
38100 }
38101 }
38102
38103 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
38104 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
38105 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
38106 // The args should be [x, fraction] with the null handling stripped
38107 // For DuckDB: QUANTILE_CONT(x, fraction)
38108 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
38109 match target {
38110 DialectType::DuckDB => {
38111 // Strip down to just 2 args, rename to QUANTILE_CONT
38112 let x = args[0].clone();
38113 let frac = args[1].clone();
38114 Ok(Expression::Function(Box::new(Function::new(
38115 "QUANTILE_CONT".to_string(),
38116 vec![x, frac],
38117 ))))
38118 }
38119 _ => Ok(Expression::Function(Box::new(Function::new(
38120 "PERCENTILE_CONT".to_string(),
38121 args,
38122 )))),
38123 }
38124 }
38125
38126 // All others: pass through
38127 _ => Ok(Expression::Function(Box::new(Function {
38128 name: f.name,
38129 args,
38130 distinct: f.distinct,
38131 trailing_comments: f.trailing_comments,
38132 use_bracket_syntax: f.use_bracket_syntax,
38133 no_parens: f.no_parens,
38134 quoted: f.quoted,
38135 span: None,
38136 inferred_type: None,
38137 }))),
38138 }
38139 }
38140
38141 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
38142 /// Returns false for column references and other non-literal expressions where the type is unknown.
38143 fn can_infer_presto_type(expr: &Expression) -> bool {
38144 match expr {
38145 Expression::Literal(_) => true,
38146 Expression::Boolean(_) => true,
38147 Expression::Array(_) | Expression::ArrayFunc(_) => true,
38148 Expression::Struct(_) | Expression::StructFunc(_) => true,
38149 Expression::Function(f) => {
38150 f.name.eq_ignore_ascii_case("STRUCT")
38151 || f.name.eq_ignore_ascii_case("ROW")
38152 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
38153 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
38154 || f.name.eq_ignore_ascii_case("NOW")
38155 }
38156 Expression::Cast(_) => true,
38157 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
38158 _ => false,
38159 }
38160 }
38161
38162 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
38163 fn infer_sql_type_for_presto(expr: &Expression) -> String {
38164 use crate::expressions::Literal;
38165 match expr {
38166 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38167 "VARCHAR".to_string()
38168 }
38169 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38170 let Literal::Number(n) = lit.as_ref() else {
38171 unreachable!()
38172 };
38173 if n.contains('.') {
38174 "DOUBLE".to_string()
38175 } else {
38176 "INTEGER".to_string()
38177 }
38178 }
38179 Expression::Boolean(_) => "BOOLEAN".to_string(),
38180 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38181 "DATE".to_string()
38182 }
38183 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38184 "TIMESTAMP".to_string()
38185 }
38186 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38187 "TIMESTAMP".to_string()
38188 }
38189 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
38190 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
38191 Expression::Function(f) => {
38192 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
38193 "ROW".to_string()
38194 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
38195 "DATE".to_string()
38196 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
38197 || f.name.eq_ignore_ascii_case("NOW")
38198 {
38199 "TIMESTAMP".to_string()
38200 } else {
38201 "VARCHAR".to_string()
38202 }
38203 }
38204 Expression::Cast(c) => {
38205 // If already cast, use the target type
38206 Self::data_type_to_presto_string(&c.to)
38207 }
38208 _ => "VARCHAR".to_string(),
38209 }
38210 }
38211
38212 /// Convert a DataType to its Presto/Trino string representation for ROW type
38213 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
38214 use crate::expressions::DataType;
38215 match dt {
38216 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
38217 "VARCHAR".to_string()
38218 }
38219 DataType::Int { .. }
38220 | DataType::BigInt { .. }
38221 | DataType::SmallInt { .. }
38222 | DataType::TinyInt { .. } => "INTEGER".to_string(),
38223 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
38224 DataType::Boolean => "BOOLEAN".to_string(),
38225 DataType::Date => "DATE".to_string(),
38226 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
38227 DataType::Struct { fields, .. } => {
38228 let field_strs: Vec<String> = fields
38229 .iter()
38230 .map(|f| {
38231 format!(
38232 "{} {}",
38233 f.name,
38234 Self::data_type_to_presto_string(&f.data_type)
38235 )
38236 })
38237 .collect();
38238 format!("ROW({})", field_strs.join(", "))
38239 }
38240 DataType::Array { element_type, .. } => {
38241 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
38242 }
38243 DataType::Custom { name } => {
38244 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
38245 name.clone()
38246 }
38247 _ => "VARCHAR".to_string(),
38248 }
38249 }
38250
38251 /// Convert IntervalUnit to string
38252 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
38253 match unit {
38254 crate::expressions::IntervalUnit::Year => "YEAR",
38255 crate::expressions::IntervalUnit::Quarter => "QUARTER",
38256 crate::expressions::IntervalUnit::Month => "MONTH",
38257 crate::expressions::IntervalUnit::Week => "WEEK",
38258 crate::expressions::IntervalUnit::Day => "DAY",
38259 crate::expressions::IntervalUnit::Hour => "HOUR",
38260 crate::expressions::IntervalUnit::Minute => "MINUTE",
38261 crate::expressions::IntervalUnit::Second => "SECOND",
38262 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
38263 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
38264 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
38265 }
38266 }
38267
38268 /// Extract unit string from an expression (uppercased)
38269 fn get_unit_str_static(expr: &Expression) -> String {
38270 use crate::expressions::Literal;
38271 match expr {
38272 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
38273 Expression::Var(v) => v.this.to_ascii_uppercase(),
38274 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38275 let Literal::String(s) = lit.as_ref() else {
38276 unreachable!()
38277 };
38278 s.to_ascii_uppercase()
38279 }
38280 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
38281 Expression::Function(f) => {
38282 let base = f.name.to_ascii_uppercase();
38283 if !f.args.is_empty() {
38284 let inner = Self::get_unit_str_static(&f.args[0]);
38285 format!("{}({})", base, inner)
38286 } else {
38287 base
38288 }
38289 }
38290 _ => "DAY".to_string(),
38291 }
38292 }
38293
38294 /// Parse unit string to IntervalUnit
38295 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
38296 match s {
38297 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
38298 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
38299 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" | "M" => {
38300 crate::expressions::IntervalUnit::Month
38301 }
38302 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
38303 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
38304 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
38305 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
38306 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
38307 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
38308 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
38309 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
38310 _ => crate::expressions::IntervalUnit::Day,
38311 }
38312 }
38313
38314 /// Convert expression to simple string for interval building
38315 fn expr_to_string_static(expr: &Expression) -> String {
38316 use crate::expressions::Literal;
38317 match expr {
38318 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38319 let Literal::Number(s) = lit.as_ref() else {
38320 unreachable!()
38321 };
38322 s.clone()
38323 }
38324 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38325 let Literal::String(s) = lit.as_ref() else {
38326 unreachable!()
38327 };
38328 s.clone()
38329 }
38330 Expression::Identifier(id) => id.name.clone(),
38331 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
38332 _ => "1".to_string(),
38333 }
38334 }
38335
38336 /// Extract a simple string representation from a literal expression
38337 fn expr_to_string(expr: &Expression) -> String {
38338 use crate::expressions::Literal;
38339 match expr {
38340 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38341 let Literal::Number(s) = lit.as_ref() else {
38342 unreachable!()
38343 };
38344 s.clone()
38345 }
38346 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38347 let Literal::String(s) = lit.as_ref() else {
38348 unreachable!()
38349 };
38350 s.clone()
38351 }
38352 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
38353 Expression::Identifier(id) => id.name.clone(),
38354 _ => "1".to_string(),
38355 }
38356 }
38357
38358 /// Quote an interval value expression as a string literal if it's a number (or negated number)
38359 fn quote_interval_val(expr: &Expression) -> Expression {
38360 use crate::expressions::Literal;
38361 match expr {
38362 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38363 let Literal::Number(n) = lit.as_ref() else {
38364 unreachable!()
38365 };
38366 Expression::Literal(Box::new(Literal::String(n.clone())))
38367 }
38368 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
38369 Expression::Neg(inner) => {
38370 if let Expression::Literal(lit) = &inner.this {
38371 if let Literal::Number(n) = lit.as_ref() {
38372 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
38373 } else {
38374 inner.this.clone()
38375 }
38376 } else {
38377 expr.clone()
38378 }
38379 }
38380 _ => expr.clone(),
38381 }
38382 }
38383
/// Report whether a timestamp string ends with timezone information.
///
/// Two forms are recognised, both on the whitespace-trimmed input:
/// * a final space-separated token that is `+` or `-` followed by one or more
///   characters that are all ASCII digits or `:` (e.g. ` +02:00`, ` -5`);
/// * one of the abbreviations UTC, GMT, CET, EST, PST, CST, MST (any ASCII
///   case) preceded by a space at the end of the string.
///
/// An offset glued directly to the time (no space before the sign) is not
/// detected.
fn timestamp_string_has_timezone(ts: &str) -> bool {
    const ZONE_SUFFIXES: [&str; 7] = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];

    let text = ts.trim();

    // Numeric offset: inspect only the token after the last space.
    let numeric_offset = text
        .rsplit_once(' ')
        .and_then(|(_, tail)| tail.strip_prefix('+').or_else(|| tail.strip_prefix('-')))
        .map_or(false, |digits| {
            !digits.is_empty() && digits.chars().all(|c| c == ':' || c.is_ascii_digit())
        });
    if numeric_offset {
        return true;
    }

    // Named abbreviation: compare case-insensitively against the known suffixes.
    let lowered = text.to_ascii_lowercase();
    ZONE_SUFFIXES.iter().any(|suffix| lowered.ends_with(suffix))
}
38407
38408 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
38409 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
38410 use crate::expressions::{Cast, DataType, Literal};
38411 match expr {
38412 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38413 let Literal::Timestamp(s) = lit.as_ref() else {
38414 unreachable!()
38415 };
38416 let tz = func_name.starts_with("TIMESTAMP");
38417 Expression::Cast(Box::new(Cast {
38418 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38419 to: if tz {
38420 DataType::Timestamp {
38421 timezone: true,
38422 precision: None,
38423 }
38424 } else {
38425 DataType::Timestamp {
38426 timezone: false,
38427 precision: None,
38428 }
38429 },
38430 trailing_comments: vec![],
38431 double_colon_syntax: false,
38432 format: None,
38433 default: None,
38434 inferred_type: None,
38435 }))
38436 }
38437 other => other,
38438 }
38439 }
38440
38441 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
38442 fn maybe_cast_ts(expr: Expression) -> Expression {
38443 use crate::expressions::{Cast, DataType, Literal};
38444 match expr {
38445 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38446 let Literal::Timestamp(s) = lit.as_ref() else {
38447 unreachable!()
38448 };
38449 Expression::Cast(Box::new(Cast {
38450 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38451 to: DataType::Timestamp {
38452 timezone: false,
38453 precision: None,
38454 },
38455 trailing_comments: vec![],
38456 double_colon_syntax: false,
38457 format: None,
38458 default: None,
38459 inferred_type: None,
38460 }))
38461 }
38462 other => other,
38463 }
38464 }
38465
38466 /// Convert DATE 'x' literal to CAST('x' AS DATE)
38467 fn date_literal_to_cast(expr: Expression) -> Expression {
38468 use crate::expressions::{Cast, DataType, Literal};
38469 match expr {
38470 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38471 let Literal::Date(s) = lit.as_ref() else {
38472 unreachable!()
38473 };
38474 Expression::Cast(Box::new(Cast {
38475 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38476 to: DataType::Date,
38477 trailing_comments: vec![],
38478 double_colon_syntax: false,
38479 format: None,
38480 default: None,
38481 inferred_type: None,
38482 }))
38483 }
38484 other => other,
38485 }
38486 }
38487
38488 /// Ensure an expression that should be a date is CAST(... AS DATE).
38489 /// Handles both DATE literals and string literals that look like dates.
38490 fn ensure_cast_date(expr: Expression) -> Expression {
38491 use crate::expressions::{Cast, DataType, Literal};
38492 match expr {
38493 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38494 let Literal::Date(s) = lit.as_ref() else {
38495 unreachable!()
38496 };
38497 Expression::Cast(Box::new(Cast {
38498 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38499 to: DataType::Date,
38500 trailing_comments: vec![],
38501 double_colon_syntax: false,
38502 format: None,
38503 default: None,
38504 inferred_type: None,
38505 }))
38506 }
38507 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38508 // String literal that should be a date -> CAST('s' AS DATE)
38509 Expression::Cast(Box::new(Cast {
38510 this: expr,
38511 to: DataType::Date,
38512 trailing_comments: vec![],
38513 double_colon_syntax: false,
38514 format: None,
38515 default: None,
38516 inferred_type: None,
38517 }))
38518 }
38519 // Already a CAST or other expression -> leave as-is
38520 other => other,
38521 }
38522 }
38523
38524 /// Force CAST(expr AS DATE) for any expression (not just literals)
38525 /// Skips if the expression is already a CAST to DATE
38526 fn force_cast_date(expr: Expression) -> Expression {
38527 use crate::expressions::{Cast, DataType};
38528 // If it's already a CAST to DATE, don't double-wrap
38529 if let Expression::Cast(ref c) = expr {
38530 if matches!(c.to, DataType::Date) {
38531 return expr;
38532 }
38533 }
38534 Expression::Cast(Box::new(Cast {
38535 this: expr,
38536 to: DataType::Date,
38537 trailing_comments: vec![],
38538 double_colon_syntax: false,
38539 format: None,
38540 default: None,
38541 inferred_type: None,
38542 }))
38543 }
38544
/// Placeholder function name standing in for `TO_DATE`.
///
/// Per the original note: the name `_POLYGLOT_TO_DATE` is not recognized by the
/// TO_DATE -> CAST logic of the Snowflake handler, so calls using it survive
/// that rewrite; the Snowflake DATEDIFF handler converts them back to TO_DATE.
const PRESERVED_TO_DATE: &str = "_POLYGLOT_TO_DATE";
38549
38550 fn ensure_to_date_preserved(expr: Expression) -> Expression {
38551 use crate::expressions::{Function, Literal};
38552 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
38553 {
38554 Expression::Function(Box::new(Function::new(
38555 Self::PRESERVED_TO_DATE.to_string(),
38556 vec![expr],
38557 )))
38558 } else {
38559 expr
38560 }
38561 }
38562
38563 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
38564 fn try_cast_date(expr: Expression) -> Expression {
38565 use crate::expressions::{Cast, DataType};
38566 Expression::TryCast(Box::new(Cast {
38567 this: expr,
38568 to: DataType::Date,
38569 trailing_comments: vec![],
38570 double_colon_syntax: false,
38571 format: None,
38572 default: None,
38573 inferred_type: None,
38574 }))
38575 }
38576
38577 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
38578 fn double_cast_timestamp_date(expr: Expression) -> Expression {
38579 use crate::expressions::{Cast, DataType};
38580 let inner = Expression::Cast(Box::new(Cast {
38581 this: expr,
38582 to: DataType::Timestamp {
38583 timezone: false,
38584 precision: None,
38585 },
38586 trailing_comments: vec![],
38587 double_colon_syntax: false,
38588 format: None,
38589 default: None,
38590 inferred_type: None,
38591 }));
38592 Expression::Cast(Box::new(Cast {
38593 this: inner,
38594 to: DataType::Date,
38595 trailing_comments: vec![],
38596 double_colon_syntax: false,
38597 format: None,
38598 default: None,
38599 inferred_type: None,
38600 }))
38601 }
38602
38603 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
38604 fn double_cast_datetime_date(expr: Expression) -> Expression {
38605 use crate::expressions::{Cast, DataType};
38606 let inner = Expression::Cast(Box::new(Cast {
38607 this: expr,
38608 to: DataType::Custom {
38609 name: "DATETIME".to_string(),
38610 },
38611 trailing_comments: vec![],
38612 double_colon_syntax: false,
38613 format: None,
38614 default: None,
38615 inferred_type: None,
38616 }));
38617 Expression::Cast(Box::new(Cast {
38618 this: inner,
38619 to: DataType::Date,
38620 trailing_comments: vec![],
38621 double_colon_syntax: false,
38622 format: None,
38623 default: None,
38624 inferred_type: None,
38625 }))
38626 }
38627
38628 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
38629 fn double_cast_datetime2_date(expr: Expression) -> Expression {
38630 use crate::expressions::{Cast, DataType};
38631 let inner = Expression::Cast(Box::new(Cast {
38632 this: expr,
38633 to: DataType::Custom {
38634 name: "DATETIME2".to_string(),
38635 },
38636 trailing_comments: vec![],
38637 double_colon_syntax: false,
38638 format: None,
38639 default: None,
38640 inferred_type: None,
38641 }));
38642 Expression::Cast(Box::new(Cast {
38643 this: inner,
38644 to: DataType::Date,
38645 trailing_comments: vec![],
38646 double_colon_syntax: false,
38647 format: None,
38648 default: None,
38649 inferred_type: None,
38650 }))
38651 }
38652
/// Convert a Hive/Java-style date format string to a C-style (strftime) format.
///
/// e.g. `"yyyy-MM-dd'T'HH"` -> `"%Y-%m-%d'T'%H"`
///
/// Each run of the same pattern letter is translated as one unit:
///
/// | Pattern          | Output |
/// |------------------|--------|
/// | `yy`             | `%y`   |
/// | `y` (other runs) | `%Y`   |
/// | `M`, `MM`        | `%m`   |
/// | `MMM`            | `%b`   |
/// | `MMMM`+          | `%B`   |
/// | `d`…             | `%d`   |
/// | `H`…             | `%H`   |
/// | `h`…             | `%I`   |
/// | `m`…             | `%M`   |
/// | `s`…             | `%S`   |
/// | `S`…             | `%f`   |
/// | `a`…             | `%p`   |
/// | `E`..`EEE`       | `%a`   |
/// | `EEEE`+          | `%A`   |
///
/// Text inside single quotes is copied verbatim, quotes included; an
/// unterminated quote copies the rest of the input. All other characters are
/// copied unchanged.
///
/// Runs of four or more `M` used to yield the abbreviated `%b`; they now yield
/// the full month name `%B`, consistent with the `EEEE` -> `%A` handling.
fn hive_format_to_c_format(fmt: &str) -> String {
    let mut out = String::with_capacity(fmt.len());
    let mut chars = fmt.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '\'' {
            // Quoted literal text: pass through up to and including the closing quote.
            out.push('\'');
            for inner in chars.by_ref() {
                out.push(inner);
                if inner == '\'' {
                    break;
                }
            }
            continue;
        }

        // Length of the run of identical characters starting at `ch`.
        let mut run = 1usize;
        while chars.peek() == Some(&ch) {
            chars.next();
            run += 1;
        }

        let directive = match ch {
            'y' => {
                if run == 2 {
                    "%y"
                } else {
                    "%Y"
                }
            }
            'M' => match run {
                1 | 2 => "%m",
                3 => "%b",
                _ => "%B",
            },
            'd' => "%d",
            'H' => "%H",
            'h' => "%I",
            'm' => "%M",
            's' => "%S",
            // Fractional seconds, whatever the requested width.
            'S' => "%f",
            // AM/PM marker.
            'a' => "%p",
            'E' => {
                if run >= 4 {
                    "%A"
                } else {
                    "%a"
                }
            }
            other => {
                // Not a pattern letter: copy the run through unchanged.
                for _ in 0..run {
                    out.push(other);
                }
                continue;
            }
        };
        out.push_str(directive);
    }

    out
}
38776
38777 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
38778 fn hive_format_to_presto_format(fmt: &str) -> String {
38779 let c_fmt = Self::hive_format_to_c_format(fmt);
38780 // Presto uses %T for HH:MM:SS
38781 c_fmt.replace("%H:%M:%S", "%T")
38782 }
38783
38784 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
38785 fn ensure_cast_timestamp(expr: Expression) -> Expression {
38786 use crate::expressions::{Cast, DataType, Literal};
38787 match expr {
38788 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38789 let Literal::Timestamp(s) = lit.as_ref() else {
38790 unreachable!()
38791 };
38792 Expression::Cast(Box::new(Cast {
38793 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38794 to: DataType::Timestamp {
38795 timezone: false,
38796 precision: None,
38797 },
38798 trailing_comments: vec![],
38799 double_colon_syntax: false,
38800 format: None,
38801 default: None,
38802 inferred_type: None,
38803 }))
38804 }
38805 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38806 Expression::Cast(Box::new(Cast {
38807 this: expr,
38808 to: DataType::Timestamp {
38809 timezone: false,
38810 precision: None,
38811 },
38812 trailing_comments: vec![],
38813 double_colon_syntax: false,
38814 format: None,
38815 default: None,
38816 inferred_type: None,
38817 }))
38818 }
38819 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38820 let Literal::Datetime(s) = lit.as_ref() else {
38821 unreachable!()
38822 };
38823 Expression::Cast(Box::new(Cast {
38824 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38825 to: DataType::Timestamp {
38826 timezone: false,
38827 precision: None,
38828 },
38829 trailing_comments: vec![],
38830 double_colon_syntax: false,
38831 format: None,
38832 default: None,
38833 inferred_type: None,
38834 }))
38835 }
38836 other => other,
38837 }
38838 }
38839
38840 /// Force CAST to TIMESTAMP for any expression (not just literals)
38841 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
38842 fn force_cast_timestamp(expr: Expression) -> Expression {
38843 use crate::expressions::{Cast, DataType};
38844 // Don't double-wrap if already a CAST to TIMESTAMP
38845 if let Expression::Cast(ref c) = expr {
38846 if matches!(c.to, DataType::Timestamp { .. }) {
38847 return expr;
38848 }
38849 }
38850 Expression::Cast(Box::new(Cast {
38851 this: expr,
38852 to: DataType::Timestamp {
38853 timezone: false,
38854 precision: None,
38855 },
38856 trailing_comments: vec![],
38857 double_colon_syntax: false,
38858 format: None,
38859 default: None,
38860 inferred_type: None,
38861 }))
38862 }
38863
38864 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
38865 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
38866 use crate::expressions::{Cast, DataType, Literal};
38867 match expr {
38868 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38869 let Literal::Timestamp(s) = lit.as_ref() else {
38870 unreachable!()
38871 };
38872 Expression::Cast(Box::new(Cast {
38873 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38874 to: DataType::Timestamp {
38875 timezone: true,
38876 precision: None,
38877 },
38878 trailing_comments: vec![],
38879 double_colon_syntax: false,
38880 format: None,
38881 default: None,
38882 inferred_type: None,
38883 }))
38884 }
38885 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38886 Expression::Cast(Box::new(Cast {
38887 this: expr,
38888 to: DataType::Timestamp {
38889 timezone: true,
38890 precision: None,
38891 },
38892 trailing_comments: vec![],
38893 double_colon_syntax: false,
38894 format: None,
38895 default: None,
38896 inferred_type: None,
38897 }))
38898 }
38899 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38900 let Literal::Datetime(s) = lit.as_ref() else {
38901 unreachable!()
38902 };
38903 Expression::Cast(Box::new(Cast {
38904 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38905 to: DataType::Timestamp {
38906 timezone: true,
38907 precision: None,
38908 },
38909 trailing_comments: vec![],
38910 double_colon_syntax: false,
38911 format: None,
38912 default: None,
38913 inferred_type: None,
38914 }))
38915 }
38916 other => other,
38917 }
38918 }
38919
38920 /// Ensure expression is CAST to DATETIME (for BigQuery)
38921 fn ensure_cast_datetime(expr: Expression) -> Expression {
38922 use crate::expressions::{Cast, DataType, Literal};
38923 match expr {
38924 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38925 Expression::Cast(Box::new(Cast {
38926 this: expr,
38927 to: DataType::Custom {
38928 name: "DATETIME".to_string(),
38929 },
38930 trailing_comments: vec![],
38931 double_colon_syntax: false,
38932 format: None,
38933 default: None,
38934 inferred_type: None,
38935 }))
38936 }
38937 other => other,
38938 }
38939 }
38940
38941 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
38942 fn force_cast_datetime(expr: Expression) -> Expression {
38943 use crate::expressions::{Cast, DataType};
38944 if let Expression::Cast(ref c) = expr {
38945 if let DataType::Custom { ref name } = c.to {
38946 if name.eq_ignore_ascii_case("DATETIME") {
38947 return expr;
38948 }
38949 }
38950 }
38951 Expression::Cast(Box::new(Cast {
38952 this: expr,
38953 to: DataType::Custom {
38954 name: "DATETIME".to_string(),
38955 },
38956 trailing_comments: vec![],
38957 double_colon_syntax: false,
38958 format: None,
38959 default: None,
38960 inferred_type: None,
38961 }))
38962 }
38963
38964 /// Ensure expression is CAST to DATETIME2 (for TSQL)
38965 fn ensure_cast_datetime2(expr: Expression) -> Expression {
38966 use crate::expressions::{Cast, DataType, Literal};
38967 match expr {
38968 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38969 Expression::Cast(Box::new(Cast {
38970 this: expr,
38971 to: DataType::Custom {
38972 name: "DATETIME2".to_string(),
38973 },
38974 trailing_comments: vec![],
38975 double_colon_syntax: false,
38976 format: None,
38977 default: None,
38978 inferred_type: None,
38979 }))
38980 }
38981 other => other,
38982 }
38983 }
38984
38985 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
38986 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
38987 use crate::expressions::{Cast, DataType, Literal};
38988 match expr {
38989 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38990 let Literal::Timestamp(s) = lit.as_ref() else {
38991 unreachable!()
38992 };
38993 Expression::Cast(Box::new(Cast {
38994 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38995 to: DataType::Timestamp {
38996 timezone: true,
38997 precision: None,
38998 },
38999 trailing_comments: vec![],
39000 double_colon_syntax: false,
39001 format: None,
39002 default: None,
39003 inferred_type: None,
39004 }))
39005 }
39006 other => other,
39007 }
39008 }
39009
39010 /// Convert BigQuery format string to Snowflake format string
39011 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
39012 use crate::expressions::Literal;
39013 if let Expression::Literal(lit) = format_expr {
39014 if let Literal::String(s) = lit.as_ref() {
39015 let sf = s
39016 .replace("%Y", "yyyy")
39017 .replace("%m", "mm")
39018 .replace("%d", "DD")
39019 .replace("%H", "HH24")
39020 .replace("%M", "MI")
39021 .replace("%S", "SS")
39022 .replace("%b", "mon")
39023 .replace("%B", "Month")
39024 .replace("%e", "FMDD");
39025 Expression::Literal(Box::new(Literal::String(sf)))
39026 } else {
39027 format_expr.clone()
39028 }
39029 } else {
39030 format_expr.clone()
39031 }
39032 }
39033
39034 /// Convert BigQuery format string to DuckDB format string
39035 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
39036 use crate::expressions::Literal;
39037 if let Expression::Literal(lit) = format_expr {
39038 if let Literal::String(s) = lit.as_ref() {
39039 let duck = s
39040 .replace("%T", "%H:%M:%S")
39041 .replace("%F", "%Y-%m-%d")
39042 .replace("%D", "%m/%d/%y")
39043 .replace("%x", "%m/%d/%y")
39044 .replace("%c", "%a %b %-d %H:%M:%S %Y")
39045 .replace("%e", "%-d")
39046 .replace("%E6S", "%S.%f");
39047 Expression::Literal(Box::new(Literal::String(duck)))
39048 } else {
39049 format_expr.clone()
39050 }
39051 } else {
39052 format_expr.clone()
39053 }
39054 }
39055
39056 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
39057 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
39058 use crate::expressions::Literal;
39059 if let Expression::Literal(lit) = format_expr {
39060 if let Literal::String(s) = lit.as_ref() {
39061 // Replace format elements from longest to shortest to avoid partial matches
39062 let result = s
39063 .replace("YYYYMMDD", "%Y%m%d")
39064 .replace("YYYY", "%Y")
39065 .replace("YY", "%y")
39066 .replace("MONTH", "%B")
39067 .replace("MON", "%b")
39068 .replace("MM", "%m")
39069 .replace("DD", "%d")
39070 .replace("HH24", "%H")
39071 .replace("HH12", "%I")
39072 .replace("HH", "%I")
39073 .replace("MI", "%M")
39074 .replace("SSTZH", "%S%z")
39075 .replace("SS", "%S")
39076 .replace("TZH", "%z");
39077 Expression::Literal(Box::new(Literal::String(result)))
39078 } else {
39079 format_expr.clone()
39080 }
39081 } else {
39082 format_expr.clone()
39083 }
39084 }
39085
39086 /// Normalize BigQuery format strings for BQ->BQ output
39087 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
39088 use crate::expressions::Literal;
39089 if let Expression::Literal(lit) = format_expr {
39090 if let Literal::String(s) = lit.as_ref() {
39091 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
39092 Expression::Literal(Box::new(Literal::String(norm)))
39093 } else {
39094 format_expr.clone()
39095 }
39096 } else {
39097 format_expr.clone()
39098 }
39099 }
39100}
39101
39102#[cfg(test)]
39103mod tests {
39104 use super::*;
39105
#[test]
fn test_dialect_type_from_str() {
    // Each dialect name (including aliases) must parse to the expected variant.
    let postgres_short: DialectType = "postgres".parse().unwrap();
    assert_eq!(postgres_short, DialectType::PostgreSQL);

    let postgres_long: DialectType = "postgresql".parse().unwrap();
    assert_eq!(postgres_long, DialectType::PostgreSQL);

    let mysql: DialectType = "mysql".parse().unwrap();
    assert_eq!(mysql, DialectType::MySQL);

    let bigquery: DialectType = "bigquery".parse().unwrap();
    assert_eq!(bigquery, DialectType::BigQuery);
}
39122
#[test]
fn test_basic_transpile() {
    // A trivial SELECT must come out of Generic -> PostgreSQL as one unchanged statement.
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile("SELECT 1", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(statements.len(), 1);
    assert_eq!(statements[0], "SELECT 1");
}
39132
/// SQLite input with double-quoted DEFAULT values must come out of the
/// SQLite -> PostgreSQL transpile with those defaults as single-quoted strings,
/// while the double-quoted column names stay double-quoted.
#[test]
fn test_sqlite_double_quoted_column_defaults_to_postgres_strings() {
    let sqlite = Dialect::get(DialectType::SQLite);
    let result = sqlite
        .transpile(
            r#"CREATE TABLE "_collections" (
                "type" TEXT DEFAULT "base" NOT NULL,
                "fields" JSON DEFAULT "[]" NOT NULL,
                "options" JSON DEFAULT "{}" NOT NULL
            )"#,
            DialectType::PostgreSQL,
        )
        .unwrap();

    // Each column definition is checked as a substring of the generated statement.
    assert!(result[0].contains(r#""type" TEXT DEFAULT 'base' NOT NULL"#));
    assert!(result[0].contains(r#""fields" JSON DEFAULT '[]' NOT NULL"#));
    assert!(result[0].contains(r#""options" JSON DEFAULT '{}' NOT NULL"#));
}
39151
#[test]
fn test_sqlite_identity_preserves_double_quoted_column_defaults() {
    // SQLite -> SQLite must round-trip the statement byte-for-byte,
    // keeping the double-quoted DEFAULT value.
    let ddl = r#"CREATE TABLE "_collections" ("type" TEXT DEFAULT "base" NOT NULL)"#;
    let sqlite = Dialect::get(DialectType::SQLite);
    let statements = sqlite.transpile(ddl, DialectType::SQLite).unwrap();

    assert_eq!(statements[0], ddl);
}
39167
#[test]
fn test_function_transformation_mysql() {
    // MySQL has no NVL; it must be rewritten to IFNULL.
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile("SELECT NVL(a, b)", DialectType::MySQL)
        .unwrap();
    assert_eq!(statements[0], "SELECT IFNULL(a, b)");
}
39177
#[test]
fn test_get_path_duckdb() {
    // Smoke test: each transpile below must succeed; the outputs are only
    // printed for inspection, not compared against expected SQL.
    let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";
    let get_path_sql = "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')";
    let snowflake = Dialect::get(DialectType::Snowflake);

    // Step 1: colon access, Snowflake round trip.
    let sf_to_sf = snowflake
        .transpile(colon_sql, DialectType::Snowflake)
        .unwrap();
    eprintln!("Snowflake->Snowflake colon: {}", sf_to_sf[0]);

    // Step 2: colon access, DuckDB target.
    let sf_to_duck = snowflake.transpile(colon_sql, DialectType::DuckDB).unwrap();
    eprintln!("Snowflake->DuckDB colon: {}", sf_to_duck[0]);

    // Step 3: explicit GET_PATH call, DuckDB target.
    let explicit = snowflake
        .transpile(get_path_sql, DialectType::DuckDB)
        .unwrap();
    eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", explicit[0]);
}
39210
#[test]
fn test_function_transformation_postgres() {
    let generic = Dialect::get(DialectType::Generic);

    // PostgreSQL has no IFNULL; it must be rewritten to COALESCE.
    let from_ifnull = generic
        .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(from_ifnull[0], "SELECT COALESCE(a, b)");

    // NVL must be rewritten to COALESCE as well.
    let from_nvl = generic
        .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(from_nvl[0], "SELECT COALESCE(a, b)");
}
39226
#[test]
fn test_hive_cast_to_trycast() {
    // A Hive CAST becomes TRY_CAST on targets that support it.
    let source_sql = "CAST(1 AS INT)";
    let hive = Dialect::get(DialectType::Hive);

    let duckdb_out = hive.transpile(source_sql, DialectType::DuckDB).unwrap();
    assert_eq!(duckdb_out[0], "TRY_CAST(1 AS INT)");

    let presto_out = hive.transpile(source_sql, DialectType::Presto).unwrap();
    assert_eq!(presto_out[0], "TRY_CAST(1 AS INTEGER)");
}
39241
#[test]
fn test_hive_array_identity() {
    // ARRAY<DATE> must keep its angle-bracket syntax on a Hive round trip,
    // both through `transpile` and through parse -> transform -> generate.
    let hive = Dialect::get(DialectType::Hive);
    let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";

    // Path 1: single-call transpile.
    let transpiled = hive.transpile(sql, DialectType::Hive).unwrap();
    eprintln!("Hive ARRAY via transpile: {}", transpiled[0]);
    assert!(
        transpiled[0].contains("ARRAY<DATE>"),
        "transpile: Expected ARRAY<DATE>, got: {}",
        transpiled[0]
    );

    // Path 2: the identity test path, one stage at a time.
    let parsed = hive.parse(sql).unwrap();
    let rewritten = hive.transform(parsed[0].clone()).unwrap();
    let generated = hive.generate(&rewritten).unwrap();
    eprintln!("Hive ARRAY via identity path: {}", generated);
    assert!(
        generated.contains("ARRAY<DATE>"),
        "identity path: Expected ARRAY<DATE>, got: {}",
        generated
    );
}
39268
#[test]
fn test_starrocks_delete_between_expansion() {
    // StarRocks rejects BETWEEN inside DELETE, so it is expanded there and
    // only there.
    let generic = Dialect::get(DialectType::Generic);
    let to_starrocks = |sql: &str| generic.transpile(sql, DialectType::StarRocks).unwrap();

    // DELETE ... BETWEEN -> `>= AND <=`
    let delete_between = to_starrocks("DELETE FROM t WHERE a BETWEEN b AND c");
    assert_eq!(delete_between[0], "DELETE FROM t WHERE a >= b AND a <= c");

    // DELETE ... NOT BETWEEN -> `< OR >`
    let delete_not_between = to_starrocks("DELETE FROM t WHERE a NOT BETWEEN b AND c");
    assert_eq!(delete_not_between[0], "DELETE FROM t WHERE a < b OR a > c");

    // SELECT keeps BETWEEN (StarRocks supports it there).
    let select_between = to_starrocks("SELECT * FROM t WHERE a BETWEEN b AND c");
    assert!(
        select_between[0].contains("BETWEEN"),
        "BETWEEN should be preserved in SELECT"
    );
}
39304
#[test]
fn test_snowflake_ltrim_rtrim_parse() {
    // Nested LTRIM(RTRIM(..)) must transpile from Snowflake to DuckDB without error.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let outcome = snowflake.transpile("SELECT LTRIM(RTRIM(col)) FROM t1", DialectType::DuckDB);

    // Print whichever side we got so a failure is easy to diagnose.
    if let Ok(statements) = &outcome {
        eprintln!("LTRIM/RTRIM result: {}", statements[0]);
    } else if let Err(error) = &outcome {
        eprintln!("LTRIM/RTRIM error: {}", error);
    }

    assert!(
        outcome.is_ok(),
        "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
        outcome.err()
    );
}
39320
#[test]
fn test_duckdb_count_if_parse() {
    // COUNT_IF must survive a DuckDB -> DuckDB transpile without error.
    let duckdb = Dialect::get(DialectType::DuckDB);
    let outcome = duckdb.transpile("COUNT_IF(x)", DialectType::DuckDB);

    // Print whichever side we got so a failure is easy to diagnose.
    if let Ok(statements) = &outcome {
        eprintln!("COUNT_IF result: {}", statements[0]);
    } else if let Err(error) = &outcome {
        eprintln!("COUNT_IF error: {}", error);
    }

    assert!(
        outcome.is_ok(),
        "Expected successful parse of COUNT_IF(x), got error: {:?}",
        outcome.err()
    );
}
39336
#[test]
fn test_tsql_cast_tinyint_parse() {
    // A TSQL cast to TINYINT must transpile to DuckDB without error.
    let tsql = Dialect::get(DialectType::TSQL);
    let outcome = tsql.transpile("CAST(X AS TINYINT)", DialectType::DuckDB);

    // Print whichever side we got so a failure is easy to diagnose.
    if let Ok(statements) = &outcome {
        eprintln!("TSQL CAST TINYINT result: {}", statements[0]);
    } else if let Err(error) = &outcome {
        eprintln!("TSQL CAST TINYINT error: {}", error);
    }

    assert!(
        outcome.is_ok(),
        "Expected successful transpile, got error: {:?}",
        outcome.err()
    );
}
39352
#[test]
fn test_pg_hash_bitwise_xor() {
    // PostgreSQL's `#` operator must round-trip unchanged.
    let sql = "x # y";
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let statements = postgres.transpile(sql, DialectType::PostgreSQL).unwrap();
    assert_eq!(statements[0], sql);
}
39359
#[test]
fn test_pg_array_to_duckdb() {
    // PostgreSQL ARRAY[...] constructors become DuckDB list literals; `@>` is kept.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let source_sql = "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]";
    let statements = postgres.transpile(source_sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], "SELECT [1, 2, 3] @> [1, 2]");
}
39368
#[test]
fn test_array_remove_bigquery() {
    // For BigQuery, ARRAY_REMOVE is rewritten to a filtered UNNEST subquery.
    let expected = "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
        .unwrap();
    assert_eq!(statements[0], expected);
}
39380
#[test]
fn test_map_clickhouse_case() {
    // Smoke test: parsing and transpiling a MAP cast to ClickHouse must both
    // succeed; the outputs are printed for inspection only.
    let sql = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";
    let generic = Dialect::get(DialectType::Generic);

    let ast = generic.parse(sql).unwrap();
    eprintln!("MAP parsed: {:?}", ast);

    let statements = generic.transpile(sql, DialectType::ClickHouse).unwrap();
    eprintln!("MAP result: {}", statements[0]);
}
39396
#[test]
fn test_generate_date_array_presto() {
    // For Presto, GENERATE_DATE_ARRAY with a weekly step becomes SEQUENCE
    // with a 7-day interval.
    let source_sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let expected = "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))";

    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(source_sql, DialectType::Presto).unwrap();
    eprintln!("GDA -> Presto: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
39407
#[test]
fn test_generate_date_array_postgres() {
    // Smoke test: the transpile to PostgreSQL must succeed; the output is only logged.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::PostgreSQL).unwrap();
    eprintln!("GDA -> PostgreSQL: {}", statements[0]);
}
39417
#[test]
fn test_generate_date_array_snowflake() {
    // Smoke test: the transpile to Snowflake must succeed; the output is only logged.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("GDA -> Snowflake: {}", statements[0]);
}
39429
#[test]
fn test_array_length_generate_date_array_snowflake() {
    // Smoke test: ARRAY_LENGTH wrapped around GENERATE_DATE_ARRAY must transpile to
    // Snowflake without error; the output is only logged.
    let sql = "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", statements[0]);
}
39439
#[test]
fn test_generate_date_array_mysql() {
    // Smoke test: the transpile to MySQL must succeed; the output is only logged.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA -> MySQL: {}", statements[0]);
}
39449
#[test]
fn test_generate_date_array_redshift() {
    // Smoke test: the transpile to Redshift must succeed; the output is only logged.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::Redshift).unwrap();
    eprintln!("GDA -> Redshift: {}", statements[0]);
}
39459
#[test]
fn test_generate_date_array_tsql() {
    // Smoke test: the transpile to T-SQL must succeed; the output is only logged.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA -> TSQL: {}", statements[0]);
}
39469
#[test]
fn test_struct_colon_syntax() {
    // Transpiles the same STRUCT cast to ClickHouse twice: first with `name TYPE`
    // fields, then with `name: TYPE` fields. Both outcomes are only logged, so a
    // transpile error does not fail this test.
    let generic = Dialect::get(DialectType::Generic);
    let cases = [
        (
            "no colon",
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
        ),
        (
            "colon",
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
        ),
    ];
    for (label, sql) in cases {
        match generic.transpile(sql, DialectType::ClickHouse) {
            Ok(statements) => eprintln!("STRUCT {} -> ClickHouse: {}", label, statements[0]),
            Err(err) => eprintln!("STRUCT {} error: {}", label, err),
        }
    }
}
39492
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    // Smoke test: GENERATE_DATE_ARRAY inside a CTE must transpile to MySQL without
    // error; the output is only logged.
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA CTE -> MySQL: {}", statements[0]);
}
39502
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    // Smoke test: GENERATE_DATE_ARRAY inside a CTE must transpile to T-SQL without
    // error; the output is only logged.
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA CTE -> TSQL: {}", statements[0]);
}
39512
#[test]
fn test_decode_literal_no_null_check() {
    // When every DECODE argument is a literal, the DuckDB output should be a plain
    // equality CASE with no IS NULL handling.
    let sql = "SELECT decode(1,2,3,4)";
    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
        "Literal DECODE should not have IS NULL checks"
    );
}
39525
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    // Comparing a column against a literal search value should also yield a plain
    // equality CASE (the original note says this mirrors sqlglot).
    let sql = "SELECT decode(col, 2, 3, 4) FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        statements[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
39538
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    // When both sides of the comparison are columns, the null-safe form must be kept:
    // the DuckDB output has to mention IS NULL somewhere.
    let sql = "SELECT decode(col, col2, 3, 4) FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    assert!(
        rendered.contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        rendered
    );
}
39552
#[test]
fn test_decode_null_search() {
    // A literal NULL search value is expected to turn into an `IS NULL` test.
    let sql = "SELECT decode(col, NULL, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let statements = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39565
39566 // =========================================================================
39567 // REGEXP function transpilation tests
39568 // =========================================================================
39569
#[test]
fn test_regexp_substr_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR is expected as DuckDB REGEXP_EXTRACT.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39578
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
    // A position argument of 1 is expected to be dropped from the DuckDB output.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39587
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
    // A position greater than 1 is expected as SUBSTRING on the subject, wrapped in
    // NULLIF(..., '') before extraction.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39599
#[test]
fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
    // An occurrence greater than 1 is expected as ARRAY_EXTRACT over
    // REGEXP_EXTRACT_ALL, indexed by the occurrence.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)";
    let expected = "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39614
#[test]
fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
    // With position 1, occurrence 1 and the 'e' flag, the DuckDB output is expected
    // to be the plain two-argument REGEXP_EXTRACT.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39626
#[test]
fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
    // An explicit group argument of 0 is expected to be dropped along with the other
    // default arguments.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39638
#[test]
fn test_regexp_substr_snowflake_identity_strip_group0() {
    // Snowflake -> Snowflake: only the trailing group argument of 0 is expected to be
    // stripped; the remaining arguments stay as written.
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
39650
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
    // Two-argument REGEXP_SUBSTR_ALL is expected as DuckDB REGEXP_EXTRACT_ALL.
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39662
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
    // A position greater than 1 is expected as SUBSTRING applied to the subject.
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39677
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
    // With position 1, occurrence 1 and the 'e' flag, the DuckDB output is expected
    // to be the plain two-argument REGEXP_EXTRACT_ALL.
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39689
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
    // An explicit group argument of 0 is expected to be dropped along with the other
    // default arguments.
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39701
#[test]
fn test_regexp_substr_all_snowflake_identity_strip_group0() {
    // Snowflake -> Snowflake: only the trailing group argument of 0 is expected to be
    // stripped; the remaining arguments stay as written.
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
39716
#[test]
fn test_regexp_count_snowflake_to_duckdb_2arg() {
    // REGEXP_COUNT is expected as LENGTH(REGEXP_EXTRACT_ALL(...)) guarded by a CASE
    // that yields 0 when the pattern is the empty string.
    let sql = "SELECT REGEXP_COUNT(s, 'pattern')";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39728
#[test]
fn test_regexp_count_snowflake_to_duckdb_3arg() {
    // The position argument is expected as SUBSTRING on the subject inside the
    // same empty-pattern CASE guard.
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 3)";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39740
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
    // The flags argument is expected as an inline `(?i)` prefix concatenated onto the
    // pattern, both in the CASE guard and in the extraction call.
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')";
    let expected = "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39755
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
    // Same shape as the single-flag case, but with a literal subject and the
    // multi-character flag string 'im' rendered as a `(?im)` prefix.
    let sql = "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')";
    let expected = "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39770
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
    // Position 1 with occurrence 1 is expected as the plain three-argument
    // REGEXP_REPLACE.
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)";
    let expected = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39782
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
    // Position 3 with occurrence 0: the untouched prefix is expected to be
    // concatenated with a 'g'-flagged replace over the remainder.
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39797
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
    // Position 3 with occurrence 1: same prefix-plus-remainder shape, but the
    // replace call is expected without the 'g' flag.
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39812
#[test]
fn test_rlike_snowflake_to_duckdb_2arg() {
    // Function-style RLIKE is expected as DuckDB REGEXP_FULL_MATCH.
    let sql = "SELECT RLIKE(a, b)";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b)";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39821
#[test]
fn test_rlike_snowflake_to_duckdb_3arg_flags() {
    // The flags argument is expected to be passed through as REGEXP_FULL_MATCH's
    // third argument.
    let sql = "SELECT RLIKE(a, b, 'i')";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b, 'i')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(statements[0], expected);
}
39830
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
    // A pattern without a capture group is expected as the two-argument
    // REGEXP_SUBSTR_ALL.
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let bigquery = Dialect::get(DialectType::BigQuery);
    let statements = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
39842
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
    // A pattern containing a capture group is expected as the six-argument form,
    // ending in the arguments `1, 1, 'c', 1`.
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)";
    let bigquery = Dialect::get(DialectType::BigQuery);
    let statements = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(statements[0], expected);
}
39857
#[test]
fn test_regexp_instr_snowflake_to_duckdb_2arg() {
    // Only the shape of the DuckDB output is checked: it must contain a CASE WHEN
    // expression and a LIST_SUM call.
    let sql = "SELECT REGEXP_INSTR(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    for fragment in ["CASE WHEN", "LIST_SUM"] {
        assert!(
            rendered.contains(fragment),
            "Expected {} in result: {}",
            fragment,
            rendered
        );
    }
}
39875
#[test]
fn test_array_except_generic_to_duckdb() {
    // Only the shape of the DuckDB output is checked: each listed fragment must
    // appear somewhere in the generated SQL, in this assertion order.
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", rendered);
    for fragment in [
        "CASE WHEN",
        "LIST_FILTER",
        "LIST_DISTINCT",
        "IS NOT DISTINCT FROM",
    ] {
        assert!(
            rendered.contains(fragment),
            "Expected {}: {}",
            fragment,
            rendered
        );
    }
    // This check has its own message wording, so it stays outside the loop.
    assert!(
        rendered.contains("= 0"),
        "Expected = 0 filter: {}",
        rendered
    );
}
39912
#[test]
fn test_array_except_generic_to_snowflake() {
    // ARRAY_EXCEPT is expected to keep its name for Snowflake, with the `ARRAY(...)`
    // arguments rendered as `[...]` literals.
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
39925
#[test]
fn test_array_except_generic_to_presto() {
    // ARRAY_EXCEPT is expected to keep its name for Presto, with the `ARRAY(...)`
    // arguments rendered as `ARRAY[...]` literals.
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])";
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic.transpile(sql, DialectType::Presto).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Presto: {}", statements[0]);
    assert_eq!(statements[0], expected);
}
39938
#[test]
fn test_array_except_snowflake_to_duckdb() {
    // Only the shape of the DuckDB output is checked: it must contain a CASE WHEN
    // expression and a LIST_TRANSFORM call.
    let sql = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", rendered);
    for fragment in ["CASE WHEN", "LIST_TRANSFORM"] {
        assert!(
            rendered.contains(fragment),
            "Expected {}: {}",
            fragment,
            rendered
        );
    }
}
39957
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    // Snowflake -> Snowflake: ARRAY_CONTAINS with an array literal that includes NULL
    // must round-trip unchanged.
    let sql = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", statements[0]);
    assert_eq!(statements[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
}
39970
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    // Only the shape of the DuckDB output is checked: it must contain CASE WHEN,
    // NULLIF and ARRAY_CONTAINS, asserted in that order.
    let sql = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", rendered);
    for fragment in ["CASE WHEN", "NULLIF", "ARRAY_CONTAINS"] {
        assert!(
            rendered.contains(fragment),
            "Expected {}: {}",
            fragment,
            rendered
        );
    }
}
39997
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    // Only the shape of the DuckDB output is checked: it must contain CASE WHEN,
    // LIST_DISTINCT, LIST_APPEND and LIST_FILTER, asserted in that order.
    let sql = "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let statements = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    let rendered = &statements[0];
    eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", rendered);
    for fragment in ["CASE WHEN", "LIST_DISTINCT", "LIST_APPEND", "LIST_FILTER"] {
        assert!(
            rendered.contains(fragment),
            "Expected {}: {}",
            fragment,
            rendered
        );
    }
}
40028 }
40029}