polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161#[cfg(feature = "transpile")]
162use crate::expressions::{ColumnConstraint, Function, Identifier, Literal};
163use crate::expressions::{DataType, Expression};
164#[cfg(any(
165 feature = "transpile",
166 feature = "ast-tools",
167 feature = "generate",
168 feature = "semantic"
169))]
170use crate::expressions::{From, FunctionBody, Join, Null, OrderBy, OutputClause, TableRef, With};
171#[cfg(feature = "transpile")]
172use crate::generator::UnsupportedLevel;
173#[cfg(feature = "generate")]
174use crate::generator::{Generator, GeneratorConfig};
175use crate::parser::Parser;
176#[cfg(feature = "transpile")]
177use crate::tokens::TokenType;
178use crate::tokens::{Token, Tokenizer, TokenizerConfig};
179#[cfg(feature = "transpile")]
180use crate::traversal::ExpressionWalk;
181use serde::{Deserialize, Serialize};
182use std::collections::HashMap;
183use std::sync::{Arc, LazyLock, RwLock};
184
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via
/// [`FromStr`](std::str::FromStr). Some dialects accept aliases (e.g., "mssql" and
/// "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// With serde, variants are (de)serialized under their lowercased variant name
/// (`rename_all = "lowercase"`), e.g. `PostgreSQL` as `"postgresql"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default; also the result of parsing "").
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions
    /// (also accepts "arrow-datafusion", "arrow_datafusion").
    DataFusion,
}
265
266impl Default for DialectType {
267 fn default() -> Self {
268 DialectType::Generic
269 }
270}
271
272impl std::fmt::Display for DialectType {
273 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
274 match self {
275 DialectType::Generic => write!(f, "generic"),
276 DialectType::PostgreSQL => write!(f, "postgresql"),
277 DialectType::MySQL => write!(f, "mysql"),
278 DialectType::BigQuery => write!(f, "bigquery"),
279 DialectType::Snowflake => write!(f, "snowflake"),
280 DialectType::DuckDB => write!(f, "duckdb"),
281 DialectType::SQLite => write!(f, "sqlite"),
282 DialectType::Hive => write!(f, "hive"),
283 DialectType::Spark => write!(f, "spark"),
284 DialectType::Trino => write!(f, "trino"),
285 DialectType::Presto => write!(f, "presto"),
286 DialectType::Redshift => write!(f, "redshift"),
287 DialectType::TSQL => write!(f, "tsql"),
288 DialectType::Oracle => write!(f, "oracle"),
289 DialectType::ClickHouse => write!(f, "clickhouse"),
290 DialectType::Databricks => write!(f, "databricks"),
291 DialectType::Athena => write!(f, "athena"),
292 DialectType::Teradata => write!(f, "teradata"),
293 DialectType::Doris => write!(f, "doris"),
294 DialectType::StarRocks => write!(f, "starrocks"),
295 DialectType::Materialize => write!(f, "materialize"),
296 DialectType::RisingWave => write!(f, "risingwave"),
297 DialectType::SingleStore => write!(f, "singlestore"),
298 DialectType::CockroachDB => write!(f, "cockroachdb"),
299 DialectType::TiDB => write!(f, "tidb"),
300 DialectType::Druid => write!(f, "druid"),
301 DialectType::Solr => write!(f, "solr"),
302 DialectType::Tableau => write!(f, "tableau"),
303 DialectType::Dune => write!(f, "dune"),
304 DialectType::Fabric => write!(f, "fabric"),
305 DialectType::Drill => write!(f, "drill"),
306 DialectType::Dremio => write!(f, "dremio"),
307 DialectType::Exasol => write!(f, "exasol"),
308 DialectType::DataFusion => write!(f, "datafusion"),
309 }
310 }
311}
312
313impl std::str::FromStr for DialectType {
314 type Err = crate::error::Error;
315
316 fn from_str(s: &str) -> Result<Self> {
317 match s.to_ascii_lowercase().as_str() {
318 "generic" | "" => Ok(DialectType::Generic),
319 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
320 "mysql" => Ok(DialectType::MySQL),
321 "bigquery" => Ok(DialectType::BigQuery),
322 "snowflake" => Ok(DialectType::Snowflake),
323 "duckdb" => Ok(DialectType::DuckDB),
324 "sqlite" => Ok(DialectType::SQLite),
325 "hive" => Ok(DialectType::Hive),
326 "spark" | "spark2" => Ok(DialectType::Spark),
327 "trino" => Ok(DialectType::Trino),
328 "presto" => Ok(DialectType::Presto),
329 "redshift" => Ok(DialectType::Redshift),
330 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
331 "oracle" => Ok(DialectType::Oracle),
332 "clickhouse" => Ok(DialectType::ClickHouse),
333 "databricks" => Ok(DialectType::Databricks),
334 "athena" => Ok(DialectType::Athena),
335 "teradata" => Ok(DialectType::Teradata),
336 "doris" => Ok(DialectType::Doris),
337 "starrocks" => Ok(DialectType::StarRocks),
338 "materialize" => Ok(DialectType::Materialize),
339 "risingwave" => Ok(DialectType::RisingWave),
340 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
341 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
342 "tidb" => Ok(DialectType::TiDB),
343 "druid" => Ok(DialectType::Druid),
344 "solr" => Ok(DialectType::Solr),
345 "tableau" => Ok(DialectType::Tableau),
346 "dune" => Ok(DialectType::Dune),
347 "fabric" => Ok(DialectType::Fabric),
348 "drill" => Ok(DialectType::Drill),
349 "dremio" => Ok(DialectType::Dremio),
350 "exasol" => Ok(DialectType::Exasol),
351 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
352 _ => Err(crate::error::Error::parse(
353 format!("Unknown dialect: {}", s),
354 0,
355 0,
356 0,
357 0,
358 )),
359 }
360 }
361}
362
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
///
/// Several hooks are feature-gated: the generator hooks exist only with the `generate`
/// feature, and the transform hooks only with the `transpile` feature.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    ///
    /// This is the only method without a default implementation.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior. The default is
    /// `TokenizerConfig::default()`.
    fn tokenizer_config(&self) -> TokenizerConfig {
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior. The default is
    /// `GeneratorConfig::default()`.
    ///
    /// Only available with the `generate` feature.
    #[cfg(feature = "generate")]
    fn generator_config(&self) -> GeneratorConfig {
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default ignores the expression and delegates to
    /// [`generator_config`](DialectImpl::generator_config).
    ///
    /// Only available with the `generate` feature.
    #[cfg(feature = "generate")]
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    /// The default returns the expression untouched.
    ///
    /// Only available with the `transpile` feature.
    #[cfg(feature = "transpile")]
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    ///
    /// Only available with the `transpile` feature.
    #[cfg(feature = "transpile")]
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
429
/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
///
/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
/// and then nested element/field types are recursed into. This ensures that dialect-level
/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
///
/// If `transform_fn` returns something other than an `Expression::DataType`, the result
/// is `DataType::Custom { name: "UNKNOWN" }` and no further recursion happens.
///
/// # Errors
///
/// Propagates the first error returned by `transform_fn`, whether for the outer type
/// or for any nested type.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_data_type_recursive<F>(
    dt: crate::expressions::DataType,
    transform_fn: &F,
) -> Result<crate::expressions::DataType>
where
    F: Fn(Expression) -> Result<Expression>,
{
    use crate::expressions::DataType;

    // First, transform the outermost type through the expression system.
    let Expression::DataType(dt) = transform_fn(Expression::DataType(dt))? else {
        // The transform replaced the type with a non-type expression; it cannot be
        // represented as a `DataType`, so fall back to an opaque custom type.
        return Ok(DataType::Custom {
            name: "UNKNOWN".to_string(),
        });
    };

    // Then recurse into nested types, rebuilding the same container around them.
    match dt {
        DataType::Array {
            element_type,
            dimension,
        } => {
            let inner = transform_data_type_recursive(*element_type, transform_fn)?;
            Ok(DataType::Array {
                element_type: Box::new(inner),
                dimension,
            })
        }
        DataType::List { element_type } => {
            let inner = transform_data_type_recursive(*element_type, transform_fn)?;
            Ok(DataType::List {
                element_type: Box::new(inner),
            })
        }
        DataType::Struct { fields, nested } => {
            let mut new_fields = Vec::with_capacity(fields.len());
            for mut field in fields {
                field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
                new_fields.push(field);
            }
            Ok(DataType::Struct {
                fields: new_fields,
                nested,
            })
        }
        DataType::Map {
            key_type,
            value_type,
        } => {
            let k = transform_data_type_recursive(*key_type, transform_fn)?;
            let v = transform_data_type_recursive(*value_type, transform_fn)?;
            Ok(DataType::Map {
                key_type: Box::new(k),
                value_type: Box::new(v),
            })
        }
        // Non-parametric (or otherwise unhandled) types have nothing to recurse into.
        other => Ok(other),
    }
}
505
506/// Convert DuckDB C-style format strings to Presto C-style format strings.
507/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
508#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Stage 1: hide multi-character patterns behind `\x01`-delimited markers so the
    // single-specifier rewrites in stage 2 cannot touch them.
    const PROTECT: [(&str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
    ];
    // Stage 2: specifiers that differ one-for-one between the two dialects.
    const SINGLE: [(&str, &str); 2] = [("%M", "%i"), ("%S", "%s")];
    // Stage 3: swap each marker for its Presto spelling.
    const RESTORE: [(&str, &str); 6] = [
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];

    // The rewrites are applied strictly in table order; the order is significant.
    PROTECT
        .iter()
        .chain(SINGLE.iter())
        .chain(RESTORE.iter())
        .fold(fmt.to_string(), |acc, &(from, to)| acc.replace(from, to))
}
531
532/// Convert DuckDB C-style format strings to BigQuery format strings.
533/// BigQuery uses a mix of strftime-like directives.
534#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Rewrites are applied in sequence; the combined date-time pattern is listed
    // before its date-only and time-only parts.
    fmt.replace("%-d", "%e")
        .replace("%Y-%m-%d %H:%M:%S", "%F %T")
        .replace("%Y-%m-%d", "%F")
        .replace("%H:%M:%S", "%T")
}
544
545#[cfg(feature = "transpile")]
/// Rewrites Presto `%`-style date format specifiers into Java-style pattern letters
/// (e.g. `%Y-%m-%d` becomes `yyyy-MM-dd`).
///
/// Specifiers not listed in the table are left untouched.
fn presto_to_java_format(fmt: &str) -> String {
    // Applied in table order. No replacement text contains `%`, so an earlier
    // rewrite can never create input for a later one.
    const SPECIFIERS: [(&str, &str); 11] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%i", "mm"),
        ("%S", "ss"),
        ("%s", "ss"),
        ("%y", "yy"),
        ("%T", "HH:mm:ss"),
        ("%F", "yyyy-MM-dd"),
        ("%M", "MMMM"),
    ];

    let mut java = fmt.to_string();
    for &(presto, replacement) in SPECIFIERS.iter() {
        java = java.replace(presto, replacement);
    }
    java
}
559
560#[cfg(feature = "transpile")]
/// Collapses the spelled-out time patterns `%H:%i:%S` and `%H:%i:%s` into the `%T` shorthand.
fn normalize_presto_format(fmt: &str) -> String {
    let mut normalized = fmt.to_string();
    for spelled_out in ["%H:%i:%S", "%H:%i:%s"] {
        normalized = normalized.replace(spelled_out, "%T");
    }
    normalized
}
564
565#[cfg(feature = "transpile")]
/// Rewrites the Presto specifiers `%i`, `%s` and `%T` as `%M`, `%S` and `%H:%M:%S`.
///
/// All other text is passed through unchanged.
fn presto_to_duckdb_format(fmt: &str) -> String {
    const REWRITES: [(&str, &str); 3] = [("%i", "%M"), ("%s", "%S"), ("%T", "%H:%M:%S")];

    REWRITES
        .iter()
        .fold(fmt.to_string(), |acc, &(presto, duckdb)| acc.replace(presto, duckdb))
}
571
572#[cfg(feature = "transpile")]
/// Rewrites a Presto format string using the `%F` / `%T` shorthands and the `%M` / `%S`
/// minute and second specifiers.
///
/// Whole date and time patterns are collapsed first; any remaining `%i` / `%s` are then
/// rewritten individually.
fn presto_to_bigquery_format(fmt: &str) -> String {
    let with_date_shorthand = fmt.replace("%Y-%m-%d", "%F");
    let with_time_shorthand = with_date_shorthand
        .replace("%H:%i:%S", "%T")
        .replace("%H:%i:%s", "%T");
    let with_minutes = with_time_shorthand.replace("%i", "%M");
    with_minutes.replace("%s", "%S")
}
580
581#[cfg(feature = "transpile")]
/// Returns `true` when `fmt` is one of the spellings of the `%Y-%m-%d %T` timestamp format.
///
/// The accepted spellings are `%Y-%m-%d %T`, `%Y-%m-%d %H:%i:%S` and `%Y-%m-%d %H:%i:%s`.
/// These are exactly the inputs that `normalize_presto_format` rewrites to `%Y-%m-%d %T`,
/// so the result is the same as comparing the normalized string, without allocating one.
fn is_default_presto_timestamp_format(fmt: &str) -> bool {
    matches!(
        fmt,
        "%Y-%m-%d %T" | "%Y-%m-%d %H:%i:%S" | "%Y-%m-%d %H:%i:%s"
    )
}
589
590#[cfg(feature = "transpile")]
/// Returns `true` when `fmt` is exactly `%Y-%m-%d` or its `%F` shorthand.
fn is_default_presto_date_format(fmt: &str) -> bool {
    matches!(fmt, "%Y-%m-%d" | "%F")
}
594
/// A unit of work on the explicit task stack used by `transform_recursive_inner`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
enum TransformTask {
    /// An expression that still has to be processed. Leaf expressions (literals,
    /// identifiers, ...) are passed to the transform function right away; for other
    /// shapes a `Finish` task is pushed first, followed by `Visit` tasks for the children.
    Visit(Expression),
    /// A parent whose children were detached and scheduled as `Visit` tasks.
    /// NOTE(review): presumably re-attaches the transformed children and then transforms
    /// the parent itself -- confirm against the `Finish` handling in
    /// `transform_recursive_inner`.
    Finish(FinishTask),
}
606
/// The pending parent node carried by a `TransformTask::Finish`, grouped by how many
/// children were detached from it.
///
/// NOTE(review): only the `Unary` and `Binary` producers are documented from code in
/// view; the remaining variants are described from their payload types alone -- confirm
/// against the `Finish` handling in `transform_recursive_inner`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
enum FinishTask {
    /// A node with a single detached `this` child (e.g. `Alias`, `Paren`, `Not`, `Neg`,
    /// `IsNull`, `Subquery`, `Exists`); the child slot holds a `Null` placeholder.
    Unary(Expression),
    /// A node with detached `left` and `right` children (e.g. `And`, `Or`, `Add`, `Eq`,
    /// `Concat`); both slots hold `Null` placeholders.
    Binary(Expression),
    /// Presumably a cast-style node with one detached operand -- TODO confirm.
    CastLike(Expression),
    /// Presumably a list-like node plus the number of detached children -- TODO confirm.
    List(Expression, usize),
    /// A `FROM` clause plus a count, presumably of its detached expressions -- TODO confirm.
    From(crate::expressions::From, usize),
    /// A `SELECT` together with the bookkeeping recorded in [`SelectFrame`].
    Select(SelectFrame),
    /// Presumably a set-operation node (e.g. a union) with detached operands -- TODO confirm.
    SetOp(Expression),
}
623
/// Bookkeeping for a `SELECT` whose parts are being transformed via the task stack.
///
/// NOTE(review): the counts and flags presumably record which parts of the select were
/// scheduled for visiting, so that the matching results can be popped and re-attached
/// later -- confirm against the `FinishTask::Select` handling, which is outside this view.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
#[derive(Debug)]
struct SelectFrame {
    /// The select node being rebuilt.
    select: Box<crate::expressions::Select>,
    /// Number of projection expressions (presumably those scheduled for visiting).
    expr_count: usize,
    /// Whether a `FROM` clause was present.
    from_present: bool,
    /// Whether a `WHERE` clause was present.
    where_present: bool,
    /// Number of `GROUP BY` expressions (presumably those scheduled for visiting).
    group_by_count: usize,
    /// Whether a `HAVING` clause was present.
    having_present: bool,
    /// Whether a `QUALIFY` clause was present.
    qualify_present: bool,
}
640
/// Removes and returns the most recently produced expression from the result stack.
///
/// # Errors
///
/// Returns an internal error when the stack is empty.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
    match results.pop() {
        Some(top) => Ok(top),
        None => Err(crate::error::Error::Internal(
            "transform stack underflow".to_string(),
        )),
    }
}
652
/// Removes the last `count` expressions from the result stack and returns them in
/// their original (push) order.
///
/// # Errors
///
/// Returns an internal error, leaving the stack untouched, when it holds fewer than
/// `count` entries.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
    // `checked_sub` yields `None` exactly when the stack is shorter than `count`.
    match results.len().checked_sub(count) {
        Some(split_at) => Ok(results.split_off(split_at)),
        None => Err(crate::error::Error::Internal(
            "transform result stack underflow".to_string(),
        )),
    }
}
667
/// Runs `transform_fn` over every node of an expression tree and returns the rebuilt tree.
///
/// The traversal itself lives in `transform_recursive_inner`, which drives an explicit
/// task stack rather than the call stack for the expression shapes it handles directly.
/// This wrapper only adds stack protection: with the `stacker` feature enabled, the
/// traversal runs under `stacker::maybe_grow` with a red zone of 4 MiB in debug builds
/// (1 MiB otherwise) and an 8 MiB stack size; without the feature it calls the inner
/// function directly.
///
/// # Errors
///
/// Returns whatever error `transform_recursive_inner` reports.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
where
    F: Fn(Expression) -> Result<Expression>,
{
    #[cfg(feature = "stacker")]
    {
        const MIB: usize = 1024 * 1024;
        // Debug builds get a larger safety margin than optimized builds.
        let red_zone = if cfg!(debug_assertions) { 4 * MIB } else { MIB };
        stacker::maybe_grow(red_zone, 8 * MIB, move || {
            transform_recursive_inner(expr, transform_fn)
        })
    }
    #[cfg(not(feature = "stacker"))]
    {
        transform_recursive_inner(expr, transform_fn)
    }
}
701
702#[cfg(any(
703 feature = "transpile",
704 feature = "ast-tools",
705 feature = "generate",
706 feature = "semantic"
707))]
708fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
709where
710 F: Fn(Expression) -> Result<Expression>,
711{
712 let mut tasks = vec![TransformTask::Visit(expr)];
713 let mut results = Vec::new();
714
715 while let Some(task) = tasks.pop() {
716 match task {
717 TransformTask::Visit(expr) => {
718 if matches!(
719 &expr,
720 Expression::Literal(_)
721 | Expression::Boolean(_)
722 | Expression::Null(_)
723 | Expression::Identifier(_)
724 | Expression::Star(_)
725 | Expression::Parameter(_)
726 | Expression::Placeholder(_)
727 | Expression::SessionParameter(_)
728 ) {
729 results.push(transform_fn(expr)?);
730 continue;
731 }
732
733 match expr {
734 Expression::Alias(mut alias) => {
735 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
736 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
737 alias,
738 ))));
739 tasks.push(TransformTask::Visit(child));
740 }
741 Expression::Paren(mut paren) => {
742 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
743 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
744 paren,
745 ))));
746 tasks.push(TransformTask::Visit(child));
747 }
748 Expression::Not(mut not) => {
749 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
750 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
751 not,
752 ))));
753 tasks.push(TransformTask::Visit(child));
754 }
755 Expression::Neg(mut neg) => {
756 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
757 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
758 neg,
759 ))));
760 tasks.push(TransformTask::Visit(child));
761 }
762 Expression::IsNull(mut expr) => {
763 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
764 tasks.push(TransformTask::Finish(FinishTask::Unary(
765 Expression::IsNull(expr),
766 )));
767 tasks.push(TransformTask::Visit(child));
768 }
769 Expression::IsTrue(mut expr) => {
770 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
771 tasks.push(TransformTask::Finish(FinishTask::Unary(
772 Expression::IsTrue(expr),
773 )));
774 tasks.push(TransformTask::Visit(child));
775 }
776 Expression::IsFalse(mut expr) => {
777 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
778 tasks.push(TransformTask::Finish(FinishTask::Unary(
779 Expression::IsFalse(expr),
780 )));
781 tasks.push(TransformTask::Visit(child));
782 }
783 Expression::Subquery(mut subquery) => {
784 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
785 tasks.push(TransformTask::Finish(FinishTask::Unary(
786 Expression::Subquery(subquery),
787 )));
788 tasks.push(TransformTask::Visit(child));
789 }
790 Expression::Exists(mut exists) => {
791 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
792 tasks.push(TransformTask::Finish(FinishTask::Unary(
793 Expression::Exists(exists),
794 )));
795 tasks.push(TransformTask::Visit(child));
796 }
797 Expression::TableArgument(mut arg) => {
798 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
799 tasks.push(TransformTask::Finish(FinishTask::Unary(
800 Expression::TableArgument(arg),
801 )));
802 tasks.push(TransformTask::Visit(child));
803 }
804 Expression::And(mut op) => {
805 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
806 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
807 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
808 op,
809 ))));
810 tasks.push(TransformTask::Visit(right));
811 tasks.push(TransformTask::Visit(left));
812 }
813 Expression::Or(mut op) => {
814 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
815 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
816 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
817 op,
818 ))));
819 tasks.push(TransformTask::Visit(right));
820 tasks.push(TransformTask::Visit(left));
821 }
822 Expression::Add(mut op) => {
823 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
824 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
825 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
826 op,
827 ))));
828 tasks.push(TransformTask::Visit(right));
829 tasks.push(TransformTask::Visit(left));
830 }
831 Expression::Sub(mut op) => {
832 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
833 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
834 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
835 op,
836 ))));
837 tasks.push(TransformTask::Visit(right));
838 tasks.push(TransformTask::Visit(left));
839 }
840 Expression::Mul(mut op) => {
841 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
842 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
843 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
844 op,
845 ))));
846 tasks.push(TransformTask::Visit(right));
847 tasks.push(TransformTask::Visit(left));
848 }
849 Expression::Div(mut op) => {
850 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
851 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
852 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
853 op,
854 ))));
855 tasks.push(TransformTask::Visit(right));
856 tasks.push(TransformTask::Visit(left));
857 }
858 Expression::Eq(mut op) => {
859 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
860 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
861 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
862 op,
863 ))));
864 tasks.push(TransformTask::Visit(right));
865 tasks.push(TransformTask::Visit(left));
866 }
867 Expression::Lt(mut op) => {
868 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
869 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
870 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
871 op,
872 ))));
873 tasks.push(TransformTask::Visit(right));
874 tasks.push(TransformTask::Visit(left));
875 }
876 Expression::Gt(mut op) => {
877 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
878 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
879 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
880 op,
881 ))));
882 tasks.push(TransformTask::Visit(right));
883 tasks.push(TransformTask::Visit(left));
884 }
885 Expression::Neq(mut op) => {
886 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
887 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
888 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
889 op,
890 ))));
891 tasks.push(TransformTask::Visit(right));
892 tasks.push(TransformTask::Visit(left));
893 }
894 Expression::Lte(mut op) => {
895 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
896 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
897 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
898 op,
899 ))));
900 tasks.push(TransformTask::Visit(right));
901 tasks.push(TransformTask::Visit(left));
902 }
903 Expression::Gte(mut op) => {
904 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
905 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
906 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
907 op,
908 ))));
909 tasks.push(TransformTask::Visit(right));
910 tasks.push(TransformTask::Visit(left));
911 }
912 Expression::Mod(mut op) => {
913 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
914 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
915 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
916 op,
917 ))));
918 tasks.push(TransformTask::Visit(right));
919 tasks.push(TransformTask::Visit(left));
920 }
921 Expression::Concat(mut op) => {
922 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
923 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
924 tasks.push(TransformTask::Finish(FinishTask::Binary(
925 Expression::Concat(op),
926 )));
927 tasks.push(TransformTask::Visit(right));
928 tasks.push(TransformTask::Visit(left));
929 }
930 Expression::BitwiseAnd(mut op) => {
931 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
932 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
933 tasks.push(TransformTask::Finish(FinishTask::Binary(
934 Expression::BitwiseAnd(op),
935 )));
936 tasks.push(TransformTask::Visit(right));
937 tasks.push(TransformTask::Visit(left));
938 }
939 Expression::BitwiseOr(mut op) => {
940 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
941 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
942 tasks.push(TransformTask::Finish(FinishTask::Binary(
943 Expression::BitwiseOr(op),
944 )));
945 tasks.push(TransformTask::Visit(right));
946 tasks.push(TransformTask::Visit(left));
947 }
948 Expression::BitwiseXor(mut op) => {
949 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
950 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
951 tasks.push(TransformTask::Finish(FinishTask::Binary(
952 Expression::BitwiseXor(op),
953 )));
954 tasks.push(TransformTask::Visit(right));
955 tasks.push(TransformTask::Visit(left));
956 }
957 Expression::Is(mut op) => {
958 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
959 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
960 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
961 op,
962 ))));
963 tasks.push(TransformTask::Visit(right));
964 tasks.push(TransformTask::Visit(left));
965 }
966 Expression::MemberOf(mut op) => {
967 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
968 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
969 tasks.push(TransformTask::Finish(FinishTask::Binary(
970 Expression::MemberOf(op),
971 )));
972 tasks.push(TransformTask::Visit(right));
973 tasks.push(TransformTask::Visit(left));
974 }
975 Expression::ArrayContainsAll(mut op) => {
976 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
977 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
978 tasks.push(TransformTask::Finish(FinishTask::Binary(
979 Expression::ArrayContainsAll(op),
980 )));
981 tasks.push(TransformTask::Visit(right));
982 tasks.push(TransformTask::Visit(left));
983 }
984 Expression::ArrayContainedBy(mut op) => {
985 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
986 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
987 tasks.push(TransformTask::Finish(FinishTask::Binary(
988 Expression::ArrayContainedBy(op),
989 )));
990 tasks.push(TransformTask::Visit(right));
991 tasks.push(TransformTask::Visit(left));
992 }
993 Expression::ArrayOverlaps(mut op) => {
994 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
995 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
996 tasks.push(TransformTask::Finish(FinishTask::Binary(
997 Expression::ArrayOverlaps(op),
998 )));
999 tasks.push(TransformTask::Visit(right));
1000 tasks.push(TransformTask::Visit(left));
1001 }
1002 Expression::TsMatch(mut op) => {
1003 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
1004 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
1005 tasks.push(TransformTask::Finish(FinishTask::Binary(
1006 Expression::TsMatch(op),
1007 )));
1008 tasks.push(TransformTask::Visit(right));
1009 tasks.push(TransformTask::Visit(left));
1010 }
1011 Expression::Adjacent(mut op) => {
1012 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
1013 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
1014 tasks.push(TransformTask::Finish(FinishTask::Binary(
1015 Expression::Adjacent(op),
1016 )));
1017 tasks.push(TransformTask::Visit(right));
1018 tasks.push(TransformTask::Visit(left));
1019 }
1020 Expression::Like(mut like) => {
1021 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
1022 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
1023 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
1024 like,
1025 ))));
1026 tasks.push(TransformTask::Visit(right));
1027 tasks.push(TransformTask::Visit(left));
1028 }
1029 Expression::ILike(mut like) => {
1030 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
1031 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
1032 tasks.push(TransformTask::Finish(FinishTask::Binary(
1033 Expression::ILike(like),
1034 )));
1035 tasks.push(TransformTask::Visit(right));
1036 tasks.push(TransformTask::Visit(left));
1037 }
1038 Expression::Cast(mut cast) => {
1039 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1040 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1041 Expression::Cast(cast),
1042 )));
1043 tasks.push(TransformTask::Visit(child));
1044 }
1045 Expression::TryCast(mut cast) => {
1046 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1047 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1048 Expression::TryCast(cast),
1049 )));
1050 tasks.push(TransformTask::Visit(child));
1051 }
1052 Expression::SafeCast(mut cast) => {
1053 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
1054 tasks.push(TransformTask::Finish(FinishTask::CastLike(
1055 Expression::SafeCast(cast),
1056 )));
1057 tasks.push(TransformTask::Visit(child));
1058 }
1059 Expression::Function(mut function) => {
1060 let args = std::mem::take(&mut function.args);
1061 let count = args.len();
1062 tasks.push(TransformTask::Finish(FinishTask::List(
1063 Expression::Function(function),
1064 count,
1065 )));
1066 for child in args.into_iter().rev() {
1067 tasks.push(TransformTask::Visit(child));
1068 }
1069 }
1070 Expression::Array(mut array) => {
1071 let expressions = std::mem::take(&mut array.expressions);
1072 let count = expressions.len();
1073 tasks.push(TransformTask::Finish(FinishTask::List(
1074 Expression::Array(array),
1075 count,
1076 )));
1077 for child in expressions.into_iter().rev() {
1078 tasks.push(TransformTask::Visit(child));
1079 }
1080 }
1081 Expression::Tuple(mut tuple) => {
1082 let expressions = std::mem::take(&mut tuple.expressions);
1083 let count = expressions.len();
1084 tasks.push(TransformTask::Finish(FinishTask::List(
1085 Expression::Tuple(tuple),
1086 count,
1087 )));
1088 for child in expressions.into_iter().rev() {
1089 tasks.push(TransformTask::Visit(child));
1090 }
1091 }
1092 Expression::ArrayFunc(mut array) => {
1093 let expressions = std::mem::take(&mut array.expressions);
1094 let count = expressions.len();
1095 tasks.push(TransformTask::Finish(FinishTask::List(
1096 Expression::ArrayFunc(array),
1097 count,
1098 )));
1099 for child in expressions.into_iter().rev() {
1100 tasks.push(TransformTask::Visit(child));
1101 }
1102 }
1103 Expression::Coalesce(mut func) => {
1104 let expressions = std::mem::take(&mut func.expressions);
1105 let count = expressions.len();
1106 tasks.push(TransformTask::Finish(FinishTask::List(
1107 Expression::Coalesce(func),
1108 count,
1109 )));
1110 for child in expressions.into_iter().rev() {
1111 tasks.push(TransformTask::Visit(child));
1112 }
1113 }
1114 Expression::Greatest(mut func) => {
1115 let expressions = std::mem::take(&mut func.expressions);
1116 let count = expressions.len();
1117 tasks.push(TransformTask::Finish(FinishTask::List(
1118 Expression::Greatest(func),
1119 count,
1120 )));
1121 for child in expressions.into_iter().rev() {
1122 tasks.push(TransformTask::Visit(child));
1123 }
1124 }
1125 Expression::Least(mut func) => {
1126 let expressions = std::mem::take(&mut func.expressions);
1127 let count = expressions.len();
1128 tasks.push(TransformTask::Finish(FinishTask::List(
1129 Expression::Least(func),
1130 count,
1131 )));
1132 for child in expressions.into_iter().rev() {
1133 tasks.push(TransformTask::Visit(child));
1134 }
1135 }
1136 Expression::ArrayConcat(mut func) => {
1137 let expressions = std::mem::take(&mut func.expressions);
1138 let count = expressions.len();
1139 tasks.push(TransformTask::Finish(FinishTask::List(
1140 Expression::ArrayConcat(func),
1141 count,
1142 )));
1143 for child in expressions.into_iter().rev() {
1144 tasks.push(TransformTask::Visit(child));
1145 }
1146 }
1147 Expression::ArrayIntersect(mut func) => {
1148 let expressions = std::mem::take(&mut func.expressions);
1149 let count = expressions.len();
1150 tasks.push(TransformTask::Finish(FinishTask::List(
1151 Expression::ArrayIntersect(func),
1152 count,
1153 )));
1154 for child in expressions.into_iter().rev() {
1155 tasks.push(TransformTask::Visit(child));
1156 }
1157 }
1158 Expression::ArrayZip(mut func) => {
1159 let expressions = std::mem::take(&mut func.expressions);
1160 let count = expressions.len();
1161 tasks.push(TransformTask::Finish(FinishTask::List(
1162 Expression::ArrayZip(func),
1163 count,
1164 )));
1165 for child in expressions.into_iter().rev() {
1166 tasks.push(TransformTask::Visit(child));
1167 }
1168 }
1169 Expression::MapConcat(mut func) => {
1170 let expressions = std::mem::take(&mut func.expressions);
1171 let count = expressions.len();
1172 tasks.push(TransformTask::Finish(FinishTask::List(
1173 Expression::MapConcat(func),
1174 count,
1175 )));
1176 for child in expressions.into_iter().rev() {
1177 tasks.push(TransformTask::Visit(child));
1178 }
1179 }
1180 Expression::JsonArray(mut func) => {
1181 let expressions = std::mem::take(&mut func.expressions);
1182 let count = expressions.len();
1183 tasks.push(TransformTask::Finish(FinishTask::List(
1184 Expression::JsonArray(func),
1185 count,
1186 )));
1187 for child in expressions.into_iter().rev() {
1188 tasks.push(TransformTask::Visit(child));
1189 }
1190 }
1191 Expression::From(mut from) => {
1192 let expressions = std::mem::take(&mut from.expressions);
1193 let count = expressions.len();
1194 tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
1195 for child in expressions.into_iter().rev() {
1196 tasks.push(TransformTask::Visit(child));
1197 }
1198 }
1199 Expression::Select(mut select) => {
1200 let expressions = std::mem::take(&mut select.expressions);
1201 let expr_count = expressions.len();
1202
1203 let from_info = select.from.take().map(|mut from| {
1204 let children = std::mem::take(&mut from.expressions);
1205 (from, children)
1206 });
1207 let from_present = from_info.is_some();
1208
1209 let where_child = select.where_clause.as_mut().map(|where_clause| {
1210 std::mem::replace(&mut where_clause.this, Expression::Null(Null))
1211 });
1212 let where_present = where_child.is_some();
1213
1214 let group_expressions = select
1215 .group_by
1216 .as_mut()
1217 .map(|group_by| std::mem::take(&mut group_by.expressions))
1218 .unwrap_or_default();
1219 let group_by_count = group_expressions.len();
1220
1221 let having_child = select.having.as_mut().map(|having| {
1222 std::mem::replace(&mut having.this, Expression::Null(Null))
1223 });
1224 let having_present = having_child.is_some();
1225
1226 let qualify_child = select.qualify.as_mut().map(|qualify| {
1227 std::mem::replace(&mut qualify.this, Expression::Null(Null))
1228 });
1229 let qualify_present = qualify_child.is_some();
1230
1231 tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
1232 select,
1233 expr_count,
1234 from_present,
1235 where_present,
1236 group_by_count,
1237 having_present,
1238 qualify_present,
1239 })));
1240
1241 if let Some(child) = qualify_child {
1242 tasks.push(TransformTask::Visit(child));
1243 }
1244 if let Some(child) = having_child {
1245 tasks.push(TransformTask::Visit(child));
1246 }
1247 for child in group_expressions.into_iter().rev() {
1248 tasks.push(TransformTask::Visit(child));
1249 }
1250 if let Some(child) = where_child {
1251 tasks.push(TransformTask::Visit(child));
1252 }
1253 if let Some((from, children)) = from_info {
1254 tasks.push(TransformTask::Finish(FinishTask::From(
1255 from,
1256 children.len(),
1257 )));
1258 for child in children.into_iter().rev() {
1259 tasks.push(TransformTask::Visit(child));
1260 }
1261 }
1262 for child in expressions.into_iter().rev() {
1263 tasks.push(TransformTask::Visit(child));
1264 }
1265 }
1266 Expression::Union(mut union) => {
1267 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1268 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1269 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1270 union,
1271 ))));
1272 tasks.push(TransformTask::Visit(right));
1273 tasks.push(TransformTask::Visit(left));
1274 }
1275 Expression::Intersect(mut intersect) => {
1276 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1277 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1278 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1279 Expression::Intersect(intersect),
1280 )));
1281 tasks.push(TransformTask::Visit(right));
1282 tasks.push(TransformTask::Visit(left));
1283 }
1284 Expression::Except(mut except) => {
1285 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1286 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1287 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1288 Expression::Except(except),
1289 )));
1290 tasks.push(TransformTask::Visit(right));
1291 tasks.push(TransformTask::Visit(left));
1292 }
1293 other => {
1294 results.push(transform_recursive_reference(other, transform_fn)?);
1295 }
1296 }
1297 }
1298 TransformTask::Finish(finish) => match finish {
1299 FinishTask::Unary(expr) => {
1300 let child = transform_pop_result(&mut results)?;
1301 let rebuilt = match expr {
1302 Expression::Alias(mut alias) => {
1303 alias.this = child;
1304 Expression::Alias(alias)
1305 }
1306 Expression::Paren(mut paren) => {
1307 paren.this = child;
1308 Expression::Paren(paren)
1309 }
1310 Expression::Not(mut not) => {
1311 not.this = child;
1312 Expression::Not(not)
1313 }
1314 Expression::Neg(mut neg) => {
1315 neg.this = child;
1316 Expression::Neg(neg)
1317 }
1318 Expression::IsNull(mut expr) => {
1319 expr.this = child;
1320 Expression::IsNull(expr)
1321 }
1322 Expression::IsTrue(mut expr) => {
1323 expr.this = child;
1324 Expression::IsTrue(expr)
1325 }
1326 Expression::IsFalse(mut expr) => {
1327 expr.this = child;
1328 Expression::IsFalse(expr)
1329 }
1330 Expression::Subquery(mut subquery) => {
1331 subquery.this = child;
1332 Expression::Subquery(subquery)
1333 }
1334 Expression::Exists(mut exists) => {
1335 exists.this = child;
1336 Expression::Exists(exists)
1337 }
1338 Expression::TableArgument(mut arg) => {
1339 arg.this = child;
1340 Expression::TableArgument(arg)
1341 }
1342 _ => {
1343 return Err(crate::error::Error::Internal(
1344 "unexpected unary transform task".to_string(),
1345 ));
1346 }
1347 };
1348 results.push(transform_fn(rebuilt)?);
1349 }
1350 FinishTask::Binary(expr) => {
1351 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1352 let left = children.next().expect("left child");
1353 let right = children.next().expect("right child");
1354 let rebuilt = match expr {
1355 Expression::And(mut op) => {
1356 op.left = left;
1357 op.right = right;
1358 Expression::And(op)
1359 }
1360 Expression::Or(mut op) => {
1361 op.left = left;
1362 op.right = right;
1363 Expression::Or(op)
1364 }
1365 Expression::Add(mut op) => {
1366 op.left = left;
1367 op.right = right;
1368 Expression::Add(op)
1369 }
1370 Expression::Sub(mut op) => {
1371 op.left = left;
1372 op.right = right;
1373 Expression::Sub(op)
1374 }
1375 Expression::Mul(mut op) => {
1376 op.left = left;
1377 op.right = right;
1378 Expression::Mul(op)
1379 }
1380 Expression::Div(mut op) => {
1381 op.left = left;
1382 op.right = right;
1383 Expression::Div(op)
1384 }
1385 Expression::Eq(mut op) => {
1386 op.left = left;
1387 op.right = right;
1388 Expression::Eq(op)
1389 }
1390 Expression::Lt(mut op) => {
1391 op.left = left;
1392 op.right = right;
1393 Expression::Lt(op)
1394 }
1395 Expression::Gt(mut op) => {
1396 op.left = left;
1397 op.right = right;
1398 Expression::Gt(op)
1399 }
1400 Expression::Neq(mut op) => {
1401 op.left = left;
1402 op.right = right;
1403 Expression::Neq(op)
1404 }
1405 Expression::Lte(mut op) => {
1406 op.left = left;
1407 op.right = right;
1408 Expression::Lte(op)
1409 }
1410 Expression::Gte(mut op) => {
1411 op.left = left;
1412 op.right = right;
1413 Expression::Gte(op)
1414 }
1415 Expression::Mod(mut op) => {
1416 op.left = left;
1417 op.right = right;
1418 Expression::Mod(op)
1419 }
1420 Expression::Concat(mut op) => {
1421 op.left = left;
1422 op.right = right;
1423 Expression::Concat(op)
1424 }
1425 Expression::BitwiseAnd(mut op) => {
1426 op.left = left;
1427 op.right = right;
1428 Expression::BitwiseAnd(op)
1429 }
1430 Expression::BitwiseOr(mut op) => {
1431 op.left = left;
1432 op.right = right;
1433 Expression::BitwiseOr(op)
1434 }
1435 Expression::BitwiseXor(mut op) => {
1436 op.left = left;
1437 op.right = right;
1438 Expression::BitwiseXor(op)
1439 }
1440 Expression::Is(mut op) => {
1441 op.left = left;
1442 op.right = right;
1443 Expression::Is(op)
1444 }
1445 Expression::MemberOf(mut op) => {
1446 op.left = left;
1447 op.right = right;
1448 Expression::MemberOf(op)
1449 }
1450 Expression::ArrayContainsAll(mut op) => {
1451 op.left = left;
1452 op.right = right;
1453 Expression::ArrayContainsAll(op)
1454 }
1455 Expression::ArrayContainedBy(mut op) => {
1456 op.left = left;
1457 op.right = right;
1458 Expression::ArrayContainedBy(op)
1459 }
1460 Expression::ArrayOverlaps(mut op) => {
1461 op.left = left;
1462 op.right = right;
1463 Expression::ArrayOverlaps(op)
1464 }
1465 Expression::TsMatch(mut op) => {
1466 op.left = left;
1467 op.right = right;
1468 Expression::TsMatch(op)
1469 }
1470 Expression::Adjacent(mut op) => {
1471 op.left = left;
1472 op.right = right;
1473 Expression::Adjacent(op)
1474 }
1475 Expression::Like(mut like) => {
1476 like.left = left;
1477 like.right = right;
1478 Expression::Like(like)
1479 }
1480 Expression::ILike(mut like) => {
1481 like.left = left;
1482 like.right = right;
1483 Expression::ILike(like)
1484 }
1485 _ => {
1486 return Err(crate::error::Error::Internal(
1487 "unexpected binary transform task".to_string(),
1488 ));
1489 }
1490 };
1491 results.push(transform_fn(rebuilt)?);
1492 }
1493 FinishTask::CastLike(expr) => {
1494 let child = transform_pop_result(&mut results)?;
1495 let rebuilt = match expr {
1496 Expression::Cast(mut cast) => {
1497 cast.this = child;
1498 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1499 Expression::Cast(cast)
1500 }
1501 Expression::TryCast(mut cast) => {
1502 cast.this = child;
1503 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1504 Expression::TryCast(cast)
1505 }
1506 Expression::SafeCast(mut cast) => {
1507 cast.this = child;
1508 cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
1509 Expression::SafeCast(cast)
1510 }
1511 _ => {
1512 return Err(crate::error::Error::Internal(
1513 "unexpected cast transform task".to_string(),
1514 ));
1515 }
1516 };
1517 results.push(transform_fn(rebuilt)?);
1518 }
1519 FinishTask::List(expr, count) => {
1520 let children = transform_pop_results(&mut results, count)?;
1521 let rebuilt = match expr {
1522 Expression::Function(mut function) => {
1523 function.args = children;
1524 Expression::Function(function)
1525 }
1526 Expression::Array(mut array) => {
1527 array.expressions = children;
1528 Expression::Array(array)
1529 }
1530 Expression::Tuple(mut tuple) => {
1531 tuple.expressions = children;
1532 Expression::Tuple(tuple)
1533 }
1534 Expression::ArrayFunc(mut array) => {
1535 array.expressions = children;
1536 Expression::ArrayFunc(array)
1537 }
1538 Expression::Coalesce(mut func) => {
1539 func.expressions = children;
1540 Expression::Coalesce(func)
1541 }
1542 Expression::Greatest(mut func) => {
1543 func.expressions = children;
1544 Expression::Greatest(func)
1545 }
1546 Expression::Least(mut func) => {
1547 func.expressions = children;
1548 Expression::Least(func)
1549 }
1550 Expression::ArrayConcat(mut func) => {
1551 func.expressions = children;
1552 Expression::ArrayConcat(func)
1553 }
1554 Expression::ArrayIntersect(mut func) => {
1555 func.expressions = children;
1556 Expression::ArrayIntersect(func)
1557 }
1558 Expression::ArrayZip(mut func) => {
1559 func.expressions = children;
1560 Expression::ArrayZip(func)
1561 }
1562 Expression::MapConcat(mut func) => {
1563 func.expressions = children;
1564 Expression::MapConcat(func)
1565 }
1566 Expression::JsonArray(mut func) => {
1567 func.expressions = children;
1568 Expression::JsonArray(func)
1569 }
1570 _ => {
1571 return Err(crate::error::Error::Internal(
1572 "unexpected list transform task".to_string(),
1573 ));
1574 }
1575 };
1576 results.push(transform_fn(rebuilt)?);
1577 }
1578 FinishTask::From(mut from, count) => {
1579 from.expressions = transform_pop_results(&mut results, count)?;
1580 results.push(transform_fn(Expression::From(Box::new(from)))?);
1581 }
1582 FinishTask::Select(frame) => {
1583 let mut select = *frame.select;
1584
1585 if frame.qualify_present {
1586 if let Some(ref mut qualify) = select.qualify {
1587 qualify.this = transform_pop_result(&mut results)?;
1588 }
1589 }
1590 if frame.having_present {
1591 if let Some(ref mut having) = select.having {
1592 having.this = transform_pop_result(&mut results)?;
1593 }
1594 }
1595 if frame.group_by_count > 0 {
1596 if let Some(ref mut group_by) = select.group_by {
1597 group_by.expressions =
1598 transform_pop_results(&mut results, frame.group_by_count)?;
1599 }
1600 }
1601 if frame.where_present {
1602 if let Some(ref mut where_clause) = select.where_clause {
1603 where_clause.this = transform_pop_result(&mut results)?;
1604 }
1605 }
1606 if frame.from_present {
1607 match transform_pop_result(&mut results)? {
1608 Expression::From(from) => {
1609 select.from = Some(*from);
1610 }
1611 _ => {
1612 return Err(crate::error::Error::Internal(
1613 "expected FROM expression result".to_string(),
1614 ));
1615 }
1616 }
1617 }
1618 select.expressions = transform_pop_results(&mut results, frame.expr_count)?;
1619
1620 select.joins = select
1621 .joins
1622 .into_iter()
1623 .map(|mut join| {
1624 join.this = transform_recursive(join.this, transform_fn)?;
1625 if let Some(on) = join.on.take() {
1626 join.on = Some(transform_recursive(on, transform_fn)?);
1627 }
1628 match transform_fn(Expression::Join(Box::new(join)))? {
1629 Expression::Join(j) => Ok(*j),
1630 _ => Err(crate::error::Error::parse(
1631 "Join transformation returned non-join expression",
1632 0,
1633 0,
1634 0,
1635 0,
1636 )),
1637 }
1638 })
1639 .collect::<Result<Vec<_>>>()?;
1640
1641 select.lateral_views = select
1642 .lateral_views
1643 .into_iter()
1644 .map(|mut lv| {
1645 lv.this = transform_recursive(lv.this, transform_fn)?;
1646 Ok(lv)
1647 })
1648 .collect::<Result<Vec<_>>>()?;
1649
1650 if let Some(mut with) = select.with.take() {
1651 with.ctes = with
1652 .ctes
1653 .into_iter()
1654 .map(|mut cte| {
1655 let original = cte.this.clone();
1656 cte.this =
1657 transform_recursive(cte.this, transform_fn).unwrap_or(original);
1658 cte
1659 })
1660 .collect();
1661 select.with = Some(with);
1662 }
1663
1664 if let Some(mut order) = select.order_by.take() {
1665 order.expressions = order
1666 .expressions
1667 .into_iter()
1668 .map(|o| {
1669 let mut o = o;
1670 let original = o.this.clone();
1671 o.this =
1672 transform_recursive(o.this, transform_fn).unwrap_or(original);
1673 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1674 Ok(Expression::Ordered(transformed)) => *transformed,
1675 Ok(_) | Err(_) => o,
1676 }
1677 })
1678 .collect();
1679 select.order_by = Some(order);
1680 }
1681
1682 if let Some(ref mut windows) = select.windows {
1683 for nw in windows.iter_mut() {
1684 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1685 .into_iter()
1686 .map(|o| {
1687 let mut o = o;
1688 let original = o.this.clone();
1689 o.this = transform_recursive(o.this, transform_fn)
1690 .unwrap_or(original);
1691 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1692 Ok(Expression::Ordered(transformed)) => *transformed,
1693 Ok(_) | Err(_) => o,
1694 }
1695 })
1696 .collect();
1697 }
1698 }
1699
1700 results.push(transform_fn(Expression::Select(Box::new(select)))?);
1701 }
1702 FinishTask::SetOp(expr) => {
1703 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1704 let left = children.next().expect("left child");
1705 let right = children.next().expect("right child");
1706
1707 let rebuilt = match expr {
1708 Expression::Union(mut union) => {
1709 union.left = left;
1710 union.right = right;
1711 if let Some(mut order) = union.order_by.take() {
1712 order.expressions = order
1713 .expressions
1714 .into_iter()
1715 .map(|o| {
1716 let mut o = o;
1717 let original = o.this.clone();
1718 o.this = transform_recursive(o.this, transform_fn)
1719 .unwrap_or(original);
1720 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1721 {
1722 Ok(Expression::Ordered(transformed)) => *transformed,
1723 Ok(_) | Err(_) => o,
1724 }
1725 })
1726 .collect();
1727 union.order_by = Some(order);
1728 }
1729 if let Some(mut with) = union.with.take() {
1730 with.ctes = with
1731 .ctes
1732 .into_iter()
1733 .map(|mut cte| {
1734 let original = cte.this.clone();
1735 cte.this = transform_recursive(cte.this, transform_fn)
1736 .unwrap_or(original);
1737 cte
1738 })
1739 .collect();
1740 union.with = Some(with);
1741 }
1742 Expression::Union(union)
1743 }
1744 Expression::Intersect(mut intersect) => {
1745 intersect.left = left;
1746 intersect.right = right;
1747 if let Some(mut order) = intersect.order_by.take() {
1748 order.expressions = order
1749 .expressions
1750 .into_iter()
1751 .map(|o| {
1752 let mut o = o;
1753 let original = o.this.clone();
1754 o.this = transform_recursive(o.this, transform_fn)
1755 .unwrap_or(original);
1756 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1757 {
1758 Ok(Expression::Ordered(transformed)) => *transformed,
1759 Ok(_) | Err(_) => o,
1760 }
1761 })
1762 .collect();
1763 intersect.order_by = Some(order);
1764 }
1765 if let Some(mut with) = intersect.with.take() {
1766 with.ctes = with
1767 .ctes
1768 .into_iter()
1769 .map(|mut cte| {
1770 let original = cte.this.clone();
1771 cte.this = transform_recursive(cte.this, transform_fn)
1772 .unwrap_or(original);
1773 cte
1774 })
1775 .collect();
1776 intersect.with = Some(with);
1777 }
1778 Expression::Intersect(intersect)
1779 }
1780 Expression::Except(mut except) => {
1781 except.left = left;
1782 except.right = right;
1783 if let Some(mut order) = except.order_by.take() {
1784 order.expressions = order
1785 .expressions
1786 .into_iter()
1787 .map(|o| {
1788 let mut o = o;
1789 let original = o.this.clone();
1790 o.this = transform_recursive(o.this, transform_fn)
1791 .unwrap_or(original);
1792 match transform_fn(Expression::Ordered(Box::new(o.clone())))
1793 {
1794 Ok(Expression::Ordered(transformed)) => *transformed,
1795 Ok(_) | Err(_) => o,
1796 }
1797 })
1798 .collect();
1799 except.order_by = Some(order);
1800 }
1801 if let Some(mut with) = except.with.take() {
1802 with.ctes = with
1803 .ctes
1804 .into_iter()
1805 .map(|mut cte| {
1806 let original = cte.this.clone();
1807 cte.this = transform_recursive(cte.this, transform_fn)
1808 .unwrap_or(original);
1809 cte
1810 })
1811 .collect();
1812 except.with = Some(with);
1813 }
1814 Expression::Except(except)
1815 }
1816 _ => {
1817 return Err(crate::error::Error::Internal(
1818 "unexpected set-op transform task".to_string(),
1819 ));
1820 }
1821 };
1822 results.push(transform_fn(rebuilt)?);
1823 }
1824 },
1825 }
1826 }
1827
1828 match results.len() {
1829 1 => Ok(results.pop().expect("single transform result")),
1830 _ => Err(crate::error::Error::Internal(
1831 "unexpected transform result stack size".to_string(),
1832 )),
1833 }
1834}
1835
/// Runs the recursive transform over a bare [`TableRef`].
///
/// The table reference is wrapped in `Expression::Table`, handed to
/// `transform_recursive`, and unwrapped again afterwards.
///
/// # Errors
///
/// Propagates any error from `transform_recursive`. If the transform yields
/// something other than `Expression::Table`, an error built with
/// `Error::parse` is returned.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_table_ref_recursive<F>(table: TableRef, transform_fn: &F) -> Result<TableRef>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let wrapped = Expression::Table(Box::new(table));
    let rewritten = transform_recursive(wrapped, transform_fn)?;

    if let Expression::Table(inner) = rewritten {
        return Ok(*inner);
    }

    // The callback replaced the table with a different node kind; it cannot
    // be stored back into a `TableRef` slot.
    Err(crate::error::Error::parse(
        "TableRef transformation returned non-table expression",
        0,
        0,
        0,
        0,
    ))
}
1857
/// Runs the recursive transform over a standalone [`From`] clause.
///
/// The clause is wrapped in `Expression::From`, passed through
/// `transform_recursive`, and unwrapped again afterwards.
///
/// # Errors
///
/// Propagates any error from `transform_recursive`. If the transform yields
/// something other than `Expression::From`, an error built with
/// `Error::parse` is returned.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_from_recursive<F>(from: From, transform_fn: &F) -> Result<From>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let wrapped = Expression::From(Box::new(from));
    let rewritten = transform_recursive(wrapped, transform_fn)?;

    if let Expression::From(inner) = rewritten {
        return Ok(*inner);
    }

    // The callback replaced the FROM clause with a different node kind.
    Err(crate::error::Error::parse(
        "FROM transformation returned non-FROM expression",
        0,
        0,
        0,
        0,
    ))
}
1879
/// Transforms a [`Join`] and the expressions it owns.
///
/// Children are rewritten with `transform_recursive` in this order: the
/// joined source (`this`), the optional `on` condition, the optional
/// `match_condition`, and then every entry of `pivots`. Finally the rebuilt
/// join is wrapped in `Expression::Join` and passed to `transform_fn` itself.
///
/// # Errors
///
/// Propagates the first error from any child transform or from
/// `transform_fn`. If `transform_fn` returns something other than
/// `Expression::Join`, an error built with `Error::parse` is returned.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_join_recursive<F>(mut join: Join, transform_fn: &F) -> Result<Join>
where
    F: Fn(Expression) -> Result<Expression>,
{
    join.this = transform_recursive(join.this, transform_fn)?;

    // Optional children: rewrite when present, leave `None` untouched.
    join.on = join
        .on
        .take()
        .map(|condition| transform_recursive(condition, transform_fn))
        .transpose()?;
    join.match_condition = join
        .match_condition
        .take()
        .map(|condition| transform_recursive(condition, transform_fn))
        .transpose()?;

    let mut rewritten_pivots = Vec::with_capacity(join.pivots.len());
    for pivot in std::mem::take(&mut join.pivots) {
        rewritten_pivots.push(transform_recursive(pivot, transform_fn)?);
    }
    join.pivots = rewritten_pivots;

    // Give the callback a chance to rewrite the join node itself.
    let rewritten = transform_fn(Expression::Join(Box::new(join)))?;
    if let Expression::Join(inner) = rewritten {
        return Ok(*inner);
    }

    Err(crate::error::Error::parse(
        "Join transformation returned non-join expression",
        0,
        0,
        0,
        0,
    ))
}
1914
/// Recursively transforms the expressions held by an `OutputClause`.
///
/// Every entry of `columns` and the optional `into_table` expression are run through
/// `transform_recursive`. The clause itself is never handed to `transform_fn`; only its
/// children are rewritten.
///
/// # Errors
///
/// Propagates the first error produced while transforming a column or `into_table`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_output_clause_recursive<F>(
    mut output: OutputClause,
    transform_fn: &F,
) -> Result<OutputClause>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut columns = Vec::with_capacity(output.columns.len());
    for column in output.columns {
        columns.push(transform_recursive(column, transform_fn)?);
    }
    output.columns = columns;

    output.into_table = output
        .into_table
        .take()
        .map(|target| transform_recursive(target, transform_fn))
        .transpose()?;

    Ok(output)
}
1938
/// Recursively transforms the contents of a `With` clause.
///
/// The `this` expression of each CTE is run through `transform_recursive`, followed by the
/// optional boxed `search` expression. Neither the CTE wrappers nor the `With` node are
/// passed to `transform_fn` here; only the nested expressions are rewritten.
///
/// # Errors
///
/// Propagates the first error produced while transforming a CTE body or `search`.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_with_recursive<F>(mut with: With, transform_fn: &F) -> Result<With>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut ctes = Vec::with_capacity(with.ctes.len());
    for mut cte in with.ctes {
        cte.this = transform_recursive(cte.this, transform_fn)?;
        ctes.push(cte);
    }
    with.ctes = ctes;

    // `search` is boxed, so unbox for the traversal and re-box the result.
    with.search = with
        .search
        .take()
        .map(|search| transform_recursive(*search, transform_fn).map(Box::new))
        .transpose()?;

    Ok(with)
}
1962
/// Best-effort transformation of every entry in an `OrderBy`.
///
/// Each ordered entry is processed in two steps:
///
/// 1. Its inner expression (`this`) is run through `transform_recursive`. If that fails,
///    the original inner expression is kept.
/// 2. The entry is wrapped in `Expression::Ordered` and passed to `transform_fn`. The
///    result is used only if it is itself an `Expression::Ordered`; on an error or any
///    other variant, the entry from step 1 is kept.
///
/// Failures are swallowed rather than propagated, so this function currently always
/// returns `Ok`. The `Result` return type keeps the signature in line with the other
/// recursive helpers.
#[cfg(any(
    feature = "transpile",
    feature = "ast-tools",
    feature = "generate",
    feature = "semantic"
))]
fn transform_order_by_recursive<F>(mut order: OrderBy, transform_fn: &F) -> Result<OrderBy>
where
    F: Fn(Expression) -> Result<Expression>,
{
    let mut rewritten = Vec::with_capacity(order.expressions.len());
    for mut entry in order.expressions {
        // Keep a copy of the inner expression to fall back on if the traversal fails.
        let fallback = entry.this.clone();
        entry.this = match transform_recursive(entry.this, transform_fn) {
            Ok(transformed) => transformed,
            Err(_) => fallback,
        };

        // Offer the whole `Ordered` node to the callback; a clone is passed so the
        // current entry survives if the callback errors or changes the variant.
        let candidate = Expression::Ordered(Box::new(entry.clone()));
        let resolved = match transform_fn(candidate) {
            Ok(Expression::Ordered(replacement)) => *replacement,
            _ => entry,
        };
        rewritten.push(resolved);
    }
    order.expressions = rewritten;

    Ok(order)
}
1987
1988#[cfg(any(
1989 feature = "transpile",
1990 feature = "ast-tools",
1991 feature = "generate",
1992 feature = "semantic"
1993))]
1994fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1995where
1996 F: Fn(Expression) -> Result<Expression>,
1997{
1998 use crate::expressions::BinaryOp;
1999
2000 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
2001 macro_rules! recurse_agg {
2002 ($variant:ident, $f:expr) => {{
2003 let mut f = $f;
2004 f.this = transform_recursive(f.this, transform_fn)?;
2005 if let Some(filter) = f.filter.take() {
2006 f.filter = Some(transform_recursive(filter, transform_fn)?);
2007 }
2008 for ord in &mut f.order_by {
2009 ord.this = transform_recursive(
2010 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
2011 transform_fn,
2012 )?;
2013 }
2014 if let Some((ref mut expr, _)) = f.having_max {
2015 *expr = Box::new(transform_recursive(
2016 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
2017 transform_fn,
2018 )?);
2019 }
2020 if let Some(limit) = f.limit.take() {
2021 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
2022 }
2023 Expression::$variant(f)
2024 }};
2025 }
2026
2027 // Helper macro to transform binary ops with Box<BinaryOp>
2028 macro_rules! transform_binary {
2029 ($variant:ident, $op:expr) => {{
2030 let left = transform_recursive($op.left, transform_fn)?;
2031 let right = transform_recursive($op.right, transform_fn)?;
2032 Expression::$variant(Box::new(BinaryOp {
2033 left,
2034 right,
2035 left_comments: $op.left_comments,
2036 operator_comments: $op.operator_comments,
2037 trailing_comments: $op.trailing_comments,
2038 inferred_type: $op.inferred_type,
2039 }))
2040 }};
2041 }
2042
2043 // Fast path: leaf nodes never need child traversal, apply transform directly
2044 if matches!(
2045 &expr,
2046 Expression::Literal(_)
2047 | Expression::Boolean(_)
2048 | Expression::Null(_)
2049 | Expression::Identifier(_)
2050 | Expression::Star(_)
2051 | Expression::Parameter(_)
2052 | Expression::Placeholder(_)
2053 | Expression::SessionParameter(_)
2054 ) {
2055 return transform_fn(expr);
2056 }
2057
2058 // First recursively transform children, then apply the transform function
2059 let expr = match expr {
2060 Expression::Select(mut select) => {
2061 select.expressions = select
2062 .expressions
2063 .into_iter()
2064 .map(|e| transform_recursive(e, transform_fn))
2065 .collect::<Result<Vec<_>>>()?;
2066
2067 // Transform FROM clause
2068 if let Some(mut from) = select.from.take() {
2069 from.expressions = from
2070 .expressions
2071 .into_iter()
2072 .map(|e| transform_recursive(e, transform_fn))
2073 .collect::<Result<Vec<_>>>()?;
2074 select.from = Some(from);
2075 }
2076
2077 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
2078 select.joins = select
2079 .joins
2080 .into_iter()
2081 .map(|mut join| {
2082 join.this = transform_recursive(join.this, transform_fn)?;
2083 if let Some(on) = join.on.take() {
2084 join.on = Some(transform_recursive(on, transform_fn)?);
2085 }
2086 // Wrap join in Expression::Join to allow transform_fn to transform it
2087 match transform_fn(Expression::Join(Box::new(join)))? {
2088 Expression::Join(j) => Ok(*j),
2089 _ => Err(crate::error::Error::parse(
2090 "Join transformation returned non-join expression",
2091 0,
2092 0,
2093 0,
2094 0,
2095 )),
2096 }
2097 })
2098 .collect::<Result<Vec<_>>>()?;
2099
2100 // Transform LATERAL VIEW expressions (Hive/Spark)
2101 select.lateral_views = select
2102 .lateral_views
2103 .into_iter()
2104 .map(|mut lv| {
2105 lv.this = transform_recursive(lv.this, transform_fn)?;
2106 Ok(lv)
2107 })
2108 .collect::<Result<Vec<_>>>()?;
2109
2110 // Transform WHERE clause
2111 if let Some(mut where_clause) = select.where_clause.take() {
2112 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2113 select.where_clause = Some(where_clause);
2114 }
2115
2116 // Transform GROUP BY
2117 if let Some(mut group_by) = select.group_by.take() {
2118 group_by.expressions = group_by
2119 .expressions
2120 .into_iter()
2121 .map(|e| transform_recursive(e, transform_fn))
2122 .collect::<Result<Vec<_>>>()?;
2123 select.group_by = Some(group_by);
2124 }
2125
2126 // Transform HAVING
2127 if let Some(mut having) = select.having.take() {
2128 having.this = transform_recursive(having.this, transform_fn)?;
2129 select.having = Some(having);
2130 }
2131
2132 // Transform WITH (CTEs)
2133 if let Some(mut with) = select.with.take() {
2134 with.ctes = with
2135 .ctes
2136 .into_iter()
2137 .map(|mut cte| {
2138 let original = cte.this.clone();
2139 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2140 cte
2141 })
2142 .collect();
2143 select.with = Some(with);
2144 }
2145
2146 // Transform ORDER BY
2147 if let Some(mut order) = select.order_by.take() {
2148 order.expressions = order
2149 .expressions
2150 .into_iter()
2151 .map(|o| {
2152 let mut o = o;
2153 let original = o.this.clone();
2154 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2155 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
2156 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2157 Ok(Expression::Ordered(transformed)) => *transformed,
2158 Ok(_) | Err(_) => o,
2159 }
2160 })
2161 .collect();
2162 select.order_by = Some(order);
2163 }
2164
2165 // Transform WINDOW clause order_by
2166 if let Some(ref mut windows) = select.windows {
2167 for nw in windows.iter_mut() {
2168 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
2169 .into_iter()
2170 .map(|o| {
2171 let mut o = o;
2172 let original = o.this.clone();
2173 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2174 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2175 Ok(Expression::Ordered(transformed)) => *transformed,
2176 Ok(_) | Err(_) => o,
2177 }
2178 })
2179 .collect();
2180 }
2181 }
2182
2183 // Transform QUALIFY
2184 if let Some(mut qual) = select.qualify.take() {
2185 qual.this = transform_recursive(qual.this, transform_fn)?;
2186 select.qualify = Some(qual);
2187 }
2188
2189 Expression::Select(select)
2190 }
2191 Expression::Function(mut f) => {
2192 f.args = f
2193 .args
2194 .into_iter()
2195 .map(|e| transform_recursive(e, transform_fn))
2196 .collect::<Result<Vec<_>>>()?;
2197 Expression::Function(f)
2198 }
2199 Expression::AggregateFunction(mut f) => {
2200 f.args = f
2201 .args
2202 .into_iter()
2203 .map(|e| transform_recursive(e, transform_fn))
2204 .collect::<Result<Vec<_>>>()?;
2205 if let Some(filter) = f.filter {
2206 f.filter = Some(transform_recursive(filter, transform_fn)?);
2207 }
2208 Expression::AggregateFunction(f)
2209 }
2210 Expression::WindowFunction(mut wf) => {
2211 wf.this = transform_recursive(wf.this, transform_fn)?;
2212 wf.over.partition_by = wf
2213 .over
2214 .partition_by
2215 .into_iter()
2216 .map(|e| transform_recursive(e, transform_fn))
2217 .collect::<Result<Vec<_>>>()?;
2218 // Transform order_by items through Expression::Ordered wrapper
2219 wf.over.order_by = wf
2220 .over
2221 .order_by
2222 .into_iter()
2223 .map(|o| {
2224 let mut o = o;
2225 o.this = transform_recursive(o.this, transform_fn)?;
2226 match transform_fn(Expression::Ordered(Box::new(o)))? {
2227 Expression::Ordered(transformed) => Ok(*transformed),
2228 _ => Err(crate::error::Error::parse(
2229 "Ordered transformation returned non-Ordered expression",
2230 0,
2231 0,
2232 0,
2233 0,
2234 )),
2235 }
2236 })
2237 .collect::<Result<Vec<_>>>()?;
2238 Expression::WindowFunction(wf)
2239 }
2240 Expression::Alias(mut a) => {
2241 a.this = transform_recursive(a.this, transform_fn)?;
2242 Expression::Alias(a)
2243 }
2244 Expression::Cast(mut c) => {
2245 c.this = transform_recursive(c.this, transform_fn)?;
2246 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
2247 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2248 Expression::Cast(c)
2249 }
2250 Expression::And(op) => transform_binary!(And, *op),
2251 Expression::Or(op) => transform_binary!(Or, *op),
2252 Expression::Add(op) => transform_binary!(Add, *op),
2253 Expression::Sub(op) => transform_binary!(Sub, *op),
2254 Expression::Mul(op) => transform_binary!(Mul, *op),
2255 Expression::Div(op) => transform_binary!(Div, *op),
2256 Expression::Eq(op) => transform_binary!(Eq, *op),
2257 Expression::Lt(op) => transform_binary!(Lt, *op),
2258 Expression::Gt(op) => transform_binary!(Gt, *op),
2259 Expression::Paren(mut p) => {
2260 p.this = transform_recursive(p.this, transform_fn)?;
2261 Expression::Paren(p)
2262 }
2263 Expression::Coalesce(mut f) => {
2264 f.expressions = f
2265 .expressions
2266 .into_iter()
2267 .map(|e| transform_recursive(e, transform_fn))
2268 .collect::<Result<Vec<_>>>()?;
2269 Expression::Coalesce(f)
2270 }
2271 Expression::IfNull(mut f) => {
2272 f.this = transform_recursive(f.this, transform_fn)?;
2273 f.expression = transform_recursive(f.expression, transform_fn)?;
2274 Expression::IfNull(f)
2275 }
2276 Expression::Nvl(mut f) => {
2277 f.this = transform_recursive(f.this, transform_fn)?;
2278 f.expression = transform_recursive(f.expression, transform_fn)?;
2279 Expression::Nvl(f)
2280 }
2281 Expression::In(mut i) => {
2282 i.this = transform_recursive(i.this, transform_fn)?;
2283 i.expressions = i
2284 .expressions
2285 .into_iter()
2286 .map(|e| transform_recursive(e, transform_fn))
2287 .collect::<Result<Vec<_>>>()?;
2288 if let Some(query) = i.query {
2289 i.query = Some(transform_recursive(query, transform_fn)?);
2290 }
2291 Expression::In(i)
2292 }
2293 Expression::Not(mut n) => {
2294 n.this = transform_recursive(n.this, transform_fn)?;
2295 Expression::Not(n)
2296 }
2297 Expression::ArraySlice(mut s) => {
2298 s.this = transform_recursive(s.this, transform_fn)?;
2299 if let Some(start) = s.start {
2300 s.start = Some(transform_recursive(start, transform_fn)?);
2301 }
2302 if let Some(end) = s.end {
2303 s.end = Some(transform_recursive(end, transform_fn)?);
2304 }
2305 Expression::ArraySlice(s)
2306 }
2307 Expression::Subscript(mut s) => {
2308 s.this = transform_recursive(s.this, transform_fn)?;
2309 s.index = transform_recursive(s.index, transform_fn)?;
2310 Expression::Subscript(s)
2311 }
2312 Expression::Array(mut a) => {
2313 a.expressions = a
2314 .expressions
2315 .into_iter()
2316 .map(|e| transform_recursive(e, transform_fn))
2317 .collect::<Result<Vec<_>>>()?;
2318 Expression::Array(a)
2319 }
2320 Expression::Struct(mut s) => {
2321 let mut new_fields = Vec::new();
2322 for (name, expr) in s.fields {
2323 let transformed = transform_recursive(expr, transform_fn)?;
2324 new_fields.push((name, transformed));
2325 }
2326 s.fields = new_fields;
2327 Expression::Struct(s)
2328 }
2329 Expression::NamedArgument(mut na) => {
2330 na.value = transform_recursive(na.value, transform_fn)?;
2331 Expression::NamedArgument(na)
2332 }
2333 Expression::MapFunc(mut m) => {
2334 m.keys = m
2335 .keys
2336 .into_iter()
2337 .map(|e| transform_recursive(e, transform_fn))
2338 .collect::<Result<Vec<_>>>()?;
2339 m.values = m
2340 .values
2341 .into_iter()
2342 .map(|e| transform_recursive(e, transform_fn))
2343 .collect::<Result<Vec<_>>>()?;
2344 Expression::MapFunc(m)
2345 }
2346 Expression::ArrayFunc(mut a) => {
2347 a.expressions = a
2348 .expressions
2349 .into_iter()
2350 .map(|e| transform_recursive(e, transform_fn))
2351 .collect::<Result<Vec<_>>>()?;
2352 Expression::ArrayFunc(a)
2353 }
2354 Expression::Lambda(mut l) => {
2355 l.body = transform_recursive(l.body, transform_fn)?;
2356 Expression::Lambda(l)
2357 }
2358 Expression::JsonExtract(mut f) => {
2359 f.this = transform_recursive(f.this, transform_fn)?;
2360 f.path = transform_recursive(f.path, transform_fn)?;
2361 Expression::JsonExtract(f)
2362 }
2363 Expression::JsonExtractScalar(mut f) => {
2364 f.this = transform_recursive(f.this, transform_fn)?;
2365 f.path = transform_recursive(f.path, transform_fn)?;
2366 Expression::JsonExtractScalar(f)
2367 }
2368
2369 // ===== UnaryFunc-based expressions =====
2370 // These all have a single `this: Expression` child
2371 Expression::Length(mut f) => {
2372 f.this = transform_recursive(f.this, transform_fn)?;
2373 Expression::Length(f)
2374 }
2375 Expression::Upper(mut f) => {
2376 f.this = transform_recursive(f.this, transform_fn)?;
2377 Expression::Upper(f)
2378 }
2379 Expression::Lower(mut f) => {
2380 f.this = transform_recursive(f.this, transform_fn)?;
2381 Expression::Lower(f)
2382 }
2383 Expression::LTrim(mut f) => {
2384 f.this = transform_recursive(f.this, transform_fn)?;
2385 Expression::LTrim(f)
2386 }
2387 Expression::RTrim(mut f) => {
2388 f.this = transform_recursive(f.this, transform_fn)?;
2389 Expression::RTrim(f)
2390 }
2391 Expression::Reverse(mut f) => {
2392 f.this = transform_recursive(f.this, transform_fn)?;
2393 Expression::Reverse(f)
2394 }
2395 Expression::Abs(mut f) => {
2396 f.this = transform_recursive(f.this, transform_fn)?;
2397 Expression::Abs(f)
2398 }
2399 Expression::Ceil(mut f) => {
2400 f.this = transform_recursive(f.this, transform_fn)?;
2401 Expression::Ceil(f)
2402 }
2403 Expression::Floor(mut f) => {
2404 f.this = transform_recursive(f.this, transform_fn)?;
2405 Expression::Floor(f)
2406 }
2407 Expression::Sign(mut f) => {
2408 f.this = transform_recursive(f.this, transform_fn)?;
2409 Expression::Sign(f)
2410 }
2411 Expression::Sqrt(mut f) => {
2412 f.this = transform_recursive(f.this, transform_fn)?;
2413 Expression::Sqrt(f)
2414 }
2415 Expression::Cbrt(mut f) => {
2416 f.this = transform_recursive(f.this, transform_fn)?;
2417 Expression::Cbrt(f)
2418 }
2419 Expression::Ln(mut f) => {
2420 f.this = transform_recursive(f.this, transform_fn)?;
2421 Expression::Ln(f)
2422 }
2423 Expression::Log(mut f) => {
2424 f.this = transform_recursive(f.this, transform_fn)?;
2425 if let Some(base) = f.base {
2426 f.base = Some(transform_recursive(base, transform_fn)?);
2427 }
2428 Expression::Log(f)
2429 }
2430 Expression::Exp(mut f) => {
2431 f.this = transform_recursive(f.this, transform_fn)?;
2432 Expression::Exp(f)
2433 }
2434 Expression::Date(mut f) => {
2435 f.this = transform_recursive(f.this, transform_fn)?;
2436 Expression::Date(f)
2437 }
2438 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2439 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2440 Expression::Variance(f) => recurse_agg!(Variance, f),
2441
2442 // ===== BinaryFunc-based expressions =====
2443 Expression::ModFunc(mut f) => {
2444 f.this = transform_recursive(f.this, transform_fn)?;
2445 f.expression = transform_recursive(f.expression, transform_fn)?;
2446 Expression::ModFunc(f)
2447 }
2448 Expression::Power(mut f) => {
2449 f.this = transform_recursive(f.this, transform_fn)?;
2450 f.expression = transform_recursive(f.expression, transform_fn)?;
2451 Expression::Power(f)
2452 }
2453 Expression::MapFromArrays(mut f) => {
2454 f.this = transform_recursive(f.this, transform_fn)?;
2455 f.expression = transform_recursive(f.expression, transform_fn)?;
2456 Expression::MapFromArrays(f)
2457 }
2458 Expression::ElementAt(mut f) => {
2459 f.this = transform_recursive(f.this, transform_fn)?;
2460 f.expression = transform_recursive(f.expression, transform_fn)?;
2461 Expression::ElementAt(f)
2462 }
2463 Expression::MapContainsKey(mut f) => {
2464 f.this = transform_recursive(f.this, transform_fn)?;
2465 f.expression = transform_recursive(f.expression, transform_fn)?;
2466 Expression::MapContainsKey(f)
2467 }
2468 Expression::Left(mut f) => {
2469 f.this = transform_recursive(f.this, transform_fn)?;
2470 f.length = transform_recursive(f.length, transform_fn)?;
2471 Expression::Left(f)
2472 }
2473 Expression::Right(mut f) => {
2474 f.this = transform_recursive(f.this, transform_fn)?;
2475 f.length = transform_recursive(f.length, transform_fn)?;
2476 Expression::Right(f)
2477 }
2478 Expression::Repeat(mut f) => {
2479 f.this = transform_recursive(f.this, transform_fn)?;
2480 f.times = transform_recursive(f.times, transform_fn)?;
2481 Expression::Repeat(f)
2482 }
2483
2484 // ===== Complex function expressions =====
2485 Expression::Substring(mut f) => {
2486 f.this = transform_recursive(f.this, transform_fn)?;
2487 f.start = transform_recursive(f.start, transform_fn)?;
2488 if let Some(len) = f.length {
2489 f.length = Some(transform_recursive(len, transform_fn)?);
2490 }
2491 Expression::Substring(f)
2492 }
2493 Expression::Replace(mut f) => {
2494 f.this = transform_recursive(f.this, transform_fn)?;
2495 f.old = transform_recursive(f.old, transform_fn)?;
2496 f.new = transform_recursive(f.new, transform_fn)?;
2497 Expression::Replace(f)
2498 }
2499 Expression::ConcatWs(mut f) => {
2500 f.separator = transform_recursive(f.separator, transform_fn)?;
2501 f.expressions = f
2502 .expressions
2503 .into_iter()
2504 .map(|e| transform_recursive(e, transform_fn))
2505 .collect::<Result<Vec<_>>>()?;
2506 Expression::ConcatWs(f)
2507 }
2508 Expression::Trim(mut f) => {
2509 f.this = transform_recursive(f.this, transform_fn)?;
2510 if let Some(chars) = f.characters {
2511 f.characters = Some(transform_recursive(chars, transform_fn)?);
2512 }
2513 Expression::Trim(f)
2514 }
2515 Expression::Split(mut f) => {
2516 f.this = transform_recursive(f.this, transform_fn)?;
2517 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2518 Expression::Split(f)
2519 }
2520 Expression::Lpad(mut f) => {
2521 f.this = transform_recursive(f.this, transform_fn)?;
2522 f.length = transform_recursive(f.length, transform_fn)?;
2523 if let Some(fill) = f.fill {
2524 f.fill = Some(transform_recursive(fill, transform_fn)?);
2525 }
2526 Expression::Lpad(f)
2527 }
2528 Expression::Rpad(mut f) => {
2529 f.this = transform_recursive(f.this, transform_fn)?;
2530 f.length = transform_recursive(f.length, transform_fn)?;
2531 if let Some(fill) = f.fill {
2532 f.fill = Some(transform_recursive(fill, transform_fn)?);
2533 }
2534 Expression::Rpad(f)
2535 }
2536
2537 // ===== Conditional expressions =====
2538 Expression::Case(mut c) => {
2539 if let Some(operand) = c.operand {
2540 c.operand = Some(transform_recursive(operand, transform_fn)?);
2541 }
2542 c.whens = c
2543 .whens
2544 .into_iter()
2545 .map(|(cond, then)| {
2546 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2547 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2548 (new_cond, new_then)
2549 })
2550 .collect();
2551 if let Some(else_expr) = c.else_ {
2552 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2553 }
2554 Expression::Case(c)
2555 }
2556 Expression::IfFunc(mut f) => {
2557 f.condition = transform_recursive(f.condition, transform_fn)?;
2558 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2559 if let Some(false_val) = f.false_value {
2560 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2561 }
2562 Expression::IfFunc(f)
2563 }
2564
2565 // ===== Date/Time expressions =====
2566 Expression::DateAdd(mut f) => {
2567 f.this = transform_recursive(f.this, transform_fn)?;
2568 f.interval = transform_recursive(f.interval, transform_fn)?;
2569 Expression::DateAdd(f)
2570 }
2571 Expression::DateSub(mut f) => {
2572 f.this = transform_recursive(f.this, transform_fn)?;
2573 f.interval = transform_recursive(f.interval, transform_fn)?;
2574 Expression::DateSub(f)
2575 }
2576 Expression::DateDiff(mut f) => {
2577 f.this = transform_recursive(f.this, transform_fn)?;
2578 f.expression = transform_recursive(f.expression, transform_fn)?;
2579 Expression::DateDiff(f)
2580 }
2581 Expression::DateTrunc(mut f) => {
2582 f.this = transform_recursive(f.this, transform_fn)?;
2583 Expression::DateTrunc(f)
2584 }
2585 Expression::Extract(mut f) => {
2586 f.this = transform_recursive(f.this, transform_fn)?;
2587 Expression::Extract(f)
2588 }
2589
2590 // ===== JSON expressions =====
2591 Expression::JsonObject(mut f) => {
2592 f.pairs = f
2593 .pairs
2594 .into_iter()
2595 .map(|(k, v)| {
2596 let new_k = transform_recursive(k, transform_fn)?;
2597 let new_v = transform_recursive(v, transform_fn)?;
2598 Ok((new_k, new_v))
2599 })
2600 .collect::<Result<Vec<_>>>()?;
2601 Expression::JsonObject(f)
2602 }
2603
2604 // ===== Subquery expressions =====
2605 Expression::Subquery(mut s) => {
2606 s.this = transform_recursive(s.this, transform_fn)?;
2607 Expression::Subquery(s)
2608 }
2609 Expression::Exists(mut e) => {
2610 e.this = transform_recursive(e.this, transform_fn)?;
2611 Expression::Exists(e)
2612 }
2613 Expression::Describe(mut d) => {
2614 d.target = transform_recursive(d.target, transform_fn)?;
2615 Expression::Describe(d)
2616 }
2617
2618 // ===== Set operations =====
2619 Expression::Union(mut u) => {
2620 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2621 u.left = transform_recursive(left, transform_fn)?;
2622 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2623 u.right = transform_recursive(right, transform_fn)?;
2624 if let Some(mut order) = u.order_by.take() {
2625 order.expressions = order
2626 .expressions
2627 .into_iter()
2628 .map(|o| {
2629 let mut o = o;
2630 let original = o.this.clone();
2631 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2632 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2633 Ok(Expression::Ordered(transformed)) => *transformed,
2634 Ok(_) | Err(_) => o,
2635 }
2636 })
2637 .collect();
2638 u.order_by = Some(order);
2639 }
2640 if let Some(mut with) = u.with.take() {
2641 with.ctes = with
2642 .ctes
2643 .into_iter()
2644 .map(|mut cte| {
2645 let original = cte.this.clone();
2646 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2647 cte
2648 })
2649 .collect();
2650 u.with = Some(with);
2651 }
2652 Expression::Union(u)
2653 }
2654 Expression::Intersect(mut i) => {
2655 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2656 i.left = transform_recursive(left, transform_fn)?;
2657 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2658 i.right = transform_recursive(right, transform_fn)?;
2659 if let Some(mut order) = i.order_by.take() {
2660 order.expressions = order
2661 .expressions
2662 .into_iter()
2663 .map(|o| {
2664 let mut o = o;
2665 let original = o.this.clone();
2666 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2667 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2668 Ok(Expression::Ordered(transformed)) => *transformed,
2669 Ok(_) | Err(_) => o,
2670 }
2671 })
2672 .collect();
2673 i.order_by = Some(order);
2674 }
2675 if let Some(mut with) = i.with.take() {
2676 with.ctes = with
2677 .ctes
2678 .into_iter()
2679 .map(|mut cte| {
2680 let original = cte.this.clone();
2681 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2682 cte
2683 })
2684 .collect();
2685 i.with = Some(with);
2686 }
2687 Expression::Intersect(i)
2688 }
2689 Expression::Except(mut e) => {
2690 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2691 e.left = transform_recursive(left, transform_fn)?;
2692 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2693 e.right = transform_recursive(right, transform_fn)?;
2694 if let Some(mut order) = e.order_by.take() {
2695 order.expressions = order
2696 .expressions
2697 .into_iter()
2698 .map(|o| {
2699 let mut o = o;
2700 let original = o.this.clone();
2701 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2702 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2703 Ok(Expression::Ordered(transformed)) => *transformed,
2704 Ok(_) | Err(_) => o,
2705 }
2706 })
2707 .collect();
2708 e.order_by = Some(order);
2709 }
2710 if let Some(mut with) = e.with.take() {
2711 with.ctes = with
2712 .ctes
2713 .into_iter()
2714 .map(|mut cte| {
2715 let original = cte.this.clone();
2716 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2717 cte
2718 })
2719 .collect();
2720 e.with = Some(with);
2721 }
2722 Expression::Except(e)
2723 }
2724
2725 // ===== DML expressions =====
2726 Expression::Insert(mut ins) => {
2727 // Transform VALUES clause expressions
2728 let mut new_values = Vec::new();
2729 for row in ins.values {
2730 let mut new_row = Vec::new();
2731 for e in row {
2732 new_row.push(transform_recursive(e, transform_fn)?);
2733 }
2734 new_values.push(new_row);
2735 }
2736 ins.values = new_values;
2737
2738 // Transform query (for INSERT ... SELECT)
2739 if let Some(query) = ins.query {
2740 ins.query = Some(transform_recursive(query, transform_fn)?);
2741 }
2742
2743 // Transform RETURNING clause
2744 let mut new_returning = Vec::new();
2745 for e in ins.returning {
2746 new_returning.push(transform_recursive(e, transform_fn)?);
2747 }
2748 ins.returning = new_returning;
2749
2750 // Transform ON CONFLICT clause
2751 if let Some(on_conflict) = ins.on_conflict {
2752 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2753 }
2754
2755 Expression::Insert(ins)
2756 }
2757 Expression::Update(mut upd) => {
2758 upd.table = transform_table_ref_recursive(upd.table, transform_fn)?;
2759 upd.extra_tables = upd
2760 .extra_tables
2761 .into_iter()
2762 .map(|table| transform_table_ref_recursive(table, transform_fn))
2763 .collect::<Result<Vec<_>>>()?;
2764 upd.table_joins = upd
2765 .table_joins
2766 .into_iter()
2767 .map(|join| transform_join_recursive(join, transform_fn))
2768 .collect::<Result<Vec<_>>>()?;
2769 upd.set = upd
2770 .set
2771 .into_iter()
2772 .map(|(id, val)| {
2773 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2774 (id, new_val)
2775 })
2776 .collect();
2777 if let Some(from_clause) = upd.from_clause.take() {
2778 upd.from_clause = Some(transform_from_recursive(from_clause, transform_fn)?);
2779 }
2780 upd.from_joins = upd
2781 .from_joins
2782 .into_iter()
2783 .map(|join| transform_join_recursive(join, transform_fn))
2784 .collect::<Result<Vec<_>>>()?;
2785 if let Some(mut where_clause) = upd.where_clause.take() {
2786 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2787 upd.where_clause = Some(where_clause);
2788 }
2789 upd.returning = upd
2790 .returning
2791 .into_iter()
2792 .map(|expr| transform_recursive(expr, transform_fn))
2793 .collect::<Result<Vec<_>>>()?;
2794 if let Some(output) = upd.output.take() {
2795 upd.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2796 }
2797 if let Some(with) = upd.with.take() {
2798 upd.with = Some(transform_with_recursive(with, transform_fn)?);
2799 }
2800 if let Some(limit) = upd.limit.take() {
2801 upd.limit = Some(transform_recursive(limit, transform_fn)?);
2802 }
2803 if let Some(order_by) = upd.order_by.take() {
2804 upd.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2805 }
2806 Expression::Update(upd)
2807 }
2808 Expression::Delete(mut del) => {
2809 del.table = transform_table_ref_recursive(del.table, transform_fn)?;
2810 del.using = del
2811 .using
2812 .into_iter()
2813 .map(|table| transform_table_ref_recursive(table, transform_fn))
2814 .collect::<Result<Vec<_>>>()?;
2815 if let Some(mut where_clause) = del.where_clause.take() {
2816 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2817 del.where_clause = Some(where_clause);
2818 }
2819 if let Some(output) = del.output.take() {
2820 del.output = Some(transform_output_clause_recursive(output, transform_fn)?);
2821 }
2822 if let Some(with) = del.with.take() {
2823 del.with = Some(transform_with_recursive(with, transform_fn)?);
2824 }
2825 if let Some(limit) = del.limit.take() {
2826 del.limit = Some(transform_recursive(limit, transform_fn)?);
2827 }
2828 if let Some(order_by) = del.order_by.take() {
2829 del.order_by = Some(transform_order_by_recursive(order_by, transform_fn)?);
2830 }
2831 del.returning = del
2832 .returning
2833 .into_iter()
2834 .map(|expr| transform_recursive(expr, transform_fn))
2835 .collect::<Result<Vec<_>>>()?;
2836 del.tables = del
2837 .tables
2838 .into_iter()
2839 .map(|table| transform_table_ref_recursive(table, transform_fn))
2840 .collect::<Result<Vec<_>>>()?;
2841 del.joins = del
2842 .joins
2843 .into_iter()
2844 .map(|join| transform_join_recursive(join, transform_fn))
2845 .collect::<Result<Vec<_>>>()?;
2846 Expression::Delete(del)
2847 }
2848
2849 // ===== CTE expressions =====
2850 Expression::With(mut w) => {
2851 w.ctes = w
2852 .ctes
2853 .into_iter()
2854 .map(|mut cte| {
2855 let original = cte.this.clone();
2856 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2857 cte
2858 })
2859 .collect();
2860 Expression::With(w)
2861 }
2862 Expression::Cte(mut c) => {
2863 c.this = transform_recursive(c.this, transform_fn)?;
2864 Expression::Cte(c)
2865 }
2866
2867 // ===== Order expressions =====
2868 Expression::Ordered(mut o) => {
2869 o.this = transform_recursive(o.this, transform_fn)?;
2870 Expression::Ordered(o)
2871 }
2872
2873 // ===== Negation =====
2874 Expression::Neg(mut n) => {
2875 n.this = transform_recursive(n.this, transform_fn)?;
2876 Expression::Neg(n)
2877 }
2878
2879 // ===== Between =====
2880 Expression::Between(mut b) => {
2881 b.this = transform_recursive(b.this, transform_fn)?;
2882 b.low = transform_recursive(b.low, transform_fn)?;
2883 b.high = transform_recursive(b.high, transform_fn)?;
2884 Expression::Between(b)
2885 }
2886 Expression::IsNull(mut i) => {
2887 i.this = transform_recursive(i.this, transform_fn)?;
2888 Expression::IsNull(i)
2889 }
2890 Expression::IsTrue(mut i) => {
2891 i.this = transform_recursive(i.this, transform_fn)?;
2892 Expression::IsTrue(i)
2893 }
2894 Expression::IsFalse(mut i) => {
2895 i.this = transform_recursive(i.this, transform_fn)?;
2896 Expression::IsFalse(i)
2897 }
2898
2899 // ===== Like expressions =====
2900 Expression::Like(mut l) => {
2901 l.left = transform_recursive(l.left, transform_fn)?;
2902 l.right = transform_recursive(l.right, transform_fn)?;
2903 Expression::Like(l)
2904 }
2905 Expression::ILike(mut l) => {
2906 l.left = transform_recursive(l.left, transform_fn)?;
2907 l.right = transform_recursive(l.right, transform_fn)?;
2908 Expression::ILike(l)
2909 }
2910
2911 // ===== Additional binary ops not covered by macro =====
2912 Expression::Neq(op) => transform_binary!(Neq, *op),
2913 Expression::Lte(op) => transform_binary!(Lte, *op),
2914 Expression::Gte(op) => transform_binary!(Gte, *op),
2915 Expression::Mod(op) => transform_binary!(Mod, *op),
2916 Expression::Concat(op) => transform_binary!(Concat, *op),
2917 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2918 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2919 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2920 Expression::Is(op) => transform_binary!(Is, *op),
2921
2922 // ===== TryCast / SafeCast =====
2923 Expression::TryCast(mut c) => {
2924 c.this = transform_recursive(c.this, transform_fn)?;
2925 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2926 Expression::TryCast(c)
2927 }
2928 Expression::SafeCast(mut c) => {
2929 c.this = transform_recursive(c.this, transform_fn)?;
2930 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2931 Expression::SafeCast(c)
2932 }
2933
2934 // ===== Misc =====
2935 Expression::Unnest(mut f) => {
2936 f.this = transform_recursive(f.this, transform_fn)?;
2937 f.expressions = f
2938 .expressions
2939 .into_iter()
2940 .map(|e| transform_recursive(e, transform_fn))
2941 .collect::<Result<Vec<_>>>()?;
2942 Expression::Unnest(f)
2943 }
2944 Expression::Explode(mut f) => {
2945 f.this = transform_recursive(f.this, transform_fn)?;
2946 Expression::Explode(f)
2947 }
2948 Expression::GroupConcat(mut f) => {
2949 f.this = transform_recursive(f.this, transform_fn)?;
2950 Expression::GroupConcat(f)
2951 }
2952 Expression::StringAgg(mut f) => {
2953 f.this = transform_recursive(f.this, transform_fn)?;
2954 if let Some(order_by) = f.order_by.take() {
2955 f.order_by = Some(
2956 order_by
2957 .into_iter()
2958 .map(|mut ordered| {
2959 let original = ordered.this.clone();
2960 ordered.this =
2961 transform_recursive(ordered.this, transform_fn).unwrap_or(original);
2962 match transform_fn(Expression::Ordered(Box::new(ordered.clone()))) {
2963 Ok(Expression::Ordered(transformed)) => Ok(*transformed),
2964 Ok(_) | Err(_) => Ok(ordered),
2965 }
2966 })
2967 .collect::<Result<Vec<_>>>()?,
2968 );
2969 }
2970 Expression::StringAgg(f)
2971 }
2972 Expression::ListAgg(mut f) => {
2973 f.this = transform_recursive(f.this, transform_fn)?;
2974 Expression::ListAgg(f)
2975 }
2976 Expression::ArrayAgg(mut f) => {
2977 f.this = transform_recursive(f.this, transform_fn)?;
2978 Expression::ArrayAgg(f)
2979 }
2980 Expression::ParseJson(mut f) => {
2981 f.this = transform_recursive(f.this, transform_fn)?;
2982 Expression::ParseJson(f)
2983 }
2984 Expression::ToJson(mut f) => {
2985 f.this = transform_recursive(f.this, transform_fn)?;
2986 Expression::ToJson(f)
2987 }
2988 Expression::JSONExtract(mut e) => {
2989 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2990 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2991 Expression::JSONExtract(e)
2992 }
2993 Expression::JSONExtractScalar(mut e) => {
2994 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2995 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2996 Expression::JSONExtractScalar(e)
2997 }
2998
2999 // StrToTime: recurse into this
3000 Expression::StrToTime(mut e) => {
3001 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
3002 Expression::StrToTime(e)
3003 }
3004
3005 // UnixToTime: recurse into this
3006 Expression::UnixToTime(mut e) => {
3007 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
3008 Expression::UnixToTime(e)
3009 }
3010
3011 // CreateTable: recurse into column defaults, on_update expressions, and data types
3012 Expression::CreateTable(mut ct) => {
3013 for col in &mut ct.columns {
3014 if let Some(default_expr) = col.default.take() {
3015 col.default = Some(transform_recursive(default_expr, transform_fn)?);
3016 }
3017 if let Some(on_update_expr) = col.on_update.take() {
3018 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
3019 }
3020 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
3021 // are NOT applied here because per-dialect transforms are designed for CAST/expression
3022 // contexts and may not produce correct results for DDL column definitions.
3023 // The DDL type mappings would need dedicated handling per source/target pair.
3024 }
3025 if let Some(as_select) = ct.as_select.take() {
3026 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
3027 }
3028 Expression::CreateTable(ct)
3029 }
3030
3031 // CreateView: recurse into the view body query
3032 Expression::CreateView(mut cv) => {
3033 cv.query = transform_recursive(cv.query, transform_fn)?;
3034 Expression::CreateView(cv)
3035 }
3036
3037 // CreateTask: recurse into the task body
3038 Expression::CreateTask(mut ct) => {
3039 ct.body = transform_recursive(ct.body, transform_fn)?;
3040 Expression::CreateTask(ct)
3041 }
3042
3043 // Prepare: recurse into the prepared statement body
3044 Expression::Prepare(mut prepare) => {
3045 prepare.statement = transform_recursive(prepare.statement, transform_fn)?;
3046 Expression::Prepare(prepare)
3047 }
3048
3049 // Execute: recurse into procedure/prepared name and argument values
3050 Expression::Execute(mut execute) => {
3051 execute.this = transform_recursive(execute.this, transform_fn)?;
3052 execute.arguments = execute
3053 .arguments
3054 .into_iter()
3055 .map(|argument| transform_recursive(argument, transform_fn))
3056 .collect::<Result<Vec<_>>>()?;
3057 execute.parameters = execute
3058 .parameters
3059 .into_iter()
3060 .map(|mut parameter| {
3061 parameter.value = transform_recursive(parameter.value, transform_fn)?;
3062 Ok(parameter)
3063 })
3064 .collect::<Result<Vec<_>>>()?;
3065 Expression::Execute(execute)
3066 }
3067
3068 // CreateProcedure: recurse into body expressions
3069 Expression::CreateProcedure(mut cp) => {
3070 if let Some(body) = cp.body.take() {
3071 cp.body = Some(match body {
3072 FunctionBody::Expression(expr) => {
3073 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
3074 }
3075 FunctionBody::Return(expr) => {
3076 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
3077 }
3078 FunctionBody::Statements(stmts) => {
3079 let transformed_stmts = stmts
3080 .into_iter()
3081 .map(|s| transform_recursive(s, transform_fn))
3082 .collect::<Result<Vec<_>>>()?;
3083 FunctionBody::Statements(transformed_stmts)
3084 }
3085 other => other,
3086 });
3087 }
3088 Expression::CreateProcedure(cp)
3089 }
3090
3091 // CreateFunction: recurse into body expressions
3092 Expression::CreateFunction(mut cf) => {
3093 if let Some(body) = cf.body.take() {
3094 cf.body = Some(match body {
3095 FunctionBody::Expression(expr) => {
3096 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
3097 }
3098 FunctionBody::Return(expr) => {
3099 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
3100 }
3101 FunctionBody::Statements(stmts) => {
3102 let transformed_stmts = stmts
3103 .into_iter()
3104 .map(|s| transform_recursive(s, transform_fn))
3105 .collect::<Result<Vec<_>>>()?;
3106 FunctionBody::Statements(transformed_stmts)
3107 }
3108 other => other,
3109 });
3110 }
3111 Expression::CreateFunction(cf)
3112 }
3113
3114 // MemberOf: recurse into left and right operands
3115 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
3116 // ArrayContainsAll (@>): recurse into left and right operands
3117 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
3118 // ArrayContainedBy (<@): recurse into left and right operands
3119 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
3120 // ArrayOverlaps (&&): recurse into left and right operands
3121 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
3122 // TsMatch (@@): recurse into left and right operands
3123 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
3124 // Adjacent (-|-): recurse into left and right operands
3125 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
3126
3127 // Table: recurse into when (HistoricalData) and changes fields
3128 Expression::Table(mut t) => {
3129 if let Some(when) = t.when.take() {
3130 let transformed =
3131 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
3132 if let Expression::HistoricalData(hd) = transformed {
3133 t.when = Some(hd);
3134 }
3135 }
3136 if let Some(changes) = t.changes.take() {
3137 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
3138 if let Expression::Changes(c) = transformed {
3139 t.changes = Some(c);
3140 }
3141 }
3142 Expression::Table(t)
3143 }
3144
3145 // HistoricalData (Snowflake time travel): recurse into expression
3146 Expression::HistoricalData(mut hd) => {
3147 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
3148 Expression::HistoricalData(hd)
3149 }
3150
3151 // Changes (Snowflake CHANGES clause): recurse into at_before and end
3152 Expression::Changes(mut c) => {
3153 if let Some(at_before) = c.at_before.take() {
3154 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
3155 }
3156 if let Some(end) = c.end.take() {
3157 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
3158 }
3159 Expression::Changes(c)
3160 }
3161
3162 // TableArgument: TABLE(expr) or MODEL(expr)
3163 Expression::TableArgument(mut ta) => {
3164 ta.this = transform_recursive(ta.this, transform_fn)?;
3165 Expression::TableArgument(ta)
3166 }
3167
3168 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
3169 Expression::JoinedTable(mut jt) => {
3170 jt.left = transform_recursive(jt.left, transform_fn)?;
3171 for join in &mut jt.joins {
3172 join.this = transform_recursive(
3173 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
3174 transform_fn,
3175 )?;
3176 if let Some(on) = join.on.take() {
3177 join.on = Some(transform_recursive(on, transform_fn)?);
3178 }
3179 }
3180 jt.lateral_views = jt
3181 .lateral_views
3182 .into_iter()
3183 .map(|mut lv| {
3184 lv.this = transform_recursive(lv.this, transform_fn)?;
3185 Ok(lv)
3186 })
3187 .collect::<Result<Vec<_>>>()?;
3188 Expression::JoinedTable(jt)
3189 }
3190
3191 // Lateral: LATERAL func() - recurse into the function expression
3192 Expression::Lateral(mut lat) => {
3193 *lat.this = transform_recursive(*lat.this, transform_fn)?;
3194 Expression::Lateral(lat)
3195 }
3196
3197 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
3198 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
3199 // as a unit together with the WithinGroup wrapper
3200 Expression::WithinGroup(mut wg) => {
3201 wg.order_by = wg
3202 .order_by
3203 .into_iter()
3204 .map(|mut o| {
3205 let original = o.this.clone();
3206 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
3207 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
3208 Ok(Expression::Ordered(transformed)) => *transformed,
3209 Ok(_) | Err(_) => o,
3210 }
3211 })
3212 .collect();
3213 Expression::WithinGroup(wg)
3214 }
3215
3216 // Filter: recurse into both the aggregate and the filter condition
3217 Expression::Filter(mut f) => {
3218 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
3219 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
3220 Expression::Filter(f)
3221 }
3222
3223 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
3224 // filter, order_by, having_max, and limit.
3225 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
3226 Expression::Sum(f) => recurse_agg!(Sum, f),
3227 Expression::Avg(f) => recurse_agg!(Avg, f),
3228 Expression::Min(f) => recurse_agg!(Min, f),
3229 Expression::Max(f) => recurse_agg!(Max, f),
3230 Expression::CountIf(f) => recurse_agg!(CountIf, f),
3231 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
3232 Expression::VarPop(f) => recurse_agg!(VarPop, f),
3233 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
3234 Expression::Median(f) => recurse_agg!(Median, f),
3235 Expression::Mode(f) => recurse_agg!(Mode, f),
3236 Expression::First(f) => recurse_agg!(First, f),
3237 Expression::Last(f) => recurse_agg!(Last, f),
3238 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
3239 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
3240 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
3241 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
3242 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
3243 Expression::Skewness(f) => recurse_agg!(Skewness, f),
3244 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
3245 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
3246 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
3247 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
3248 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
3249 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
3250
3251 // Count has its own struct with an Option<Expression> `this` field
3252 Expression::Count(mut c) => {
3253 if let Some(this) = c.this.take() {
3254 c.this = Some(transform_recursive(this, transform_fn)?);
3255 }
3256 if let Some(filter) = c.filter.take() {
3257 c.filter = Some(transform_recursive(filter, transform_fn)?);
3258 }
3259 Expression::Count(c)
3260 }
3261
3262 Expression::PipeOperator(mut pipe) => {
3263 pipe.this = transform_recursive(pipe.this, transform_fn)?;
3264 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
3265 Expression::PipeOperator(pipe)
3266 }
3267
3268 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
3269 Expression::ArrayExcept(mut f) => {
3270 f.this = transform_recursive(f.this, transform_fn)?;
3271 f.expression = transform_recursive(f.expression, transform_fn)?;
3272 Expression::ArrayExcept(f)
3273 }
3274 Expression::ArrayContains(mut f) => {
3275 f.this = transform_recursive(f.this, transform_fn)?;
3276 f.expression = transform_recursive(f.expression, transform_fn)?;
3277 Expression::ArrayContains(f)
3278 }
3279 Expression::ArrayDistinct(mut f) => {
3280 f.this = transform_recursive(f.this, transform_fn)?;
3281 Expression::ArrayDistinct(f)
3282 }
3283 Expression::ArrayPosition(mut f) => {
3284 f.this = transform_recursive(f.this, transform_fn)?;
3285 f.expression = transform_recursive(f.expression, transform_fn)?;
3286 Expression::ArrayPosition(f)
3287 }
3288
3289 // Pass through leaf nodes unchanged
3290 other => other,
3291 };
3292
3293 // Then apply the transform function
3294 transform_fn(expr)
3295}
3296
// NOTE: the following description applies to `configs_for_dialect_type` (defined below),
// not to the struct that follows: it returns the tokenizer config, generator config, and
// expression transform closure for a built-in dialect type. It is the shared implementation
// used by both `Dialect::get()` and custom dialect construction.
3300// ---------------------------------------------------------------------------
3301// Cached dialect configurations
3302// ---------------------------------------------------------------------------
3303
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
///
/// Instances are stored in the `CACHED_*` statics; `configs_for_dialect_type` clones the
/// fields out of them when it assembles a [`DialectConfigs`].
struct CachedDialectConfig {
    /// Tokenizer configuration produced by the dialect's `tokenizer_config()`.
    tokenizer_config: TokenizerConfig,
    /// Generator configuration produced by the dialect's `generator_config()`, held behind
    /// an `Arc` so it can be handed out without copying the config itself.
    /// Only present when the `generate` feature is enabled.
    #[cfg(feature = "generate")]
    generator_config: Arc<GeneratorConfig>,
}
3311
/// Per-call bundle of everything needed to drive a built-in dialect.
///
/// Produced by `configs_for_dialect_type`: the tokenizer and generator configs are clones
/// of the cached values, while the transformer closure is boxed anew for every bundle.
struct DialectConfigs {
    /// Owned copy of the dialect's tokenizer configuration.
    tokenizer_config: TokenizerConfig,
    /// Shared handle to the dialect's generator configuration (`generate` feature only).
    #[cfg(feature = "generate")]
    generator_config: Arc<GeneratorConfig>,
    /// Per-expression transform closure for the dialect (`transpile` feature only).
    #[cfg(feature = "transpile")]
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
}
3319
/// Emit a feature-gated `static` holding a lazily built [`CachedDialectConfig`].
///
/// Arguments, in order: the name of the static, an expression yielding the dialect value,
/// and the cargo feature (string literal) that must be enabled for the static to exist.
/// The configs are computed the first time the static is dereferenced.
macro_rules! cached_dialect {
    ($name:ident, $dialect:expr, $feat:literal) => {
        #[cfg(feature = $feat)]
        static $name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect;
            let tokenizer_config = dialect.tokenizer_config();
            #[cfg(feature = "generate")]
            let generator_config = Arc::new(dialect.generator_config());
            CachedDialectConfig {
                tokenizer_config,
                #[cfg(feature = "generate")]
                generator_config,
            }
        });
    };
}
3334
3335static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
3336 let d = GenericDialect;
3337 CachedDialectConfig {
3338 tokenizer_config: d.tokenizer_config(),
3339 #[cfg(feature = "generate")]
3340 generator_config: Arc::new(d.generator_config()),
3341 }
3342});
3343
// One lazily initialized `CachedDialectConfig` static per optional dialect.
// Each invocation names the static, the dialect value used to compute the configs, and the
// cargo feature that gates the static's existence.
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
3389
3390fn configs_for_dialect_type(dt: DialectType) -> DialectConfigs {
3391 /// Clone configs from a cached static and pair with a fresh transform closure.
3392 macro_rules! from_cache {
3393 ($cache:expr, $dialect_struct:expr) => {{
3394 let c = &*$cache;
3395 DialectConfigs {
3396 tokenizer_config: c.tokenizer_config.clone(),
3397 #[cfg(feature = "generate")]
3398 generator_config: c.generator_config.clone(),
3399 #[cfg(feature = "transpile")]
3400 transformer: Box::new(move |e| $dialect_struct.transform_expr(e)),
3401 }
3402 }};
3403 }
3404 match dt {
3405 #[cfg(feature = "dialect-postgresql")]
3406 DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
3407 #[cfg(feature = "dialect-mysql")]
3408 DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
3409 #[cfg(feature = "dialect-bigquery")]
3410 DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
3411 #[cfg(feature = "dialect-snowflake")]
3412 DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
3413 #[cfg(feature = "dialect-duckdb")]
3414 DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
3415 #[cfg(feature = "dialect-tsql")]
3416 DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
3417 #[cfg(feature = "dialect-oracle")]
3418 DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
3419 #[cfg(feature = "dialect-hive")]
3420 DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
3421 #[cfg(feature = "dialect-spark")]
3422 DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
3423 #[cfg(feature = "dialect-sqlite")]
3424 DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
3425 #[cfg(feature = "dialect-presto")]
3426 DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
3427 #[cfg(feature = "dialect-trino")]
3428 DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
3429 #[cfg(feature = "dialect-redshift")]
3430 DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
3431 #[cfg(feature = "dialect-clickhouse")]
3432 DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
3433 #[cfg(feature = "dialect-databricks")]
3434 DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
3435 #[cfg(feature = "dialect-athena")]
3436 DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
3437 #[cfg(feature = "dialect-teradata")]
3438 DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
3439 #[cfg(feature = "dialect-doris")]
3440 DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
3441 #[cfg(feature = "dialect-starrocks")]
3442 DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
3443 #[cfg(feature = "dialect-materialize")]
3444 DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
3445 #[cfg(feature = "dialect-risingwave")]
3446 DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
3447 #[cfg(feature = "dialect-singlestore")]
3448 DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
3449 #[cfg(feature = "dialect-cockroachdb")]
3450 DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
3451 #[cfg(feature = "dialect-tidb")]
3452 DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
3453 #[cfg(feature = "dialect-druid")]
3454 DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
3455 #[cfg(feature = "dialect-solr")]
3456 DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
3457 #[cfg(feature = "dialect-tableau")]
3458 DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
3459 #[cfg(feature = "dialect-dune")]
3460 DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
3461 #[cfg(feature = "dialect-fabric")]
3462 DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
3463 #[cfg(feature = "dialect-drill")]
3464 DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
3465 #[cfg(feature = "dialect-dremio")]
3466 DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
3467 #[cfg(feature = "dialect-exasol")]
3468 DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
3469 #[cfg(feature = "dialect-datafusion")]
3470 DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
3471 _ => from_cache!(CACHED_GENERIC, GenericDialect),
3472 }
3473}
3474
3475// ---------------------------------------------------------------------------
3476// Custom dialect registry
3477// ---------------------------------------------------------------------------
3478
/// Process-wide table of registered custom dialects, keyed by dialect name.
///
/// Created empty on first access. Entries are reference-counted so lookups can hand out a
/// config without holding the lock; all access goes through the `RwLock`.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
3481
/// Fully resolved configuration of a registered custom dialect.
///
/// Built by `CustomDialectBuilder::register` and stored in `CUSTOM_DIALECT_REGISTRY`.
struct CustomDialectConfig {
    /// Name the dialect is registered under (also the registry key).
    name: String,
    /// Built-in dialect whose configs were used as the starting point.
    base_dialect: DialectType,
    /// Base tokenizer config after the builder's tokenizer modifier (if any) was applied.
    tokenizer_config: TokenizerConfig,
    /// Base generator config after the builder's generator modifier (if any) was applied.
    #[cfg(feature = "generate")]
    generator_config: GeneratorConfig,
    /// Optional per-node transform supplied through the builder's `transform_fn`.
    #[cfg(feature = "transpile")]
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing hook supplied through the builder's `preprocess_fn`.
    #[cfg(feature = "transpile")]
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3493
/// Fluent builder for creating and registering custom SQL dialects.
///
/// A custom dialect is based on an existing built-in dialect and allows selective
/// overrides of tokenizer configuration, generator configuration, and expression
/// transforms.
///
/// # Example
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
/// use polyglot_sql::generator::NormalizeFunctions;
///
/// CustomDialectBuilder::new("my_postgres")
///     .based_on(DialectType::PostgreSQL)
///     .generator_config_modifier(|gc| {
///         gc.normalize_functions = NormalizeFunctions::Lower;
///     })
///     .register()
///     .unwrap();
///
/// let d = Dialect::get_by_name("my_postgres").unwrap();
/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
/// let sql = d.generate(&exprs[0]).unwrap();
/// assert_eq!(sql, "select count(*)");
///
/// polyglot_sql::unregister_custom_dialect("my_postgres");
/// ```
pub struct CustomDialectBuilder {
    /// Name the dialect will be registered under.
    name: String,
    /// Built-in dialect to inherit configs from; `new` initializes this to `Generic`.
    base_dialect: DialectType,
    /// One-shot hook run against the inherited tokenizer config during `register`.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot hook run against the inherited generator config during `register`.
    #[cfg(feature = "generate")]
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Optional per-node expression transform set via `transform_fn`.
    #[cfg(feature = "transpile")]
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocessing function set via `preprocess_fn`.
    #[cfg(feature = "transpile")]
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3532
3533impl CustomDialectBuilder {
3534 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3535 pub fn new(name: impl Into<String>) -> Self {
3536 Self {
3537 name: name.into(),
3538 base_dialect: DialectType::Generic,
3539 tokenizer_modifier: None,
3540 #[cfg(feature = "generate")]
3541 generator_modifier: None,
3542 #[cfg(feature = "transpile")]
3543 transform: None,
3544 #[cfg(feature = "transpile")]
3545 preprocess: None,
3546 }
3547 }
3548
3549 /// Set the base built-in dialect to inherit configuration from.
3550 pub fn based_on(mut self, dialect: DialectType) -> Self {
3551 self.base_dialect = dialect;
3552 self
3553 }
3554
3555 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3556 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3557 where
3558 F: FnOnce(&mut TokenizerConfig) + 'static,
3559 {
3560 self.tokenizer_modifier = Some(Box::new(f));
3561 self
3562 }
3563
3564 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3565 #[cfg(feature = "generate")]
3566 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3567 where
3568 F: FnOnce(&mut GeneratorConfig) + 'static,
3569 {
3570 self.generator_modifier = Some(Box::new(f));
3571 self
3572 }
3573
3574 /// Set a custom per-node expression transform function.
3575 ///
3576 /// This replaces the base dialect's transform. It is called on every expression
3577 /// node during the recursive transform pass.
3578 #[cfg(feature = "transpile")]
3579 pub fn transform_fn<F>(mut self, f: F) -> Self
3580 where
3581 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3582 {
3583 self.transform = Some(Arc::new(f));
3584 self
3585 }
3586
3587 /// Set a custom whole-tree preprocessing function.
3588 ///
3589 /// This replaces the base dialect's built-in preprocessing. It is called once
3590 /// on the entire expression tree before the recursive per-node transform.
3591 #[cfg(feature = "transpile")]
3592 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3593 where
3594 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3595 {
3596 self.preprocess = Some(Arc::new(f));
3597 self
3598 }
3599
3600 /// Build the custom dialect configuration and register it in the global registry.
3601 ///
3602 /// Returns an error if:
3603 /// - The name collides with a built-in dialect name
3604 /// - A custom dialect with the same name is already registered
3605 pub fn register(self) -> Result<()> {
3606 // Reject names that collide with built-in dialects
3607 if DialectType::from_str(&self.name).is_ok() {
3608 return Err(crate::error::Error::parse(
3609 format!(
3610 "Cannot register custom dialect '{}': name collides with built-in dialect",
3611 self.name
3612 ),
3613 0,
3614 0,
3615 0,
3616 0,
3617 ));
3618 }
3619
3620 // Get base configs
3621 let base_configs = configs_for_dialect_type(self.base_dialect);
3622 let mut tok_config = base_configs.tokenizer_config;
3623 #[cfg(feature = "generate")]
3624 let mut gen_config = (*base_configs.generator_config).clone();
3625
3626 // Apply modifiers
3627 if let Some(tok_mod) = self.tokenizer_modifier {
3628 tok_mod(&mut tok_config);
3629 }
3630 #[cfg(feature = "generate")]
3631 if let Some(gen_mod) = self.generator_modifier {
3632 gen_mod(&mut gen_config);
3633 }
3634
3635 let config = CustomDialectConfig {
3636 name: self.name.clone(),
3637 base_dialect: self.base_dialect,
3638 tokenizer_config: tok_config,
3639 #[cfg(feature = "generate")]
3640 generator_config: gen_config,
3641 #[cfg(feature = "transpile")]
3642 transform: self.transform,
3643 #[cfg(feature = "transpile")]
3644 preprocess: self.preprocess,
3645 };
3646
3647 register_custom_dialect(config)
3648 }
3649}
3650
3651use std::str::FromStr;
3652
3653fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3654 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3655 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3656 })?;
3657
3658 if registry.contains_key(&config.name) {
3659 return Err(crate::error::Error::parse(
3660 format!("Custom dialect '{}' is already registered", config.name),
3661 0,
3662 0,
3663 0,
3664 0,
3665 ));
3666 }
3667
3668 registry.insert(config.name.clone(), Arc::new(config));
3669 Ok(())
3670}
3671
3672/// Remove a custom dialect from the global registry.
3673///
3674/// Returns `true` if a dialect with that name was found and removed,
3675/// `false` if no such custom dialect existed.
3676pub fn unregister_custom_dialect(name: &str) -> bool {
3677 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3678 registry.remove(name).is_some()
3679 } else {
3680 false
3681 }
3682}
3683
3684fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3685 CUSTOM_DIALECT_REGISTRY
3686 .read()
3687 .ok()
3688 .and_then(|registry| registry.get(name).cloned())
3689}
3690
/// Main entry point for dialect-specific SQL operations.
///
/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
/// transformer for a specific SQL database engine. It is the high-level API through
/// which callers parse, generate, transform, and transpile SQL.
///
/// # Usage
///
/// ```rust,ignore
/// use polyglot_sql::dialects::{Dialect, DialectType};
///
/// // Parse PostgreSQL SQL into an AST
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
///
/// // Transpile from PostgreSQL to BigQuery
/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
/// ```
///
/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// The [`DialectType`] tag stored for this instance.
    dialect_type: DialectType,
    /// Tokenizer owned by this dialect instance.
    tokenizer: Tokenizer,
    /// Shared generator configuration (`generate` feature only).
    #[cfg(feature = "generate")]
    generator_config: Arc<GeneratorConfig>,
    /// Boxed per-expression transform closure (`transpile` feature only).
    #[cfg(feature = "transpile")]
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    #[cfg(feature = "generate")]
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    #[cfg(feature = "transpile")]
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3727
3728/// Options for [`Dialect::transpile_with`].
3729///
3730/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
3731/// The struct is marked `#[non_exhaustive]` so new fields can be added without
3732/// breaking the API.
3733///
3734/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
3735/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
/// Because of the container-level serde `default` attribute, fields missing from
/// the serialized input are filled in from the [`Default`] implementation.
3736#[cfg(feature = "transpile")]
3737#[derive(Debug, Clone, Serialize, Deserialize)]
3738#[serde(rename_all = "camelCase", default)]
3739#[non_exhaustive]
3740pub struct TranspileOptions {
3741 /// Whether to pretty-print the output SQL.
 ///
 /// Defaults to `false`.
3742 pub pretty: bool,
3743 /// How unsupported target-dialect constructs should be handled.
3744 ///
3745 /// The default is [`UnsupportedLevel::Warn`], which preserves the current
3746 /// compatibility behavior and continues transpilation.
3747 pub unsupported_level: UnsupportedLevel,
3748 /// Maximum number of unsupported diagnostics to include in raised errors.
 ///
 /// Defaults to `3`. Values below 1 are raised to 1 when the options are
 /// copied into the generator configuration.
3749 pub max_unsupported: usize,
3750}
3751
#[cfg(feature = "transpile")]
impl Default for TranspileOptions {
    /// Baseline options: compact output, unsupported constructs handled at
    /// [`UnsupportedLevel::Warn`], and at most three diagnostics per raised error.
    fn default() -> Self {
        let pretty = false;
        let unsupported_level = UnsupportedLevel::Warn;
        let max_unsupported: usize = 3;

        TranspileOptions {
            pretty,
            unsupported_level,
            max_unsupported,
        }
    }
}
3762
#[cfg(feature = "transpile")]
impl TranspileOptions {
    /// Default options with pretty-printing switched on.
    pub fn pretty() -> Self {
        let mut opts = Self::default();
        opts.pretty = true;
        opts
    }

    /// Default options, except that `unsupported_level` is
    /// [`UnsupportedLevel::Raise`].
    pub fn strict() -> Self {
        let mut opts = Self::default();
        opts.unsupported_level = UnsupportedLevel::Raise;
        opts
    }

    /// Returns these options with `unsupported_level` replaced by `level`.
    pub fn with_unsupported_level(self, level: UnsupportedLevel) -> Self {
        Self {
            unsupported_level: level,
            ..self
        }
    }

    /// Returns these options with `max_unsupported` (the cap on unsupported
    /// diagnostics included in raised errors) replaced by `max`.
    pub fn with_max_unsupported(self, max: usize) -> Self {
        Self {
            max_unsupported: max,
            ..self
        }
    }
}
3793
3794/// A value that can be used as the target dialect in [`Dialect::transpile`] /
3795/// [`Dialect::transpile_with`].
3796///
3797/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
3798/// dialect handle, including custom ones). End users do not normally need to
3799/// implement this trait themselves.
///
/// The `DialectType` implementation builds a `Dialect` with [`Dialect::get`] for
/// the duration of the call; the `&Dialect` implementation passes the borrowed
/// handle through unchanged.
3800#[cfg(feature = "transpile")]
3801pub trait TranspileTarget {
3802 /// Invoke `f` with a reference to the resolved target dialect.
 ///
 /// Consumes `self` and returns whatever `f` returns.
3803 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
3804}
3805
#[cfg(feature = "transpile")]
impl TranspileTarget for DialectType {
    /// Builds the built-in dialect for this variant and lends it to `f`.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        let resolved = Dialect::get(self);
        f(&resolved)
    }
}
3812
#[cfg(feature = "transpile")]
impl TranspileTarget for &Dialect {
    /// Passes the borrowed dialect handle straight to `f`.
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        let handle: &Dialect = self;
        f(handle)
    }
}
3819
3820impl Dialect {
3821 /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
3822 ///
3823 /// This is the primary constructor. It initializes the tokenizer, generator config,
3824 /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
3825 /// For hybrid dialects like Athena, it also sets up expression-specific generator
3826 /// config routing.
3827 pub fn get(dialect_type: DialectType) -> Self {
3828 let configs = configs_for_dialect_type(dialect_type);
3829 let tokenizer_config = configs.tokenizer_config;
3830 #[cfg(feature = "generate")]
3831 let generator_config = configs.generator_config;
3832 #[cfg(feature = "transpile")]
3833 let transformer = configs.transformer;
3834
3835 // Set up expression-specific generator config for hybrid dialects
3836 #[cfg(feature = "generate")]
3837 let generator_config_for_expr: Option<
3838 Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
3839 > = match dialect_type {
3840 #[cfg(feature = "dialect-athena")]
3841 DialectType::Athena => Some(Box::new(|expr| {
3842 AthenaDialect.generator_config_for_expr(expr)
3843 })),
3844 _ => None,
3845 };
3846
3847 Self {
3848 dialect_type,
3849 tokenizer: Tokenizer::new(tokenizer_config),
3850 #[cfg(feature = "generate")]
3851 generator_config,
3852 #[cfg(feature = "transpile")]
3853 transformer,
3854 #[cfg(feature = "generate")]
3855 generator_config_for_expr,
3856 #[cfg(feature = "transpile")]
3857 custom_preprocess: None,
3858 }
3859 }
3860
3861 /// Look up a dialect by string name.
3862 ///
3863 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3864 /// falls back to the custom dialect registry. Returns `None` if no dialect
3865 /// with the given name exists.
3866 pub fn get_by_name(name: &str) -> Option<Self> {
3867 // Try built-in first
3868 if let Ok(dt) = DialectType::from_str(name) {
3869 return Some(Self::get(dt));
3870 }
3871
3872 // Try custom registry
3873 let config = get_custom_dialect_config(name)?;
3874 Some(Self::from_custom_config(&config))
3875 }
3876
3877 /// Construct a `Dialect` from a custom dialect configuration.
3878 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3879 // Build the transformer: use custom if provided, else use base dialect's
3880 #[cfg(feature = "transpile")]
3881 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3882 if let Some(ref custom_transform) = config.transform {
3883 let t = Arc::clone(custom_transform);
3884 Box::new(move |e| t(e))
3885 } else {
3886 configs_for_dialect_type(config.base_dialect).transformer
3887 };
3888
3889 // Build the custom preprocess: use custom if provided
3890 #[cfg(feature = "transpile")]
3891 let custom_preprocess: Option<
3892 Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
3893 > = config.preprocess.as_ref().map(|p| {
3894 let p = Arc::clone(p);
3895 Box::new(move |e: Expression| p(e))
3896 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3897 });
3898
3899 Self {
3900 dialect_type: config.base_dialect,
3901 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3902 #[cfg(feature = "generate")]
3903 generator_config: Arc::new(config.generator_config.clone()),
3904 #[cfg(feature = "transpile")]
3905 transformer,
3906 #[cfg(feature = "generate")]
3907 generator_config_for_expr: None,
3908 #[cfg(feature = "transpile")]
3909 custom_preprocess,
3910 }
3911 }
3912
3913 /// Get the dialect type
3914 pub fn dialect_type(&self) -> DialectType {
3915 self.dialect_type
3916 }
3917
/// Borrows the generator configuration stored in this handle.
///
/// This is the dialect-wide configuration; a per-expression override, if one
/// is installed, is not consulted here.
#[cfg(feature = "generate")]
pub fn generator_config(&self) -> &GeneratorConfig {
    self.generator_config.as_ref()
}
3923
3924 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3925 ///
3926 /// The input may contain multiple semicolon-separated statements; each one
3927 /// produces a separate element in the returned vector. Tokenization uses
3928 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3929 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3930 let tokens = self.tokenizer.tokenize(sql)?;
3931 let config = crate::parser::ParserConfig {
3932 dialect: Some(self.dialect_type),
3933 ..Default::default()
3934 };
3935 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3936 parser.parse()
3937 }
3938
3939 /// Parse a standalone SQL data type using this dialect's tokenizer and parser.
3940 ///
3941 /// This accepts type strings such as `DECIMAL(10, 2)`, `INT[]`, or
3942 /// `STRUCT(a INT, b VARCHAR)` without requiring a surrounding statement.
3943 pub fn parse_data_type(&self, sql: &str) -> Result<DataType> {
3944 let tokens = self.tokenizer.tokenize(sql)?;
3945 let config = crate::parser::ParserConfig {
3946 dialect: Some(self.dialect_type),
3947 ..Default::default()
3948 };
3949 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3950 parser.parse_standalone_data_type()
3951 }
3952
3953 /// Tokenize SQL using this dialect's tokenizer configuration.
3954 pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
3955 self.tokenizer.tokenize(sql)
3956 }
3957
/// Produces an owned generator configuration for rendering `expr`.
///
/// When a per-expression hook is installed (hybrid dialects) its result is
/// returned; otherwise the dialect-wide configuration is cloned. Callers get
/// an owned value so they can adjust it before building a generator.
#[cfg(feature = "generate")]
fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
    match self.generator_config_for_expr.as_ref() {
        Some(pick_config) => pick_config(expr),
        None => GeneratorConfig::clone(&self.generator_config),
    }
}
3968
/// Renders an [`Expression`] AST node as SQL text for this dialect.
///
/// This method does not switch on pretty-printing or any other override; the
/// dialect's generator configuration is used as stored. Without a
/// per-expression hook the shared `Arc` configuration is reused instead of
/// being cloned.
///
/// # Errors
///
/// Propagates any error reported by the generator.
#[cfg(feature = "generate")]
pub fn generate(&self, expr: &Expression) -> Result<String> {
    let mut generator = if self.generator_config_for_expr.is_some() {
        // Hybrid dialect: the configuration depends on the expression.
        Generator::with_config(self.get_config_for_expr(expr))
    } else {
        // Common case: share the stored configuration, no deep copy.
        Generator::with_arc_config(Arc::clone(&self.generator_config))
    };
    generator.generate(expr)
}
3985
/// Renders `expr` as SQL with pretty-printing switched on.
///
/// Starts from the configuration chosen for `expr` and sets `pretty = true`.
#[cfg(feature = "generate")]
pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.pretty = true;
    })
}
3994
/// Renders `expr` as SQL while recording the dialect it was parsed from.
///
/// Starts from the configuration chosen for `expr` and sets its
/// `source_dialect` to `source` (used when transpiling).
#[cfg(feature = "generate")]
pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.source_dialect = Some(source);
    })
}
4003
/// Renders `expr` as pretty-printed SQL while recording the source dialect.
///
/// Starts from the configuration chosen for `expr`, then sets `pretty = true`
/// and `source_dialect = Some(source)`.
#[cfg(feature = "generate")]
pub fn generate_pretty_with_source(
    &self,
    expr: &Expression,
    source: DialectType,
) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.pretty = true;
        config.source_dialect = Some(source);
    })
}
4017
/// Renders `expr` as SQL using the source dialect and the caller's [`TranspileOptions`].
///
/// Starts from the configuration chosen for `expr`, then copies over the
/// source dialect, the pretty flag and the unsupported-construct settings.
/// `max_unsupported` is raised to at least 1.
#[cfg(all(feature = "generate", feature = "transpile"))]
fn generate_with_transpile_options(
    &self,
    expr: &Expression,
    source: DialectType,
    opts: &TranspileOptions,
) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.source_dialect = Some(source);
        config.pretty = opts.pretty;
        config.unsupported_level = opts.unsupported_level;
        config.max_unsupported = std::cmp::max(opts.max_unsupported, 1);
    })
}
4034
/// Renders `expr` as SQL with every identifier quoted (identify=True).
///
/// Starts from the configuration chosen for `expr` and sets
/// `always_quote_identifiers = true`.
#[cfg(feature = "generate")]
pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
    self.generate_with_overrides(expr, |config| {
        config.always_quote_identifiers = true;
    })
}
4043
/// Generate SQL from an expression with pretty printing and forced identifier quoting.
///
/// The base configuration is obtained through `get_config_for_expr`, exactly as
/// in the other `generate_*` helpers, so dialects with a per-expression
/// generator config hook (hybrid dialects such as Athena) render with the
/// configuration selected for `expr` rather than the dialect-wide one.
/// `pretty` and `always_quote_identifiers` are then both set to `true`.
///
/// # Errors
///
/// Propagates any error reported by the generator.
#[cfg(feature = "generate")]
pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
    // Previously this cloned `self.generator_config` directly, silently skipping
    // the per-expression hook that every sibling method honors.
    let mut config = self.get_config_for_expr(expr);
    config.pretty = true;
    config.always_quote_identifiers = true;
    let mut generator = Generator::with_config(config);
    generator.generate(expr)
}
4053
/// Renders `expr` as SQL after letting the caller adjust the generator configuration.
///
/// `overrides` receives a mutable reference to an owned copy of the
/// configuration chosen for `expr`; the stored dialect configuration itself
/// is not modified.
///
/// # Errors
///
/// Propagates any error reported by the generator.
#[cfg(feature = "generate")]
pub fn generate_with_overrides(
    &self,
    expr: &Expression,
    overrides: impl FnOnce(&mut GeneratorConfig),
) -> Result<String> {
    let mut adjusted = self.get_config_for_expr(expr);
    overrides(&mut adjusted);
    Generator::with_config(adjusted).generate(expr)
}
4066
/// Rewrites an expression tree so it conforms to this dialect.
///
/// Two phases run in sequence:
/// 1. **Preprocessing** -- whole-tree structural rewrites selected by dialect
///    (or the custom preprocess hook, when one is installed).
/// 2. **Recursive per-node transform** -- the preprocessed tree is passed to
///    [`transform_recursive`] together with this dialect's transformer.
///
/// Used both when transpiling towards this dialect and for identity
/// transforms within the same dialect.
///
/// # Errors
///
/// Returns the first error raised by either phase; the second phase does not
/// run if preprocessing fails.
#[cfg(feature = "transpile")]
pub fn transform(&self, expr: Expression) -> Result<Expression> {
    self.preprocess(expr)
        .and_then(|prepared| transform_recursive(prepared, &self.transformer))
}
4084
4085 /// Apply dialect-specific preprocessing transforms
 ///
 /// If a custom preprocess hook is installed it is called and its result is
 /// returned as-is; none of the built-in rewrites below run in that case.
 /// Otherwise the whole-tree rewrites listed for `self.dialect_type` run in the
 /// order written, stopping at the first error. Dialects without a matching arm
 /// (including arms compiled out by feature flags) get `expr` back unchanged.
4086 #[cfg(feature = "transpile")]
4087 fn preprocess(&self, expr: Expression) -> Result<Expression> {
4088 // If a custom preprocess function is set, use it instead of the built-in logic
4089 if let Some(ref custom_preprocess) = self.custom_preprocess {
4090 return custom_preprocess(expr);
4091 }
4092
 // The import is gated on the same features as the arms that use it
 // (presumably so it is not flagged as unused when none of them is enabled).
4093 #[cfg(any(
4094 feature = "dialect-mysql",
4095 feature = "dialect-postgresql",
4096 feature = "dialect-bigquery",
4097 feature = "dialect-snowflake",
4098 feature = "dialect-tsql",
4099 feature = "dialect-spark",
4100 feature = "dialect-databricks",
4101 feature = "dialect-hive",
4102 feature = "dialect-sqlite",
4103 feature = "dialect-trino",
4104 feature = "dialect-presto",
4105 feature = "dialect-duckdb",
4106 feature = "dialect-redshift",
4107 feature = "dialect-starrocks",
4108 feature = "dialect-oracle",
4109 feature = "dialect-clickhouse",
4110 ))]
4111 use crate::transforms;
4112
4113 match self.dialect_type {
4114 // MySQL: eliminate QUALIFY, FULL OUTER JOIN, and SEMI/ANTI joins
4115 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
4116 #[cfg(feature = "dialect-mysql")]
4117 DialectType::MySQL => {
4118 let expr = transforms::eliminate_qualify(expr)?;
4119 let expr = transforms::eliminate_full_outer_join(expr)?;
4120 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
4121 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
4122 Ok(expr)
4123 }
4124 // PostgreSQL doesn't support QUALIFY; SEMI/ANTI joins are eliminated as well
4125 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
4126 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
4127 #[cfg(feature = "dialect-postgresql")]
4128 DialectType::PostgreSQL => {
4129 let expr = transforms::eliminate_qualify(expr)?;
4130 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
4131 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
4132 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
4133 // Only normalize when sqlglot would fully parse (no body) —
4134 // sqlglot falls back to Command for complex function bodies,
4135 // preserving the original text including TO.
 // Only a top-level CREATE FUNCTION node is inspected here.
4136 let expr = if let Expression::CreateFunction(mut cf) = expr {
4137 if cf.body.is_none() {
4138 for opt in &mut cf.set_options {
4139 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
4140 &mut opt.value
4141 {
4142 *use_to = false;
4143 }
4144 }
4145 }
4146 Expression::CreateFunction(cf)
4147 } else {
4148 expr
4149 };
4150 Ok(expr)
4151 }
4152 // BigQuery: eliminate SEMI/ANTI joins, push CTE column names down, explode projections to UNNEST
4153 #[cfg(feature = "dialect-bigquery")]
4154 DialectType::BigQuery => {
4155 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
4156 let expr = transforms::pushdown_cte_column_names(expr)?;
4157 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
4158 Ok(expr)
4159 }
4160 // Snowflake: eliminate SEMI/ANTI joins and the WINDOW clause, rewrite FLATTEN projections to UNNEST
4161 #[cfg(feature = "dialect-snowflake")]
4162 DialectType::Snowflake => {
4163 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
4164 let expr = transforms::eliminate_window_clause(expr)?;
4165 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
4166 Ok(expr)
4167 }
4168 // TSQL doesn't support QUALIFY
4169 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
4170 // TSQL doesn't support CTEs in subqueries (hoist to top level)
4171 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
4172 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
4173 #[cfg(feature = "dialect-tsql")]
4174 DialectType::TSQL => {
4175 let expr = transforms::eliminate_qualify(expr)?;
4176 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
4177 let expr = transforms::ensure_bools(expr)?;
4178 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
4179 let expr = transforms::move_ctes_to_top_level(expr)?;
4180 let expr = transforms::qualify_derived_table_outputs(expr)?;
4181 Ok(expr)
4182 }
4183 // Spark doesn't support QUALIFY (but Databricks does)
4184 // Spark doesn't support CTEs in subqueries (hoist to top level)
4185 #[cfg(feature = "dialect-spark")]
4186 DialectType::Spark => {
4187 let expr = transforms::eliminate_qualify(expr)?;
4188 let expr = transforms::add_auto_table_alias(expr)?;
4189 let expr = transforms::simplify_nested_paren_values(expr)?;
4190 let expr = transforms::move_ctes_to_top_level(expr)?;
4191 Ok(expr)
4192 }
4193 // Databricks supports QUALIFY natively
4194 // Databricks doesn't support CTEs in subqueries (hoist to top level)
4195 #[cfg(feature = "dialect-databricks")]
4196 DialectType::Databricks => {
4197 let expr = transforms::add_auto_table_alias(expr)?;
4198 let expr = transforms::simplify_nested_paren_values(expr)?;
4199 let expr = transforms::move_ctes_to_top_level(expr)?;
4200 Ok(expr)
4201 }
4202 // Hive doesn't support QUALIFY or CTEs in subqueries
4203 #[cfg(feature = "dialect-hive")]
4204 DialectType::Hive => {
4205 let expr = transforms::eliminate_qualify(expr)?;
4206 let expr = transforms::move_ctes_to_top_level(expr)?;
4207 Ok(expr)
4208 }
4209 // SQLite doesn't support QUALIFY
4210 #[cfg(feature = "dialect-sqlite")]
4211 DialectType::SQLite => {
4212 let expr = transforms::eliminate_qualify(expr)?;
4213 Ok(expr)
4214 }
4215 // Trino doesn't support QUALIFY; projections are exploded to UNNEST
4216 #[cfg(feature = "dialect-trino")]
4217 DialectType::Trino => {
4218 let expr = transforms::eliminate_qualify(expr)?;
4219 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
4220 Ok(expr)
4221 }
4222 // Presto doesn't support QUALIFY or WINDOW clause
4223 #[cfg(feature = "dialect-presto")]
4224 DialectType::Presto => {
4225 let expr = transforms::eliminate_qualify(expr)?;
4226 let expr = transforms::eliminate_window_clause(expr)?;
4227 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
4228 Ok(expr)
4229 }
4230 // DuckDB supports QUALIFY - no elimination needed
4231 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
4232 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
4233 #[cfg(feature = "dialect-duckdb")]
4234 DialectType::DuckDB => {
4235 let expr = transforms::expand_posexplode_duckdb(expr)?;
4236 let expr = transforms::expand_like_any(expr)?;
4237 Ok(expr)
4238 }
4239 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
4240 #[cfg(feature = "dialect-redshift")]
4241 DialectType::Redshift => {
4242 let expr = transforms::eliminate_qualify(expr)?;
4243 let expr = transforms::eliminate_window_clause(expr)?;
4244 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
4245 Ok(expr)
4246 }
4247 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
 // DISTINCT ON is eliminated and GENERATE_DATE_ARRAY expanded to a recursive CTE as well
4248 #[cfg(feature = "dialect-starrocks")]
4249 DialectType::StarRocks => {
4250 let expr = transforms::eliminate_qualify(expr)?;
4251 let expr = transforms::expand_between_in_delete(expr)?;
4252 let expr = transforms::eliminate_distinct_on_for_dialect(
4253 expr,
4254 Some(DialectType::StarRocks),
4255 Some(DialectType::StarRocks),
4256 )?;
4257 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
4258 Ok(expr)
4259 }
4260 // DataFusion supports QUALIFY and semi/anti joins natively
4261 #[cfg(feature = "dialect-datafusion")]
4262 DialectType::DataFusion => Ok(expr),
4263 // Oracle doesn't support QUALIFY
4264 #[cfg(feature = "dialect-oracle")]
4265 DialectType::Oracle => {
4266 let expr = transforms::eliminate_qualify(expr)?;
4267 Ok(expr)
4268 }
4269 // Drill - no special preprocessing needed
4270 #[cfg(feature = "dialect-drill")]
4271 DialectType::Drill => Ok(expr),
4272 // Teradata - no special preprocessing needed
4273 #[cfg(feature = "dialect-teradata")]
4274 DialectType::Teradata => Ok(expr),
4275 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
4276 #[cfg(feature = "dialect-clickhouse")]
4277 DialectType::ClickHouse => {
4278 let expr = transforms::no_limit_order_by_union(expr)?;
4279 Ok(expr)
4280 }
4281 // Other dialects - no preprocessing
4282 _ => Ok(expr),
4283 }
4284 }
4285
/// Transpiles SQL written in this dialect into the given target dialect.
///
/// `target` can be a built-in [`DialectType`] variant or a reference to a
/// [`Dialect`] handle (built-in or custom):
///
/// ```rust,ignore
/// let pg = Dialect::get(DialectType::PostgreSQL);
/// pg.transpile("SELECT NOW()", DialectType::BigQuery)?;   // enum
/// pg.transpile("SELECT NOW()", &custom_dialect)?;         // handle
/// ```
///
/// This is [`transpile_with`](Self::transpile_with) called with
/// [`TranspileOptions::default`]; use that method for pretty-printing or
/// other options.
#[cfg(feature = "transpile")]
pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
    let defaults = TranspileOptions::default();
    self.transpile_with(sql, target, defaults)
}
4302
/// Transpiles SQL into the target dialect using the supplied [`TranspileOptions`]
/// (e.g. pretty-printing).
///
/// The target is resolved to a [`Dialect`] reference through
/// [`TranspileTarget::with_dialect`] before the actual work is done.
#[cfg(feature = "transpile")]
pub fn transpile_with<T: TranspileTarget>(
    &self,
    sql: &str,
    target: T,
    opts: TranspileOptions,
) -> Result<Vec<String>> {
    let options = &opts;
    target.with_dialect(|resolved| self.transpile_inner(sql, resolved, options))
}
4313
4314 #[cfg(feature = "transpile")]
4315 fn transpile_inner(
4316 &self,
4317 sql: &str,
4318 target_dialect: &Dialect,
4319 opts: &TranspileOptions,
4320 ) -> Result<Vec<String>> {
4321 let target = target_dialect.dialect_type;
4322 if matches!(self.dialect_type, DialectType::PostgreSQL)
4323 && matches!(target, DialectType::SQLite)
4324 {
4325 self.reject_pgvector_distance_operators_for_sqlite(sql)?;
4326 }
4327 let expressions = self.parse(sql)?;
4328 let generic_identity =
4329 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
4330
4331 if generic_identity {
4332 return expressions
4333 .into_iter()
4334 .map(|expr| {
4335 Self::reject_strict_unsupported(&expr, self.dialect_type, target, opts)?;
4336 target_dialect.generate_with_transpile_options(&expr, self.dialect_type, opts)
4337 })
4338 .collect();
4339 }
4340
4341 expressions
4342 .into_iter()
4343 .map(|expr| {
4344 // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
4345 // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
4346 // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
4347 let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
4348 use crate::expressions::DataType as DT;
4349 transform_recursive(expr, &|e| match e {
4350 Expression::DataType(DT::VarChar { .. }) => {
4351 Ok(Expression::DataType(DT::Text))
4352 }
4353 Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
4354 _ => Ok(e),
4355 })?
4356 } else {
4357 expr
4358 };
4359
4360 // When source and target differ, first normalize the source dialect's
4361 // AST constructs to standard SQL, so that the target dialect can handle them.
4362 // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
4363 let normalized =
4364 if self.dialect_type != target && self.dialect_type != DialectType::Generic {
4365 self.transform(expr)?
4366 } else {
4367 expr
4368 };
4369
4370 // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
4371 // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
4372 // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
4373 // cross-dialect transpilation we need the unwrapped JSON_QUERY.
4374 let normalized =
4375 if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4376 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
4377 {
4378 transform_recursive(normalized, &|e| {
4379 if let Expression::Function(ref f) = e {
4380 if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
4381 // Check if first arg is JSON_QUERY and second is JSON_VALUE
4382 if let (
4383 Expression::Function(ref jq),
4384 Expression::Function(ref jv),
4385 ) = (&f.args[0], &f.args[1])
4386 {
4387 if jq.name.eq_ignore_ascii_case("JSON_QUERY")
4388 && jv.name.eq_ignore_ascii_case("JSON_VALUE")
4389 {
4390 // Unwrap: return just JSON_QUERY(...)
4391 return Ok(f.args[0].clone());
4392 }
4393 }
4394 }
4395 }
4396 Ok(e)
4397 })?
4398 } else {
4399 normalized
4400 };
4401
4402 // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
4403 // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
4404 // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
4405 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4406 && !matches!(target, DialectType::Snowflake)
4407 {
4408 transform_recursive(normalized, &|e| {
4409 if let Expression::Function(ref f) = e {
4410 if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
4411 return Ok(Expression::Localtime(Box::new(
4412 crate::expressions::Localtime { this: None },
4413 )));
4414 }
4415 }
4416 Ok(e)
4417 })?
4418 } else {
4419 normalized
4420 };
4421
4422 // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
4423 // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
4424 // transform. DuckDB requires the count argument to be BIGINT.
4425 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4426 && matches!(target, DialectType::DuckDB)
4427 {
4428 transform_recursive(normalized, &|e| {
4429 if let Expression::Function(ref f) = e {
4430 if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
4431 // Check if first arg is space string literal
4432 if let Expression::Literal(ref lit) = f.args[0] {
4433 if let crate::expressions::Literal::String(ref s) = lit.as_ref()
4434 {
4435 if s == " " {
4436 // Wrap second arg in CAST(... AS BIGINT) if not already
4437 if !matches!(f.args[1], Expression::Cast(_)) {
4438 let mut new_args = f.args.clone();
4439 new_args[1] = Expression::Cast(Box::new(
4440 crate::expressions::Cast {
4441 this: new_args[1].clone(),
4442 to: crate::expressions::DataType::BigInt {
4443 length: None,
4444 },
4445 trailing_comments: Vec::new(),
4446 double_colon_syntax: false,
4447 format: None,
4448 default: None,
4449 inferred_type: None,
4450 },
4451 ));
4452 return Ok(Expression::Function(Box::new(
4453 crate::expressions::Function {
4454 name: f.name.clone(),
4455 args: new_args,
4456 distinct: f.distinct,
4457 trailing_comments: f
4458 .trailing_comments
4459 .clone(),
4460 use_bracket_syntax: f.use_bracket_syntax,
4461 no_parens: f.no_parens,
4462 quoted: f.quoted,
4463 span: None,
4464 inferred_type: None,
4465 },
4466 )));
4467 }
4468 }
4469 }
4470 }
4471 }
4472 }
4473 Ok(e)
4474 })?
4475 } else {
4476 normalized
4477 };
4478
4479 // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
4480 // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
4481 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4482 && !matches!(target, DialectType::BigQuery)
4483 {
4484 crate::transforms::propagate_struct_field_names(normalized)?
4485 } else {
4486 normalized
4487 };
4488
4489 // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
4490 // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
4491 // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
4492 // functions handle their generator args differently (as float seeds).
4493 let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
4494 && matches!(target, DialectType::DuckDB)
4495 {
4496 fn make_scaled_random() -> Expression {
4497 let lower =
4498 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4499 "-9.223372036854776E+18".to_string(),
4500 )));
4501 let upper =
4502 Expression::Literal(Box::new(crate::expressions::Literal::Number(
4503 "9.223372036854776e+18".to_string(),
4504 )));
4505 let random_call = Expression::Random(crate::expressions::Random);
4506 let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
4507 this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
4508 left: upper,
4509 right: lower.clone(),
4510 left_comments: vec![],
4511 operator_comments: vec![],
4512 trailing_comments: vec![],
4513 inferred_type: None,
4514 })),
4515 trailing_comments: vec![],
4516 }));
4517 let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
4518 left: random_call,
4519 right: range_size,
4520 left_comments: vec![],
4521 operator_comments: vec![],
4522 trailing_comments: vec![],
4523 inferred_type: None,
4524 }));
4525 let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
4526 left: lower,
4527 right: scaled,
4528 left_comments: vec![],
4529 operator_comments: vec![],
4530 trailing_comments: vec![],
4531 inferred_type: None,
4532 }));
4533 Expression::Cast(Box::new(crate::expressions::Cast {
4534 this: shifted,
4535 to: crate::expressions::DataType::BigInt { length: None },
4536 trailing_comments: vec![],
4537 double_colon_syntax: false,
4538 format: None,
4539 default: None,
4540 inferred_type: None,
4541 }))
4542 }
4543
4544 // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
4545 // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
4546 // This prevents transform_recursive (which is bottom-up) from expanding
4547 // seeded RANDOM into make_scaled_random() and losing the seed value.
4548 // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
4549 // and then un-expanded back to Expression::Random by the code below.
4550 let normalized = transform_recursive(normalized, &|e| {
4551 if let Expression::Function(ref f) = e {
4552 let n = f.name.to_ascii_uppercase();
4553 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
4554 if let Expression::Function(mut f) = e {
4555 for arg in f.args.iter_mut() {
4556 if let Expression::Rand(ref r) = arg {
4557 if r.lower.is_none() && r.upper.is_none() {
4558 if let Some(ref seed) = r.seed {
4559 // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
4560 // so it won't be expanded by the RANDOM expansion below
4561 *arg = Expression::Function(Box::new(
4562 crate::expressions::Function::new(
4563 "RANDOM".to_string(),
4564 vec![*seed.clone()],
4565 ),
4566 ));
4567 }
4568 }
4569 }
4570 }
4571 return Ok(Expression::Function(f));
4572 }
4573 }
4574 }
4575 Ok(e)
4576 })?;
4577
4578 // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
4579 // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
4580 // we see the parent. We detect this and undo the expansion by replacing
4581 // the expanded pattern back with Expression::Random.
4582 // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
4583 // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
4584 // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
4585 transform_recursive(normalized, &|e| {
4586 if let Expression::Function(ref f) = e {
4587 let n = f.name.to_ascii_uppercase();
4588 if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
4589 if let Expression::Function(mut f) = e {
4590 for arg in f.args.iter_mut() {
4591 // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
4592 if let Expression::Cast(ref cast) = arg {
4593 if matches!(
4594 cast.to,
4595 crate::expressions::DataType::BigInt { .. }
4596 ) {
4597 if let Expression::Add(ref add) = cast.this {
4598 if let Expression::Literal(ref lit) = add.left {
4599 if let crate::expressions::Literal::Number(
4600 ref num,
4601 ) = lit.as_ref()
4602 {
4603 if num == "-9.223372036854776E+18" {
4604 *arg = Expression::Random(
4605 crate::expressions::Random,
4606 );
4607 }
4608 }
4609 }
4610 }
4611 }
4612 }
4613 }
4614 return Ok(Expression::Function(f));
4615 }
4616 return Ok(e);
4617 }
4618 }
4619 match e {
4620 Expression::Random(_) => Ok(make_scaled_random()),
4621 // Rand(seed) with no bounds: drop seed and expand
4622 // (DuckDB RANDOM doesn't support seeds)
4623 Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
4624 Ok(make_scaled_random())
4625 }
4626 _ => Ok(e),
4627 }
4628 })?
4629 } else {
4630 normalized
4631 };
4632
4633 // Apply cross-dialect semantic normalizations
4634 let normalized =
4635 Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;
4636
4637 let normalized = if matches!(self.dialect_type, DialectType::SQLite)
4638 && !matches!(target, DialectType::SQLite)
4639 {
4640 Self::normalize_sqlite_double_quoted_defaults(normalized)?
4641 } else {
4642 normalized
4643 };
4644
4645 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4646 && matches!(target, DialectType::SQLite)
4647 {
4648 Self::normalize_postgres_to_sqlite_types(normalized)?
4649 } else {
4650 normalized
4651 };
4652
4653 let normalized = if matches!(self.dialect_type, DialectType::PostgreSQL)
4654 && matches!(target, DialectType::Fabric)
4655 {
4656 Self::normalize_postgres_to_fabric_decimal_types(normalized)?
4657 } else {
4658 normalized
4659 };
4660
4661 // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
4662 // (SELECT UNNEST(..., max_depth => 2)) subquery
4663 // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
4664 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4665 && matches!(target, DialectType::DuckDB)
4666 {
4667 crate::transforms::wrap_duckdb_unnest_struct(normalized)?
4668 } else {
4669 normalized
4670 };
4671
4672 // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
4673 // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
4674 let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
4675 && matches!(
4676 target,
4677 DialectType::DuckDB
4678 | DialectType::Presto
4679 | DialectType::Trino
4680 | DialectType::Athena
4681 | DialectType::Spark
4682 | DialectType::Databricks
4683 ) {
4684 crate::transforms::unnest_alias_to_column_alias(normalized)?
4685 } else if matches!(self.dialect_type, DialectType::BigQuery)
4686 && matches!(target, DialectType::BigQuery | DialectType::Redshift)
4687 {
4688 // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
4689 // but don't convert alias format (no _t0 wrapper)
4690 let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
4691 // For Redshift: strip UNNEST when arg is a column reference path
4692 if matches!(target, DialectType::Redshift) {
4693 crate::transforms::strip_unnest_column_refs(result)?
4694 } else {
4695 result
4696 }
4697 } else {
4698 normalized
4699 };
4700
4701 // For Presto/Trino targets from PostgreSQL/Redshift source:
4702 // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
4703 let normalized = if matches!(
4704 self.dialect_type,
4705 DialectType::PostgreSQL | DialectType::Redshift
4706 ) && matches!(
4707 target,
4708 DialectType::Presto | DialectType::Trino | DialectType::Athena
4709 ) {
4710 crate::transforms::wrap_unnest_join_aliases(normalized)?
4711 } else {
4712 normalized
4713 };
4714
4715 // Eliminate DISTINCT ON with target-dialect awareness
4716 // This must happen after source transform (which may produce DISTINCT ON)
4717 // and before target transform, with knowledge of the target dialect's NULL ordering behavior
4718 let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
4719 normalized,
4720 Some(target),
4721 Some(self.dialect_type),
4722 )?;
4723
4724 // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
4725 let normalized = if matches!(target, DialectType::Snowflake) {
4726 Self::transform_generate_date_array_snowflake(normalized)?
4727 } else {
4728 normalized
4729 };
4730
4731 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
4732 let normalized = if matches!(
4733 target,
4734 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4735 ) {
4736 crate::transforms::unnest_to_explode_select(normalized)?
4737 } else {
4738 normalized
4739 };
4740
4741 // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
4742 let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
4743 crate::transforms::no_limit_order_by_union(normalized)?
4744 } else {
4745 normalized
4746 };
4747
4748 // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
4749 // Python sqlglot does this in the TSQL generator, but we can't do it there
4750 // because it would break TSQL -> TSQL identity
4751 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4752 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4753 {
4754 transform_recursive(normalized, &|e| {
4755 if let Expression::Count(ref c) = e {
4756 // Build COUNT_BIG(...) as an AggregateFunction
4757 let args = if c.star {
4758 vec![Expression::Star(crate::expressions::Star {
4759 table: None,
4760 except: None,
4761 replace: None,
4762 rename: None,
4763 trailing_comments: Vec::new(),
4764 span: None,
4765 })]
4766 } else if let Some(ref this) = c.this {
4767 vec![this.clone()]
4768 } else {
4769 vec![]
4770 };
4771 Ok(Expression::AggregateFunction(Box::new(
4772 crate::expressions::AggregateFunction {
4773 name: "COUNT_BIG".to_string(),
4774 args,
4775 distinct: c.distinct,
4776 filter: c.filter.clone(),
4777 order_by: Vec::new(),
4778 limit: None,
4779 ignore_nulls: None,
4780 inferred_type: None,
4781 },
4782 )))
4783 } else {
4784 Ok(e)
4785 }
4786 })?
4787 } else {
4788 normalized
4789 };
4790
4791 // T-SQL/Fabric do not have a scalar boolean type. Keep predicate
4792 // contexts intact, but materialize boolean-valued expressions used
4793 // as values before target transforms add ORDER BY null sort keys.
4794 let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
4795 && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
4796 {
4797 Self::rewrite_boolean_values_for_tsql(normalized)?
4798 } else {
4799 normalized
4800 };
4801
4802 let transformed = target_dialect.transform(normalized)?;
4803
4804 // T-SQL and Fabric do not support aggregate FILTER clauses. Rewrite any
4805 // remaining filters after target transforms so special aggregate rewrites
4806 // (for example BOOL_OR/BOOL_AND) can consume their filters first.
4807 let transformed = if matches!(target, DialectType::TSQL | DialectType::Fabric) {
4808 Self::rewrite_aggregate_filters_for_tsql(transformed)?
4809 } else {
4810 transformed
4811 };
4812
4813 // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
4814 let transformed = if matches!(target, DialectType::DuckDB) {
4815 Self::seq_rownum_to_range(transformed)?
4816 } else {
4817 transformed
4818 };
4819
4820 Self::reject_strict_unsupported(&transformed, self.dialect_type, target, opts)?;
4821
4822 let mut sql = target_dialect.generate_with_transpile_options(
4823 &transformed,
4824 self.dialect_type,
4825 opts,
4826 )?;
4827
4828 // Align a known Snowflake pretty-print edge case with Python sqlglot output.
4829 if opts.pretty && target == DialectType::Snowflake {
4830 sql = Self::normalize_snowflake_pretty(sql);
4831 }
4832
4833 Ok(sql)
4834 })
4835 .collect()
4836 }
4837}
4838
4839// Transpile-only methods: cross-dialect normalization and helpers
4840#[cfg(feature = "transpile")]
4841impl Dialect {
4842 fn reject_strict_unsupported(
4843 expr: &Expression,
4844 source: DialectType,
4845 target: DialectType,
4846 opts: &TranspileOptions,
4847 ) -> Result<()> {
4848 if !matches!(
4849 opts.unsupported_level,
4850 UnsupportedLevel::Raise | UnsupportedLevel::Immediate
4851 ) {
4852 return Ok(());
4853 }
4854
4855 let mut diagnostics = Vec::new();
4856
4857 for node in expr.dfs() {
4858 if matches!(target, DialectType::Fabric | DialectType::Hive)
4859 && Self::node_has_recursive_with(node)
4860 {
4861 Self::push_unsupported_diagnostic(&mut diagnostics, "recursive CTEs");
4862 }
4863
4864 if matches!(target, DialectType::TSQL | DialectType::Fabric)
4865 && Self::node_has_lateral(node)
4866 {
4867 Self::push_unsupported_diagnostic(&mut diagnostics, "LATERAL joins and subqueries");
4868 }
4869
4870 if !Self::target_supports_remaining_unnest(target) && Self::node_is_unnest(node) {
4871 Self::push_unsupported_diagnostic(&mut diagnostics, "UNNEST");
4872 }
4873
4874 if !Self::target_supports_remaining_explode(target) && Self::node_is_explode(node) {
4875 Self::push_unsupported_diagnostic(&mut diagnostics, "EXPLODE");
4876 }
4877
4878 if Self::target_lacks_array_agg(target) && Self::node_is_array_agg(node) {
4879 Self::push_unsupported_diagnostic(&mut diagnostics, "ARRAY_AGG");
4880 }
4881
4882 if matches!(source, DialectType::PostgreSQL | DialectType::CockroachDB)
4883 && !matches!(target, DialectType::PostgreSQL | DialectType::CockroachDB)
4884 {
4885 if Self::node_is_function_named(node, "JSONB_BUILD_OBJECT") {
4886 Self::push_unsupported_diagnostic(
4887 &mut diagnostics,
4888 "PostgreSQL JSONB_BUILD_OBJECT",
4889 );
4890 }
4891 if Self::node_is_function_named(node, "TO_TSVECTOR") {
4892 Self::push_unsupported_diagnostic(&mut diagnostics, "PostgreSQL TO_TSVECTOR");
4893 }
4894 }
4895
4896 if opts.unsupported_level == UnsupportedLevel::Immediate && !diagnostics.is_empty() {
4897 break;
4898 }
4899 }
4900
4901 if diagnostics.is_empty() {
4902 return Ok(());
4903 }
4904
4905 let limit = if opts.unsupported_level == UnsupportedLevel::Immediate {
4906 1
4907 } else {
4908 opts.max_unsupported.max(1)
4909 };
4910 let mut messages = diagnostics.iter().take(limit).cloned().collect::<Vec<_>>();
4911 if diagnostics.len() > limit {
4912 messages.push(format!("... and {} more", diagnostics.len() - limit));
4913 }
4914
4915 Err(crate::error::Error::unsupported(
4916 messages.join("; "),
4917 target.to_string(),
4918 ))
4919 }
4920
4921 fn push_unsupported_diagnostic(diagnostics: &mut Vec<String>, message: &str) {
4922 if !diagnostics.iter().any(|existing| existing == message) {
4923 diagnostics.push(message.to_string());
4924 }
4925 }
4926
4927 fn node_has_recursive_with(expr: &Expression) -> bool {
4928 fn recursive(with: &Option<With>) -> bool {
4929 with.as_ref().is_some_and(|with| with.recursive)
4930 }
4931
4932 match expr {
4933 Expression::With(with) => with.recursive,
4934 Expression::Select(select) => recursive(&select.with),
4935 Expression::Union(union) => recursive(&union.with),
4936 Expression::Intersect(intersect) => recursive(&intersect.with),
4937 Expression::Except(except) => recursive(&except.with),
4938 Expression::Pivot(pivot) => recursive(&pivot.with),
4939 Expression::Insert(insert) => recursive(&insert.with),
4940 Expression::Update(update) => recursive(&update.with),
4941 Expression::Delete(delete) => recursive(&delete.with),
4942 _ => false,
4943 }
4944 }
4945
4946 fn node_has_lateral(expr: &Expression) -> bool {
4947 fn joins_have_lateral(joins: &[Join]) -> bool {
4948 joins.iter().any(|join| {
4949 matches!(
4950 join.kind,
4951 crate::expressions::JoinKind::Lateral
4952 | crate::expressions::JoinKind::LeftLateral
4953 )
4954 })
4955 }
4956
4957 match expr {
4958 Expression::Subquery(subquery) => subquery.lateral,
4959 Expression::Lateral(_) | Expression::LateralView(_) => true,
4960 Expression::Join(join) => matches!(
4961 join.kind,
4962 crate::expressions::JoinKind::Lateral | crate::expressions::JoinKind::LeftLateral
4963 ),
4964 Expression::Select(select) => {
4965 !select.lateral_views.is_empty() || joins_have_lateral(&select.joins)
4966 }
4967 Expression::JoinedTable(joined) => {
4968 !joined.lateral_views.is_empty() || joins_have_lateral(&joined.joins)
4969 }
4970 Expression::Update(update) => {
4971 joins_have_lateral(&update.table_joins) || joins_have_lateral(&update.from_joins)
4972 }
4973 _ => false,
4974 }
4975 }
4976
4977 fn target_supports_remaining_unnest(target: DialectType) -> bool {
4978 matches!(
4979 target,
4980 DialectType::PostgreSQL
4981 | DialectType::BigQuery
4982 | DialectType::DuckDB
4983 | DialectType::Presto
4984 | DialectType::Trino
4985 | DialectType::Athena
4986 )
4987 }
4988
4989 fn target_supports_remaining_explode(target: DialectType) -> bool {
4990 matches!(
4991 target,
4992 DialectType::Spark | DialectType::Databricks | DialectType::Hive
4993 )
4994 }
4995
4996 fn target_lacks_array_agg(target: DialectType) -> bool {
4997 matches!(
4998 target,
4999 DialectType::Fabric
5000 | DialectType::TSQL
5001 | DialectType::MySQL
5002 | DialectType::SQLite
5003 | DialectType::Oracle
5004 )
5005 }
5006
5007 fn node_is_unnest(expr: &Expression) -> bool {
5008 matches!(expr, Expression::Unnest(_)) || Self::node_is_function_named(expr, "UNNEST")
5009 }
5010
5011 fn node_is_explode(expr: &Expression) -> bool {
5012 matches!(expr, Expression::Explode(_) | Expression::ExplodeOuter(_))
5013 || Self::node_is_function_named(expr, "EXPLODE")
5014 || Self::node_is_function_named(expr, "EXPLODE_OUTER")
5015 }
5016
5017 fn node_is_array_agg(expr: &Expression) -> bool {
5018 matches!(expr, Expression::ArrayAgg(_)) || Self::node_is_function_named(expr, "ARRAY_AGG")
5019 }
5020
5021 fn node_is_function_named(expr: &Expression, name: &str) -> bool {
5022 match expr {
5023 Expression::Function(function) => function.name.eq_ignore_ascii_case(name),
5024 Expression::AggregateFunction(function) => function.name.eq_ignore_ascii_case(name),
5025 _ => false,
5026 }
5027 }
5028
5029 fn rewrite_boolean_values_for_tsql(expr: Expression) -> Result<Expression> {
5030 match expr {
5031 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5032 Expression::Subquery(mut subquery) => {
5033 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5034 Ok(Expression::Subquery(subquery))
5035 }
5036 Expression::Union(mut union) => {
5037 let left = std::mem::replace(&mut union.left, Expression::null());
5038 let right = std::mem::replace(&mut union.right, Expression::null());
5039 union.left = Self::rewrite_boolean_values_for_tsql(left)?;
5040 union.right = Self::rewrite_boolean_values_for_tsql(right)?;
5041 if let Some(mut with) = union.with.take() {
5042 with.ctes = with
5043 .ctes
5044 .into_iter()
5045 .map(|mut cte| {
5046 cte.this = Self::rewrite_boolean_values_for_tsql(cte.this)?;
5047 Ok(cte)
5048 })
5049 .collect::<Result<Vec<_>>>()?;
5050 union.with = Some(with);
5051 }
5052 Ok(Expression::Union(union))
5053 }
5054 Expression::Intersect(mut intersect) => {
5055 let left = std::mem::replace(&mut intersect.left, Expression::null());
5056 let right = std::mem::replace(&mut intersect.right, Expression::null());
5057 intersect.left = Self::rewrite_boolean_values_for_tsql(left)?;
5058 intersect.right = Self::rewrite_boolean_values_for_tsql(right)?;
5059 Ok(Expression::Intersect(intersect))
5060 }
5061 Expression::Except(mut except) => {
5062 let left = std::mem::replace(&mut except.left, Expression::null());
5063 let right = std::mem::replace(&mut except.right, Expression::null());
5064 except.left = Self::rewrite_boolean_values_for_tsql(left)?;
5065 except.right = Self::rewrite_boolean_values_for_tsql(right)?;
5066 Ok(Expression::Except(except))
5067 }
5068 other => Self::rewrite_tsql_boolean_embedded_queries(other),
5069 }
5070 }
5071
5072 fn rewrite_boolean_values_in_tsql_select(
5073 mut select: Box<crate::expressions::Select>,
5074 ) -> Result<Expression> {
5075 if let Some(mut with) = select.with.take() {
5076 with.ctes = with
5077 .ctes
5078 .into_iter()
5079 .map(|mut cte| {
5080 cte.this = Self::rewrite_boolean_values_for_tsql(cte.this)?;
5081 Ok(cte)
5082 })
5083 .collect::<Result<Vec<_>>>()?;
5084 select.with = Some(with);
5085 }
5086
5087 select.expressions = select
5088 .expressions
5089 .into_iter()
5090 .map(Self::rewrite_tsql_boolean_scalar_value)
5091 .collect::<Result<Vec<_>>>()?;
5092
5093 if let Some(mut from) = select.from.take() {
5094 from.expressions = from
5095 .expressions
5096 .into_iter()
5097 .map(Self::rewrite_tsql_boolean_embedded_queries)
5098 .collect::<Result<Vec<_>>>()?;
5099 select.from = Some(from);
5100 }
5101
5102 select.joins = select
5103 .joins
5104 .into_iter()
5105 .map(|mut join| {
5106 join.this = Self::rewrite_tsql_boolean_embedded_queries(join.this)?;
5107 if let Some(on) = join.on.take() {
5108 join.on = Some(Self::rewrite_tsql_boolean_predicate_context(on)?);
5109 }
5110 if let Some(match_condition) = join.match_condition.take() {
5111 join.match_condition = Some(Self::rewrite_tsql_boolean_predicate_context(
5112 match_condition,
5113 )?);
5114 }
5115 join.pivots = join
5116 .pivots
5117 .into_iter()
5118 .map(Self::rewrite_tsql_boolean_embedded_queries)
5119 .collect::<Result<Vec<_>>>()?;
5120 Ok(join)
5121 })
5122 .collect::<Result<Vec<_>>>()?;
5123
5124 select.lateral_views = select
5125 .lateral_views
5126 .into_iter()
5127 .map(|mut lateral_view| {
5128 lateral_view.this = Self::rewrite_tsql_boolean_embedded_queries(lateral_view.this)?;
5129 Ok(lateral_view)
5130 })
5131 .collect::<Result<Vec<_>>>()?;
5132
5133 if let Some(prewhere) = select.prewhere.take() {
5134 select.prewhere = Some(Self::rewrite_tsql_boolean_predicate_context(prewhere)?);
5135 }
5136
5137 if let Some(mut where_clause) = select.where_clause.take() {
5138 where_clause.this = Self::rewrite_tsql_boolean_predicate_context(where_clause.this)?;
5139 select.where_clause = Some(where_clause);
5140 }
5141
5142 if let Some(mut group_by) = select.group_by.take() {
5143 group_by.expressions = group_by
5144 .expressions
5145 .into_iter()
5146 .map(Self::rewrite_tsql_boolean_scalar_value)
5147 .collect::<Result<Vec<_>>>()?;
5148 select.group_by = Some(group_by);
5149 }
5150
5151 if let Some(mut having) = select.having.take() {
5152 having.this = Self::rewrite_tsql_boolean_predicate_context(having.this)?;
5153 select.having = Some(having);
5154 }
5155
5156 if let Some(mut qualify) = select.qualify.take() {
5157 qualify.this = Self::rewrite_tsql_boolean_predicate_context(qualify.this)?;
5158 select.qualify = Some(qualify);
5159 }
5160
5161 if let Some(mut order_by) = select.order_by.take() {
5162 order_by.expressions = Self::rewrite_tsql_boolean_ordered_values(order_by.expressions)?;
5163 select.order_by = Some(order_by);
5164 }
5165
5166 if let Some(mut distribute_by) = select.distribute_by.take() {
5167 distribute_by.expressions = distribute_by
5168 .expressions
5169 .into_iter()
5170 .map(Self::rewrite_tsql_boolean_scalar_value)
5171 .collect::<Result<Vec<_>>>()?;
5172 select.distribute_by = Some(distribute_by);
5173 }
5174
5175 if let Some(mut cluster_by) = select.cluster_by.take() {
5176 cluster_by.expressions =
5177 Self::rewrite_tsql_boolean_ordered_values(cluster_by.expressions)?;
5178 select.cluster_by = Some(cluster_by);
5179 }
5180
5181 if let Some(mut sort_by) = select.sort_by.take() {
5182 sort_by.expressions = Self::rewrite_tsql_boolean_ordered_values(sort_by.expressions)?;
5183 select.sort_by = Some(sort_by);
5184 }
5185
5186 if let Some(limit_by) = select.limit_by.take() {
5187 select.limit_by = Some(
5188 limit_by
5189 .into_iter()
5190 .map(Self::rewrite_tsql_boolean_scalar_value)
5191 .collect::<Result<Vec<_>>>()?,
5192 );
5193 }
5194
5195 if let Some(distinct_on) = select.distinct_on.take() {
5196 select.distinct_on = Some(
5197 distinct_on
5198 .into_iter()
5199 .map(Self::rewrite_tsql_boolean_scalar_value)
5200 .collect::<Result<Vec<_>>>()?,
5201 );
5202 }
5203
5204 if let Some(mut sample) = select.sample.take() {
5205 sample.size = Self::rewrite_tsql_boolean_embedded_queries(sample.size)?;
5206 if let Some(offset) = sample.offset.take() {
5207 sample.offset = Some(Self::rewrite_tsql_boolean_embedded_queries(offset)?);
5208 }
5209 if let Some(bucket_numerator) = sample.bucket_numerator.take() {
5210 sample.bucket_numerator = Some(Box::new(
5211 Self::rewrite_tsql_boolean_embedded_queries(*bucket_numerator)?,
5212 ));
5213 }
5214 if let Some(bucket_denominator) = sample.bucket_denominator.take() {
5215 sample.bucket_denominator = Some(Box::new(
5216 Self::rewrite_tsql_boolean_embedded_queries(*bucket_denominator)?,
5217 ));
5218 }
5219 if let Some(bucket_field) = sample.bucket_field.take() {
5220 sample.bucket_field = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5221 *bucket_field,
5222 )?));
5223 }
5224 select.sample = Some(sample);
5225 }
5226
5227 if let Some(settings) = select.settings.take() {
5228 select.settings = Some(
5229 settings
5230 .into_iter()
5231 .map(Self::rewrite_tsql_boolean_embedded_queries)
5232 .collect::<Result<Vec<_>>>()?,
5233 );
5234 }
5235
5236 if let Some(format) = select.format.take() {
5237 select.format = Some(Self::rewrite_tsql_boolean_embedded_queries(format)?);
5238 }
5239
5240 if let Some(mut windows) = select.windows.take() {
5241 for window in windows.iter_mut() {
5242 Self::rewrite_tsql_boolean_over_values(&mut window.spec)?;
5243 }
5244 select.windows = Some(windows);
5245 }
5246
5247 Ok(Expression::Select(select))
5248 }
5249
5250 fn rewrite_tsql_boolean_scalar_value(expr: Expression) -> Result<Expression> {
5251 if Self::is_tsql_boolean_value_expression(&expr) {
5252 return Ok(Self::tsql_boolean_value_case(expr));
5253 }
5254
5255 match expr {
5256 Expression::Alias(mut alias) => {
5257 alias.this = Self::rewrite_tsql_boolean_scalar_value(alias.this)?;
5258 Ok(Expression::Alias(alias))
5259 }
5260 Expression::Paren(mut paren) => {
5261 paren.this = Self::rewrite_tsql_boolean_scalar_value(paren.this)?;
5262 Ok(Expression::Paren(paren))
5263 }
5264 Expression::Cast(mut cast) => {
5265 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5266 if let Some(format) = cast.format.take() {
5267 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5268 *format,
5269 )?));
5270 }
5271 if let Some(default) = cast.default.take() {
5272 cast.default =
5273 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5274 }
5275 Ok(Expression::Cast(cast))
5276 }
5277 Expression::TryCast(mut cast) => {
5278 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5279 if let Some(format) = cast.format.take() {
5280 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5281 *format,
5282 )?));
5283 }
5284 if let Some(default) = cast.default.take() {
5285 cast.default =
5286 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5287 }
5288 Ok(Expression::TryCast(cast))
5289 }
5290 Expression::SafeCast(mut cast) => {
5291 cast.this = Self::rewrite_tsql_boolean_scalar_value(cast.this)?;
5292 if let Some(format) = cast.format.take() {
5293 cast.format = Some(Box::new(Self::rewrite_tsql_boolean_embedded_queries(
5294 *format,
5295 )?));
5296 }
5297 if let Some(default) = cast.default.take() {
5298 cast.default =
5299 Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*default)?));
5300 }
5301 Ok(Expression::SafeCast(cast))
5302 }
5303 Expression::Case(mut case) => {
5304 if let Some(operand) = case.operand.take() {
5305 case.operand = Some(Self::rewrite_tsql_boolean_scalar_value(operand)?);
5306 }
5307 case.whens = case
5308 .whens
5309 .into_iter()
5310 .map(|(condition, result)| {
5311 Ok((
5312 Self::rewrite_tsql_boolean_predicate_context(condition)?,
5313 Self::rewrite_tsql_boolean_scalar_value(result)?,
5314 ))
5315 })
5316 .collect::<Result<Vec<_>>>()?;
5317 if let Some(else_) = case.else_.take() {
5318 case.else_ = Some(Self::rewrite_tsql_boolean_scalar_value(else_)?);
5319 }
5320 Ok(Expression::Case(case))
5321 }
5322 Expression::IfFunc(mut if_func) => {
5323 if_func.condition =
5324 Self::rewrite_tsql_boolean_predicate_context(if_func.condition)?;
5325 if_func.true_value = Self::rewrite_tsql_boolean_scalar_value(if_func.true_value)?;
5326 if let Some(false_value) = if_func.false_value.take() {
5327 if_func.false_value =
5328 Some(Self::rewrite_tsql_boolean_scalar_value(false_value)?);
5329 }
5330 Ok(Expression::IfFunc(if_func))
5331 }
5332 Expression::WindowFunction(mut window_function) => {
5333 window_function.this =
5334 Self::rewrite_tsql_boolean_embedded_queries(window_function.this)?;
5335 Self::rewrite_tsql_boolean_over_values(&mut window_function.over)?;
5336 if let Some(mut keep) = window_function.keep.take() {
5337 keep.order_by = Self::rewrite_tsql_boolean_ordered_values(keep.order_by)?;
5338 window_function.keep = Some(keep);
5339 }
5340 Ok(Expression::WindowFunction(window_function))
5341 }
5342 Expression::WithinGroup(mut within_group) => {
5343 within_group.this = Self::rewrite_tsql_boolean_embedded_queries(within_group.this)?;
5344 within_group.order_by =
5345 Self::rewrite_tsql_boolean_ordered_values(within_group.order_by)?;
5346 Ok(Expression::WithinGroup(within_group))
5347 }
5348 Expression::Subquery(mut subquery) => {
5349 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5350 Ok(Expression::Subquery(subquery))
5351 }
5352 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5353 other => Self::rewrite_tsql_boolean_embedded_queries(other),
5354 }
5355 }
5356
5357 fn rewrite_tsql_boolean_predicate_context(expr: Expression) -> Result<Expression> {
5358 Self::rewrite_tsql_boolean_embedded_queries(expr)
5359 }
5360
5361 fn rewrite_tsql_boolean_embedded_queries(expr: Expression) -> Result<Expression> {
5362 transform_recursive(expr, &|e| match e {
5363 Expression::Select(select) => Self::rewrite_boolean_values_in_tsql_select(select),
5364 Expression::Subquery(mut subquery) => {
5365 subquery.this = Self::rewrite_boolean_values_for_tsql(subquery.this)?;
5366 Ok(Expression::Subquery(subquery))
5367 }
5368 Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_) => {
5369 Self::rewrite_boolean_values_for_tsql(e)
5370 }
5371 other => Ok(other),
5372 })
5373 }
5374
5375 fn rewrite_tsql_boolean_ordered_values(
5376 ordered: Vec<crate::expressions::Ordered>,
5377 ) -> Result<Vec<crate::expressions::Ordered>> {
5378 ordered
5379 .into_iter()
5380 .map(|mut ordered| {
5381 ordered.this = Self::rewrite_tsql_boolean_scalar_value(ordered.this)?;
5382 if let Some(with_fill) = ordered.with_fill.take() {
5383 ordered.with_fill = Some(Box::new(
5384 Self::rewrite_tsql_boolean_with_fill_values(*with_fill)?,
5385 ));
5386 }
5387 Ok(ordered)
5388 })
5389 .collect()
5390 }
5391
5392 fn rewrite_tsql_boolean_with_fill_values(
5393 mut with_fill: crate::expressions::WithFill,
5394 ) -> Result<crate::expressions::WithFill> {
5395 if let Some(from) = with_fill.from_.take() {
5396 with_fill.from_ = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*from)?));
5397 }
5398 if let Some(to) = with_fill.to.take() {
5399 with_fill.to = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*to)?));
5400 }
5401 if let Some(step) = with_fill.step.take() {
5402 with_fill.step = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(*step)?));
5403 }
5404 if let Some(staleness) = with_fill.staleness.take() {
5405 with_fill.staleness = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(
5406 *staleness,
5407 )?));
5408 }
5409 if let Some(interpolate) = with_fill.interpolate.take() {
5410 with_fill.interpolate = Some(Box::new(Self::rewrite_tsql_boolean_scalar_value(
5411 *interpolate,
5412 )?));
5413 }
5414 Ok(with_fill)
5415 }
5416
5417 fn rewrite_tsql_boolean_over_values(over: &mut crate::expressions::Over) -> Result<()> {
5418 over.partition_by = std::mem::take(&mut over.partition_by)
5419 .into_iter()
5420 .map(Self::rewrite_tsql_boolean_scalar_value)
5421 .collect::<Result<Vec<_>>>()?;
5422 over.order_by =
5423 Self::rewrite_tsql_boolean_ordered_values(std::mem::take(&mut over.order_by))?;
5424 Ok(())
5425 }
5426
5427 fn is_tsql_boolean_value_expression(expr: &Expression) -> bool {
5428 match expr {
5429 Expression::Paren(paren) => Self::is_tsql_boolean_value_expression(&paren.this),
5430 Expression::Eq(_)
5431 | Expression::Neq(_)
5432 | Expression::Lt(_)
5433 | Expression::Lte(_)
5434 | Expression::Gt(_)
5435 | Expression::Gte(_)
5436 | Expression::Is(_)
5437 | Expression::IsNull(_)
5438 | Expression::IsTrue(_)
5439 | Expression::IsFalse(_)
5440 | Expression::Like(_)
5441 | Expression::ILike(_)
5442 | Expression::SimilarTo(_)
5443 | Expression::Glob(_)
5444 | Expression::RegexpLike(_)
5445 | Expression::In(_)
5446 | Expression::Between(_)
5447 | Expression::Exists(_)
5448 | Expression::And(_)
5449 | Expression::Or(_)
5450 | Expression::Not(_)
5451 | Expression::Any(_)
5452 | Expression::All(_)
5453 | Expression::EqualNull(_) => true,
5454 _ => false,
5455 }
5456 }
5457
5458 fn tsql_boolean_value_case(predicate: Expression) -> Expression {
5459 Expression::Case(Box::new(crate::expressions::Case {
5460 operand: None,
5461 whens: vec![
5462 (predicate.clone(), Expression::number(1)),
5463 (
5464 Expression::Not(Box::new(crate::expressions::UnaryOp {
5465 this: predicate,
5466 inferred_type: None,
5467 })),
5468 Expression::number(0),
5469 ),
5470 ],
5471 else_: None,
5472 comments: Vec::new(),
5473 inferred_type: None,
5474 }))
5475 }
5476
5477 fn rewrite_aggregate_filters_for_tsql(expr: Expression) -> Result<Expression> {
5478 transform_recursive(expr, &|e| Self::rewrite_aggregate_filter_for_tsql(e))
5479 }
5480
5481 fn rewrite_aggregate_filter_for_tsql(expr: Expression) -> Result<Expression> {
5482 macro_rules! rewrite_agg_filter {
5483 ($variant:ident, $agg:expr) => {{
5484 let mut agg = $agg;
5485 if let Some(filter) = agg.filter.take() {
5486 let this = std::mem::replace(&mut agg.this, Expression::null());
5487 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5488 }
5489 Ok(Expression::$variant(agg))
5490 }};
5491 }
5492
5493 match expr {
5494 Expression::Filter(filter) => {
5495 let condition = match *filter.expression {
5496 Expression::Where(where_) => where_.this,
5497 other => other,
5498 };
5499 Ok(Self::push_filter_into_tsql_aggregate(
5500 *filter.this,
5501 condition,
5502 ))
5503 }
5504 Expression::AggregateFunction(mut agg) => {
5505 if let Some(filter) = agg.filter.take() {
5506 Self::rewrite_generic_aggregate_filter_for_tsql(&mut agg, filter);
5507 }
5508 Ok(Expression::AggregateFunction(agg))
5509 }
5510 Expression::Count(mut count) => {
5511 if let Some(filter) = count.filter.take() {
5512 let value = if count.star {
5513 Expression::number(1)
5514 } else {
5515 count.this.take().unwrap_or_else(|| Expression::number(1))
5516 };
5517 count.star = false;
5518 count.this = Some(Self::conditional_aggregate_value_for_tsql(filter, value));
5519 }
5520 Ok(Expression::Count(count))
5521 }
5522 Expression::Sum(agg) => rewrite_agg_filter!(Sum, agg),
5523 Expression::Avg(agg) => rewrite_agg_filter!(Avg, agg),
5524 Expression::Min(agg) => rewrite_agg_filter!(Min, agg),
5525 Expression::Max(agg) => rewrite_agg_filter!(Max, agg),
5526 Expression::ArrayAgg(agg) => rewrite_agg_filter!(ArrayAgg, agg),
5527 Expression::CountIf(agg) => rewrite_agg_filter!(CountIf, agg),
5528 Expression::Stddev(agg) => rewrite_agg_filter!(Stddev, agg),
5529 Expression::StddevPop(agg) => rewrite_agg_filter!(StddevPop, agg),
5530 Expression::StddevSamp(agg) => rewrite_agg_filter!(StddevSamp, agg),
5531 Expression::Variance(agg) => rewrite_agg_filter!(Variance, agg),
5532 Expression::VarPop(agg) => rewrite_agg_filter!(VarPop, agg),
5533 Expression::VarSamp(agg) => rewrite_agg_filter!(VarSamp, agg),
5534 Expression::Median(agg) => rewrite_agg_filter!(Median, agg),
5535 Expression::Mode(agg) => rewrite_agg_filter!(Mode, agg),
5536 Expression::First(agg) => rewrite_agg_filter!(First, agg),
5537 Expression::Last(agg) => rewrite_agg_filter!(Last, agg),
5538 Expression::AnyValue(agg) => rewrite_agg_filter!(AnyValue, agg),
5539 Expression::ApproxDistinct(agg) => rewrite_agg_filter!(ApproxDistinct, agg),
5540 Expression::ApproxCountDistinct(agg) => {
5541 rewrite_agg_filter!(ApproxCountDistinct, agg)
5542 }
5543 Expression::LogicalAnd(agg) => rewrite_agg_filter!(LogicalAnd, agg),
5544 Expression::LogicalOr(agg) => rewrite_agg_filter!(LogicalOr, agg),
5545 Expression::Skewness(agg) => rewrite_agg_filter!(Skewness, agg),
5546 Expression::ArrayConcatAgg(agg) => rewrite_agg_filter!(ArrayConcatAgg, agg),
5547 Expression::ArrayUniqueAgg(agg) => rewrite_agg_filter!(ArrayUniqueAgg, agg),
5548 Expression::BoolXorAgg(agg) => rewrite_agg_filter!(BoolXorAgg, agg),
5549 Expression::BitwiseAndAgg(agg) => rewrite_agg_filter!(BitwiseAndAgg, agg),
5550 Expression::BitwiseOrAgg(agg) => rewrite_agg_filter!(BitwiseOrAgg, agg),
5551 Expression::BitwiseXorAgg(agg) => rewrite_agg_filter!(BitwiseXorAgg, agg),
5552 Expression::StringAgg(mut agg) => {
5553 if let Some(filter) = agg.filter.take() {
5554 let this = std::mem::replace(&mut agg.this, Expression::null());
5555 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5556 }
5557 Ok(Expression::StringAgg(agg))
5558 }
5559 Expression::GroupConcat(mut agg) => {
5560 if let Some(filter) = agg.filter.take() {
5561 let this = std::mem::replace(&mut agg.this, Expression::null());
5562 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5563 }
5564 Ok(Expression::GroupConcat(agg))
5565 }
5566 Expression::ListAgg(mut agg) => {
5567 if let Some(filter) = agg.filter.take() {
5568 let this = std::mem::replace(&mut agg.this, Expression::null());
5569 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5570 }
5571 Ok(Expression::ListAgg(agg))
5572 }
5573 Expression::WithinGroup(mut within_group) => {
5574 within_group.this = Self::rewrite_aggregate_filters_for_tsql(within_group.this)?;
5575 Ok(Expression::WithinGroup(within_group))
5576 }
5577 other => Ok(other),
5578 }
5579 }
5580
5581 fn push_filter_into_tsql_aggregate(expr: Expression, filter: Expression) -> Expression {
5582 macro_rules! push_agg_filter {
5583 ($variant:ident, $agg:expr) => {{
5584 let mut agg = $agg;
5585 let this = std::mem::replace(&mut agg.this, Expression::null());
5586 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5587 agg.filter = None;
5588 Expression::$variant(agg)
5589 }};
5590 }
5591
5592 match expr {
5593 Expression::AggregateFunction(mut agg) => {
5594 Self::rewrite_generic_aggregate_filter_for_tsql(&mut agg, filter);
5595 Expression::AggregateFunction(agg)
5596 }
5597 Expression::Count(mut count) => {
5598 let value = if count.star {
5599 Expression::number(1)
5600 } else {
5601 count.this.take().unwrap_or_else(|| Expression::number(1))
5602 };
5603 count.star = false;
5604 count.filter = None;
5605 count.this = Some(Self::conditional_aggregate_value_for_tsql(filter, value));
5606 Expression::Count(count)
5607 }
5608 Expression::Sum(agg) => push_agg_filter!(Sum, agg),
5609 Expression::Avg(agg) => push_agg_filter!(Avg, agg),
5610 Expression::Min(agg) => push_agg_filter!(Min, agg),
5611 Expression::Max(agg) => push_agg_filter!(Max, agg),
5612 Expression::ArrayAgg(agg) => push_agg_filter!(ArrayAgg, agg),
5613 Expression::CountIf(agg) => push_agg_filter!(CountIf, agg),
5614 Expression::Stddev(agg) => push_agg_filter!(Stddev, agg),
5615 Expression::StddevPop(agg) => push_agg_filter!(StddevPop, agg),
5616 Expression::StddevSamp(agg) => push_agg_filter!(StddevSamp, agg),
5617 Expression::Variance(agg) => push_agg_filter!(Variance, agg),
5618 Expression::VarPop(agg) => push_agg_filter!(VarPop, agg),
5619 Expression::VarSamp(agg) => push_agg_filter!(VarSamp, agg),
5620 Expression::Median(agg) => push_agg_filter!(Median, agg),
5621 Expression::Mode(agg) => push_agg_filter!(Mode, agg),
5622 Expression::First(agg) => push_agg_filter!(First, agg),
5623 Expression::Last(agg) => push_agg_filter!(Last, agg),
5624 Expression::AnyValue(agg) => push_agg_filter!(AnyValue, agg),
5625 Expression::ApproxDistinct(agg) => push_agg_filter!(ApproxDistinct, agg),
5626 Expression::ApproxCountDistinct(agg) => {
5627 push_agg_filter!(ApproxCountDistinct, agg)
5628 }
5629 Expression::LogicalAnd(agg) => push_agg_filter!(LogicalAnd, agg),
5630 Expression::LogicalOr(agg) => push_agg_filter!(LogicalOr, agg),
5631 Expression::Skewness(agg) => push_agg_filter!(Skewness, agg),
5632 Expression::ArrayConcatAgg(agg) => push_agg_filter!(ArrayConcatAgg, agg),
5633 Expression::ArrayUniqueAgg(agg) => push_agg_filter!(ArrayUniqueAgg, agg),
5634 Expression::BoolXorAgg(agg) => push_agg_filter!(BoolXorAgg, agg),
5635 Expression::BitwiseAndAgg(agg) => push_agg_filter!(BitwiseAndAgg, agg),
5636 Expression::BitwiseOrAgg(agg) => push_agg_filter!(BitwiseOrAgg, agg),
5637 Expression::BitwiseXorAgg(agg) => push_agg_filter!(BitwiseXorAgg, agg),
5638 Expression::StringAgg(mut agg) => {
5639 let this = std::mem::replace(&mut agg.this, Expression::null());
5640 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5641 agg.filter = None;
5642 Expression::StringAgg(agg)
5643 }
5644 Expression::GroupConcat(mut agg) => {
5645 let this = std::mem::replace(&mut agg.this, Expression::null());
5646 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5647 agg.filter = None;
5648 Expression::GroupConcat(agg)
5649 }
5650 Expression::ListAgg(mut agg) => {
5651 let this = std::mem::replace(&mut agg.this, Expression::null());
5652 agg.this = Self::conditional_aggregate_value_for_tsql(filter, this);
5653 agg.filter = None;
5654 Expression::ListAgg(agg)
5655 }
5656 Expression::WithinGroup(mut within_group) => {
5657 within_group.this =
5658 Self::push_filter_into_tsql_aggregate(within_group.this, filter);
5659 Expression::WithinGroup(within_group)
5660 }
5661 other => Expression::Filter(Box::new(crate::expressions::Filter {
5662 this: Box::new(other),
5663 expression: Box::new(filter),
5664 })),
5665 }
5666 }
5667
5668 fn rewrite_generic_aggregate_filter_for_tsql(
5669 agg: &mut crate::expressions::AggregateFunction,
5670 filter: Expression,
5671 ) {
5672 let is_count =
5673 agg.name.eq_ignore_ascii_case("COUNT") || agg.name.eq_ignore_ascii_case("COUNT_BIG");
5674 let is_count_star = is_count
5675 && (agg.args.is_empty()
5676 || (agg.args.len() == 1 && matches!(agg.args[0], Expression::Star(_))));
5677
5678 if is_count_star {
5679 agg.args = vec![Self::conditional_aggregate_value_for_tsql(
5680 filter,
5681 Expression::number(1),
5682 )];
5683 } else if !agg.args.is_empty() {
5684 agg.args = agg
5685 .args
5686 .drain(..)
5687 .map(|arg| Self::conditional_aggregate_value_for_tsql(filter.clone(), arg))
5688 .collect();
5689 } else {
5690 agg.filter = Some(filter);
5691 }
5692 }
5693
5694 fn conditional_aggregate_value_for_tsql(filter: Expression, value: Expression) -> Expression {
5695 Expression::Case(Box::new(crate::expressions::Case {
5696 operand: None,
5697 whens: vec![(filter, value)],
5698 else_: None,
5699 comments: Vec::new(),
5700 inferred_type: None,
5701 }))
5702 }
5703
5704 fn reject_pgvector_distance_operators_for_sqlite(&self, sql: &str) -> Result<()> {
5705 let tokens = self.tokenize(sql)?;
5706 for (i, token) in tokens.iter().enumerate() {
5707 if token.token_type == TokenType::NullsafeEq {
5708 return Err(crate::error::Error::unsupported(
5709 "PostgreSQL pgvector cosine distance operator <=>",
5710 "SQLite",
5711 ));
5712 }
5713 if token.token_type == TokenType::Lt
5714 && tokens
5715 .get(i + 1)
5716 .is_some_and(|token| token.token_type == TokenType::Tilde)
5717 && tokens
5718 .get(i + 2)
5719 .is_some_and(|token| token.token_type == TokenType::Gt)
5720 {
5721 return Err(crate::error::Error::unsupported(
5722 "PostgreSQL pgvector Hamming distance operator <~>",
5723 "SQLite",
5724 ));
5725 }
5726 }
5727 Ok(())
5728 }
5729
5730 fn normalize_sqlite_double_quoted_defaults(expr: Expression) -> Result<Expression> {
5731 fn normalize_default_expr(expr: Expression) -> Result<Expression> {
5732 transform_recursive(expr, &|e| match e {
5733 Expression::Column(col)
5734 if col.table.is_none() && col.name.quoted && !col.join_mark =>
5735 {
5736 Ok(Expression::Literal(Box::new(Literal::String(
5737 col.name.name,
5738 ))))
5739 }
5740 Expression::Identifier(id) if id.quoted => {
5741 Ok(Expression::Literal(Box::new(Literal::String(id.name))))
5742 }
5743 _ => Ok(e),
5744 })
5745 }
5746
5747 fn normalize_column_default(col: &mut crate::expressions::ColumnDef) -> Result<()> {
5748 if let Some(default) = col.default.take() {
5749 col.default = Some(normalize_default_expr(default)?);
5750 }
5751
5752 for constraint in &mut col.constraints {
5753 if let ColumnConstraint::Default(default) = constraint {
5754 *default = normalize_default_expr(default.clone())?;
5755 }
5756 }
5757
5758 Ok(())
5759 }
5760
5761 transform_recursive(expr, &|e| match e {
5762 Expression::CreateTable(mut ct) => {
5763 for column in &mut ct.columns {
5764 normalize_column_default(column)?;
5765 }
5766 Ok(Expression::CreateTable(ct))
5767 }
5768 Expression::ColumnDef(mut col) => {
5769 normalize_column_default(&mut col)?;
5770 Ok(Expression::ColumnDef(col))
5771 }
5772 _ => Ok(e),
5773 })
5774 }
5775
5776 fn normalize_postgres_to_sqlite_types(expr: Expression) -> Result<Expression> {
5777 fn sqlite_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
5778 use crate::expressions::DataType;
5779
5780 match dt {
5781 DataType::Bit { .. } => DataType::Int {
5782 length: None,
5783 integer_spelling: true,
5784 },
5785 DataType::TextWithLength { .. } => DataType::Text,
5786 DataType::VarChar { .. } => DataType::Text,
5787 DataType::Char { .. } => DataType::Text,
5788 DataType::Timestamp { timezone: true, .. } => DataType::Text,
5789 DataType::Custom { name } => {
5790 let base = name
5791 .split_once('(')
5792 .map_or(name.as_str(), |(base, _)| base)
5793 .trim();
5794 if base.eq_ignore_ascii_case("TSVECTOR")
5795 || base.eq_ignore_ascii_case("TIMESTAMPTZ")
5796 || base.eq_ignore_ascii_case("TIMESTAMP WITH TIME ZONE")
5797 || base.eq_ignore_ascii_case("NVARCHAR")
5798 || base.eq_ignore_ascii_case("NCHAR")
5799 {
5800 DataType::Text
5801 } else {
5802 DataType::Custom { name }
5803 }
5804 }
5805 _ => dt,
5806 }
5807 }
5808
5809 transform_recursive(expr, &|e| match e {
5810 Expression::DataType(dt) => Ok(Expression::DataType(sqlite_type(dt))),
5811 Expression::CreateTable(mut ct) => {
5812 for column in &mut ct.columns {
5813 column.data_type = sqlite_type(column.data_type.clone());
5814 }
5815 Ok(Expression::CreateTable(ct))
5816 }
5817 _ => Ok(e),
5818 })
5819 }
5820
5821 fn normalize_postgres_to_fabric_decimal_types(expr: Expression) -> Result<Expression> {
5822 fn fabric_decimal_type(dt: crate::expressions::DataType) -> crate::expressions::DataType {
5823 use crate::expressions::DataType;
5824
5825 match dt {
5826 DataType::Decimal {
5827 precision: None,
5828 scale: None,
5829 } => DataType::Decimal {
5830 precision: Some(38),
5831 scale: Some(10),
5832 },
5833 _ => dt,
5834 }
5835 }
5836
5837 transform_recursive(expr, &|e| match e {
5838 Expression::DataType(dt) => Ok(Expression::DataType(fabric_decimal_type(dt))),
5839 Expression::CreateTable(mut ct) => {
5840 for column in &mut ct.columns {
5841 column.data_type = fabric_decimal_type(column.data_type.clone());
5842 }
5843 Ok(Expression::CreateTable(ct))
5844 }
5845 Expression::ColumnDef(mut col) => {
5846 col.data_type = fabric_decimal_type(col.data_type);
5847 Ok(Expression::ColumnDef(col))
5848 }
5849 _ => Ok(e),
5850 })
5851 }
5852
5853 /// For DuckDB target: when FROM clause contains RANGE(n), replace
5854 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
5855 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
5856 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
5857 if let Expression::Select(mut select) = expr {
5858 // Check if FROM contains a RANGE function
5859 let has_range_from = if let Some(ref from) = select.from {
5860 from.expressions.iter().any(|e| {
5861 // Check for direct RANGE(...) or aliased RANGE(...)
5862 match e {
5863 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
5864 Expression::Alias(a) => {
5865 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
5866 }
5867 _ => false,
5868 }
5869 })
5870 } else {
5871 false
5872 };
5873
5874 if has_range_from {
5875 // Replace the ROW_NUMBER pattern in select expressions
5876 select.expressions = select
5877 .expressions
5878 .into_iter()
5879 .map(|e| Self::replace_rownum_with_range(e))
5880 .collect();
5881 }
5882
5883 Ok(Expression::Select(select))
5884 } else {
5885 Ok(expr)
5886 }
5887 }
5888
5889 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
5890 fn replace_rownum_with_range(expr: Expression) -> Expression {
5891 match expr {
5892 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
5893 Expression::Mod(op) => {
5894 let new_left = Self::try_replace_rownum_paren(&op.left);
5895 Expression::Mod(Box::new(crate::expressions::BinaryOp {
5896 left: new_left,
5897 right: op.right,
5898 left_comments: op.left_comments,
5899 operator_comments: op.operator_comments,
5900 trailing_comments: op.trailing_comments,
5901 inferred_type: op.inferred_type,
5902 }))
5903 }
5904 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
5905 Expression::Paren(p) => {
5906 let inner = Self::replace_rownum_with_range(p.this);
5907 Expression::Paren(Box::new(crate::expressions::Paren {
5908 this: inner,
5909 trailing_comments: p.trailing_comments,
5910 }))
5911 }
5912 Expression::Case(mut c) => {
5913 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
5914 c.whens = c
5915 .whens
5916 .into_iter()
5917 .map(|(cond, then)| {
5918 (
5919 Self::replace_rownum_with_range(cond),
5920 Self::replace_rownum_with_range(then),
5921 )
5922 })
5923 .collect();
5924 if let Some(else_) = c.else_ {
5925 c.else_ = Some(Self::replace_rownum_with_range(else_));
5926 }
5927 Expression::Case(c)
5928 }
5929 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
5930 left: Self::replace_rownum_with_range(op.left),
5931 right: op.right,
5932 left_comments: op.left_comments,
5933 operator_comments: op.operator_comments,
5934 trailing_comments: op.trailing_comments,
5935 inferred_type: op.inferred_type,
5936 })),
5937 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
5938 left: Self::replace_rownum_with_range(op.left),
5939 right: op.right,
5940 left_comments: op.left_comments,
5941 operator_comments: op.operator_comments,
5942 trailing_comments: op.trailing_comments,
5943 inferred_type: op.inferred_type,
5944 })),
5945 Expression::Alias(mut a) => {
5946 a.this = Self::replace_rownum_with_range(a.this);
5947 Expression::Alias(a)
5948 }
5949 other => other,
5950 }
5951 }
5952
5953 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
5954 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
5955 if let Expression::Paren(ref p) = expr {
5956 if let Expression::Sub(ref sub) = p.this {
5957 if let Expression::WindowFunction(ref wf) = sub.left {
5958 if let Expression::Function(ref f) = wf.this {
5959 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
5960 if let Expression::Literal(ref lit) = sub.right {
5961 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
5962 if n == "1" {
5963 return Expression::column("range");
5964 }
5965 }
5966 }
5967 }
5968 }
5969 }
5970 }
5971 }
5972 expr.clone()
5973 }
5974
5975 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
5976 /// Converts:
5977 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
5978 /// To:
5979 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
5980 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
5981 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
5982 use crate::expressions::*;
5983 transform_recursive(expr, &|e| {
5984 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
5985 if let Expression::ArraySize(ref af) = e {
5986 if let Expression::Function(ref f) = af.this {
5987 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
5988 let result = Self::convert_array_size_gda_snowflake(f)?;
5989 return Ok(result);
5990 }
5991 }
5992 }
5993
5994 let Expression::Select(mut sel) = e else {
5995 return Ok(e);
5996 };
5997
5998 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
5999 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
6000 let mut gda_join_idx: Option<usize> = None;
6001
6002 for (idx, join) in sel.joins.iter().enumerate() {
6003 // The join.this may be:
6004 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
6005 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
6006 let (unnest_ref, alias_name) = match &join.this {
6007 Expression::Unnest(ref unnest) => {
6008 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
6009 (Some(unnest.as_ref()), alias)
6010 }
6011 Expression::Alias(ref a) => {
6012 if let Expression::Unnest(ref unnest) = a.this {
6013 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
6014 } else {
6015 (None, None)
6016 }
6017 }
6018 _ => (None, None),
6019 };
6020
6021 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
6022 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
6023 if let Expression::Function(ref f) = unnest.this {
6024 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
6025 let start_expr = f.args[0].clone();
6026 let end_expr = f.args[1].clone();
6027 let step = f.args.get(2).cloned();
6028
6029 // Extract unit from step interval
6030 let unit = if let Some(Expression::Interval(ref iv)) = step {
6031 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
6032 Some(format!("{:?}", unit).to_ascii_uppercase())
6033 } else if let Some(ref this) = iv.this {
6034 // The interval may be stored as a string like "1 MONTH"
6035 if let Expression::Literal(lit) = this {
6036 if let Literal::String(ref s) = lit.as_ref() {
6037 let parts: Vec<&str> = s.split_whitespace().collect();
6038 if parts.len() == 2 {
6039 Some(parts[1].to_ascii_uppercase())
6040 } else if parts.len() == 1 {
6041 // Single word like "MONTH" or just "1"
6042 let upper = parts[0].to_ascii_uppercase();
6043 if matches!(
6044 upper.as_str(),
6045 "YEAR"
6046 | "QUARTER"
6047 | "MONTH"
6048 | "WEEK"
6049 | "DAY"
6050 | "HOUR"
6051 | "MINUTE"
6052 | "SECOND"
6053 ) {
6054 Some(upper)
6055 } else {
6056 None
6057 }
6058 } else {
6059 None
6060 }
6061 } else {
6062 None
6063 }
6064 } else {
6065 None
6066 }
6067 } else {
6068 None
6069 }
6070 } else {
6071 None
6072 };
6073
6074 if let Some(unit_str) = unit {
6075 gda_info = Some((alias, start_expr, end_expr, unit_str));
6076 gda_join_idx = Some(idx);
6077 }
6078 }
6079 }
6080 }
6081 if gda_info.is_some() {
6082 break;
6083 }
6084 }
6085
6086 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
6087 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
6088 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
6089 let result = Self::try_transform_from_gda_snowflake(sel);
6090 return result;
6091 };
6092 let join_idx = gda_join_idx.unwrap();
6093
6094 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
6095 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
6096 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
6097 let datediff = Expression::Function(Box::new(Function::new(
6098 "DATEDIFF".to_string(),
6099 vec![
6100 Expression::boxed_column(Column {
6101 name: Identifier::new(&unit_str),
6102 table: None,
6103 join_mark: false,
6104 trailing_comments: vec![],
6105 span: None,
6106 inferred_type: None,
6107 }),
6108 start_expr.clone(),
6109 end_expr.clone(),
6110 ],
6111 )));
6112 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6113 left: datediff,
6114 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6115 left_comments: vec![],
6116 operator_comments: vec![],
6117 trailing_comments: vec![],
6118 inferred_type: None,
6119 }));
6120
6121 let array_gen_range = Expression::Function(Box::new(Function::new(
6122 "ARRAY_GENERATE_RANGE".to_string(),
6123 vec![
6124 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6125 datediff_plus_one,
6126 ],
6127 )));
6128
6129 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
6130 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6131 name: Identifier::new("INPUT"),
6132 value: array_gen_range,
6133 separator: crate::expressions::NamedArgSeparator::DArrow,
6134 }));
6135 let flatten = Expression::Function(Box::new(Function::new(
6136 "FLATTEN".to_string(),
6137 vec![flatten_input],
6138 )));
6139
6140 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
6141 let alias_table = Alias {
6142 this: flatten,
6143 alias: Identifier::new("_t0"),
6144 column_aliases: vec![
6145 Identifier::new("seq"),
6146 Identifier::new("key"),
6147 Identifier::new("path"),
6148 Identifier::new("index"),
6149 Identifier::new(&alias_name),
6150 Identifier::new("this"),
6151 ],
6152 alias_explicit_as: false,
6153 alias_keyword: None,
6154 pre_alias_comments: vec![],
6155 trailing_comments: vec![],
6156 inferred_type: None,
6157 };
6158 let lateral_expr = Expression::Lateral(Box::new(Lateral {
6159 this: Box::new(Expression::Alias(Box::new(alias_table))),
6160 view: None,
6161 outer: None,
6162 alias: None,
6163 alias_quoted: false,
6164 cross_apply: None,
6165 ordinality: None,
6166 column_aliases: vec![],
6167 }));
6168
6169 // Remove the original join and add to FROM expressions
6170 sel.joins.remove(join_idx);
6171 if let Some(ref mut from) = sel.from {
6172 from.expressions.push(lateral_expr);
6173 }
6174
6175 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
6176 let dateadd_expr = Expression::Function(Box::new(Function::new(
6177 "DATEADD".to_string(),
6178 vec![
6179 Expression::boxed_column(Column {
6180 name: Identifier::new(&unit_str),
6181 table: None,
6182 join_mark: false,
6183 trailing_comments: vec![],
6184 span: None,
6185 inferred_type: None,
6186 }),
6187 Expression::Cast(Box::new(Cast {
6188 this: Expression::boxed_column(Column {
6189 name: Identifier::new(&alias_name),
6190 table: None,
6191 join_mark: false,
6192 trailing_comments: vec![],
6193 span: None,
6194 inferred_type: None,
6195 }),
6196 to: DataType::Int {
6197 length: None,
6198 integer_spelling: false,
6199 },
6200 trailing_comments: vec![],
6201 double_colon_syntax: false,
6202 format: None,
6203 default: None,
6204 inferred_type: None,
6205 })),
6206 Expression::Cast(Box::new(Cast {
6207 this: start_expr.clone(),
6208 to: DataType::Date,
6209 trailing_comments: vec![],
6210 double_colon_syntax: false,
6211 format: None,
6212 default: None,
6213 inferred_type: None,
6214 })),
6215 ],
6216 )));
6217
6218 // Replace references to the alias in the SELECT list
6219 let new_exprs: Vec<Expression> = sel
6220 .expressions
6221 .iter()
6222 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
6223 .collect();
6224 sel.expressions = new_exprs;
6225
6226 Ok(Expression::Select(sel))
6227 })
6228 }
6229
6230 /// Helper: replace column references to `alias_name` with dateadd expression
6231 fn replace_column_ref_with_dateadd(
6232 expr: &Expression,
6233 alias_name: &str,
6234 dateadd: &Expression,
6235 ) -> Expression {
6236 use crate::expressions::*;
6237 match expr {
6238 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
6239 // Plain column reference -> DATEADD(...) AS alias_name
6240 Expression::Alias(Box::new(Alias {
6241 this: dateadd.clone(),
6242 alias: Identifier::new(alias_name),
6243 column_aliases: vec![],
6244 alias_explicit_as: false,
6245 alias_keyword: None,
6246 pre_alias_comments: vec![],
6247 trailing_comments: vec![],
6248 inferred_type: None,
6249 }))
6250 }
6251 Expression::Alias(a) => {
6252 // Check if the inner expression references the alias
6253 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
6254 Expression::Alias(Box::new(Alias {
6255 this: new_this,
6256 alias: a.alias.clone(),
6257 column_aliases: a.column_aliases.clone(),
6258 alias_explicit_as: false,
6259 alias_keyword: None,
6260 pre_alias_comments: a.pre_alias_comments.clone(),
6261 trailing_comments: a.trailing_comments.clone(),
6262 inferred_type: None,
6263 }))
6264 }
6265 _ => expr.clone(),
6266 }
6267 }
6268
6269 /// Helper: replace column references in inner expression (not top-level)
6270 fn replace_column_ref_inner(
6271 expr: &Expression,
6272 alias_name: &str,
6273 dateadd: &Expression,
6274 ) -> Expression {
6275 use crate::expressions::*;
6276 match expr {
6277 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
6278 dateadd.clone()
6279 }
6280 Expression::Add(op) => {
6281 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6282 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6283 Expression::Add(Box::new(BinaryOp {
6284 left,
6285 right,
6286 left_comments: op.left_comments.clone(),
6287 operator_comments: op.operator_comments.clone(),
6288 trailing_comments: op.trailing_comments.clone(),
6289 inferred_type: None,
6290 }))
6291 }
6292 Expression::Sub(op) => {
6293 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6294 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6295 Expression::Sub(Box::new(BinaryOp {
6296 left,
6297 right,
6298 left_comments: op.left_comments.clone(),
6299 operator_comments: op.operator_comments.clone(),
6300 trailing_comments: op.trailing_comments.clone(),
6301 inferred_type: None,
6302 }))
6303 }
6304 Expression::Mul(op) => {
6305 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
6306 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
6307 Expression::Mul(Box::new(BinaryOp {
6308 left,
6309 right,
6310 left_comments: op.left_comments.clone(),
6311 operator_comments: op.operator_comments.clone(),
6312 trailing_comments: op.trailing_comments.clone(),
6313 inferred_type: None,
6314 }))
6315 }
6316 _ => expr.clone(),
6317 }
6318 }
6319
6320 /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
6321 /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
6322 fn try_transform_from_gda_snowflake(
6323 mut sel: Box<crate::expressions::Select>,
6324 ) -> Result<Expression> {
6325 use crate::expressions::*;
6326
6327 // Extract GDA info from FROM clause
6328 let mut gda_info: Option<(
6329 usize,
6330 String,
6331 Expression,
6332 Expression,
6333 String,
6334 Option<(String, Vec<Identifier>)>,
6335 )> = None; // (from_idx, col_name, start, end, unit, outer_alias)
6336
6337 if let Some(ref from) = sel.from {
6338 for (idx, table_expr) in from.expressions.iter().enumerate() {
6339 // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
6340 // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
6341 let (unnest_opt, outer_alias_info) = match table_expr {
6342 Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
6343 Expression::Alias(ref a) => {
6344 if let Expression::Unnest(ref unnest) = a.this {
6345 let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
6346 (Some(unnest.as_ref()), Some(alias_info))
6347 } else {
6348 (None, None)
6349 }
6350 }
6351 _ => (None, None),
6352 };
6353
6354 if let Some(unnest) = unnest_opt {
6355 // Check for GENERATE_DATE_ARRAY function
6356 let func_opt = match &unnest.this {
6357 Expression::Function(ref f)
6358 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
6359 && f.args.len() >= 2 =>
6360 {
6361 Some(f)
6362 }
6363 // Also check for GenerateSeries (from earlier normalization)
6364 _ => None,
6365 };
6366
6367 if let Some(f) = func_opt {
6368 let start_expr = f.args[0].clone();
6369 let end_expr = f.args[1].clone();
6370 let step = f.args.get(2).cloned();
6371
6372 // Extract unit and column name
6373 let unit = Self::extract_interval_unit_str(&step);
6374 let col_name = outer_alias_info
6375 .as_ref()
6376 .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
6377 .unwrap_or_else(|| "value".to_string());
6378
6379 if let Some(unit_str) = unit {
6380 gda_info = Some((
6381 idx,
6382 col_name,
6383 start_expr,
6384 end_expr,
6385 unit_str,
6386 outer_alias_info,
6387 ));
6388 break;
6389 }
6390 }
6391 }
6392 }
6393 }
6394
6395 let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
6396 else {
6397 return Ok(Expression::Select(sel));
6398 };
6399
6400 // Build the Snowflake subquery:
6401 // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
6402 // FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))
6403
6404 // DATEDIFF(unit, start, end)
6405 let datediff = Expression::Function(Box::new(Function::new(
6406 "DATEDIFF".to_string(),
6407 vec![
6408 Expression::boxed_column(Column {
6409 name: Identifier::new(&unit_str),
6410 table: None,
6411 join_mark: false,
6412 trailing_comments: vec![],
6413 span: None,
6414 inferred_type: None,
6415 }),
6416 start_expr.clone(),
6417 end_expr.clone(),
6418 ],
6419 )));
6420 // DATEDIFF(...) + 1
6421 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6422 left: datediff,
6423 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6424 left_comments: vec![],
6425 operator_comments: vec![],
6426 trailing_comments: vec![],
6427 inferred_type: None,
6428 }));
6429
6430 let array_gen_range = Expression::Function(Box::new(Function::new(
6431 "ARRAY_GENERATE_RANGE".to_string(),
6432 vec![
6433 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6434 datediff_plus_one,
6435 ],
6436 )));
6437
6438 // TABLE(FLATTEN(INPUT => ...))
6439 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6440 name: Identifier::new("INPUT"),
6441 value: array_gen_range,
6442 separator: crate::expressions::NamedArgSeparator::DArrow,
6443 }));
6444 let flatten = Expression::Function(Box::new(Function::new(
6445 "FLATTEN".to_string(),
6446 vec![flatten_input],
6447 )));
6448
6449 // Determine alias name for the table: use outer alias or _t0
6450 let table_alias_name = outer_alias_info
6451 .as_ref()
6452 .map(|(name, _)| name.clone())
6453 .unwrap_or_else(|| "_t0".to_string());
6454
6455 // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
6456 let table_func =
6457 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
6458 let flatten_aliased = Expression::Alias(Box::new(Alias {
6459 this: table_func,
6460 alias: Identifier::new(&table_alias_name),
6461 column_aliases: vec![
6462 Identifier::new("seq"),
6463 Identifier::new("key"),
6464 Identifier::new("path"),
6465 Identifier::new("index"),
6466 Identifier::new(&col_name),
6467 Identifier::new("this"),
6468 ],
6469 alias_explicit_as: false,
6470 alias_keyword: None,
6471 pre_alias_comments: vec![],
6472 trailing_comments: vec![],
6473 inferred_type: None,
6474 }));
6475
6476 // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
6477 let dateadd_expr = Expression::Function(Box::new(Function::new(
6478 "DATEADD".to_string(),
6479 vec![
6480 Expression::boxed_column(Column {
6481 name: Identifier::new(&unit_str),
6482 table: None,
6483 join_mark: false,
6484 trailing_comments: vec![],
6485 span: None,
6486 inferred_type: None,
6487 }),
6488 Expression::Cast(Box::new(Cast {
6489 this: Expression::boxed_column(Column {
6490 name: Identifier::new(&col_name),
6491 table: None,
6492 join_mark: false,
6493 trailing_comments: vec![],
6494 span: None,
6495 inferred_type: None,
6496 }),
6497 to: DataType::Int {
6498 length: None,
6499 integer_spelling: false,
6500 },
6501 trailing_comments: vec![],
6502 double_colon_syntax: false,
6503 format: None,
6504 default: None,
6505 inferred_type: None,
6506 })),
6507 // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
6508 start_expr.clone(),
6509 ],
6510 )));
6511 let dateadd_aliased = Expression::Alias(Box::new(Alias {
6512 this: dateadd_expr,
6513 alias: Identifier::new(&col_name),
6514 column_aliases: vec![],
6515 alias_explicit_as: false,
6516 alias_keyword: None,
6517 pre_alias_comments: vec![],
6518 trailing_comments: vec![],
6519 inferred_type: None,
6520 }));
6521
6522 // Build inner SELECT
6523 let mut inner_select = Select::new();
6524 inner_select.expressions = vec![dateadd_aliased];
6525 inner_select.from = Some(From {
6526 expressions: vec![flatten_aliased],
6527 });
6528
6529 let inner_select_expr = Expression::Select(Box::new(inner_select));
6530 let subquery = Expression::Subquery(Box::new(Subquery {
6531 this: inner_select_expr,
6532 alias: None,
6533 column_aliases: vec![],
6534 alias_explicit_as: false,
6535 alias_keyword: None,
6536 order_by: None,
6537 limit: None,
6538 offset: None,
6539 distribute_by: None,
6540 sort_by: None,
6541 cluster_by: None,
6542 lateral: false,
6543 modifiers_inside: false,
6544 trailing_comments: vec![],
6545 inferred_type: None,
6546 }));
6547
6548 // If there was an outer alias (e.g., AS _q(date_week)), wrap with alias
6549 let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
6550 Expression::Alias(Box::new(Alias {
6551 this: subquery,
6552 alias: Identifier::new(&alias_name),
6553 column_aliases: col_aliases,
6554 alias_explicit_as: false,
6555 alias_keyword: None,
6556 pre_alias_comments: vec![],
6557 trailing_comments: vec![],
6558 inferred_type: None,
6559 }))
6560 } else {
6561 subquery
6562 };
6563
6564 // Replace the FROM expression
6565 if let Some(ref mut from) = sel.from {
6566 from.expressions[from_idx] = replacement;
6567 }
6568
6569 Ok(Expression::Select(sel))
6570 }
6571
6572 /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
6573 /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
6574 /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
6575 fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
6576 use crate::expressions::*;
6577
6578 let start_expr = f.args[0].clone();
6579 let end_expr = f.args[1].clone();
6580 let step = f.args.get(2).cloned();
6581 let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
6582 let col_name = "value";
6583
6584 // Build the inner subquery: same as try_transform_from_gda_snowflake
6585 let datediff = Expression::Function(Box::new(Function::new(
6586 "DATEDIFF".to_string(),
6587 vec![
6588 Expression::boxed_column(Column {
6589 name: Identifier::new(&unit_str),
6590 table: None,
6591 join_mark: false,
6592 trailing_comments: vec![],
6593 span: None,
6594 inferred_type: None,
6595 }),
6596 start_expr.clone(),
6597 end_expr.clone(),
6598 ],
6599 )));
6600 // DATEDIFF(...) + 1
6601 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
6602 left: datediff,
6603 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
6604 left_comments: vec![],
6605 operator_comments: vec![],
6606 trailing_comments: vec![],
6607 inferred_type: None,
6608 }));
6609
6610 let array_gen_range = Expression::Function(Box::new(Function::new(
6611 "ARRAY_GENERATE_RANGE".to_string(),
6612 vec![
6613 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
6614 datediff_plus_one,
6615 ],
6616 )));
6617
6618 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
6619 name: Identifier::new("INPUT"),
6620 value: array_gen_range,
6621 separator: crate::expressions::NamedArgSeparator::DArrow,
6622 }));
6623 let flatten = Expression::Function(Box::new(Function::new(
6624 "FLATTEN".to_string(),
6625 vec![flatten_input],
6626 )));
6627
6628 let table_func =
6629 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
6630 let flatten_aliased = Expression::Alias(Box::new(Alias {
6631 this: table_func,
6632 alias: Identifier::new("_t0"),
6633 column_aliases: vec![
6634 Identifier::new("seq"),
6635 Identifier::new("key"),
6636 Identifier::new("path"),
6637 Identifier::new("index"),
6638 Identifier::new(col_name),
6639 Identifier::new("this"),
6640 ],
6641 alias_explicit_as: false,
6642 alias_keyword: None,
6643 pre_alias_comments: vec![],
6644 trailing_comments: vec![],
6645 inferred_type: None,
6646 }));
6647
6648 let dateadd_expr = Expression::Function(Box::new(Function::new(
6649 "DATEADD".to_string(),
6650 vec![
6651 Expression::boxed_column(Column {
6652 name: Identifier::new(&unit_str),
6653 table: None,
6654 join_mark: false,
6655 trailing_comments: vec![],
6656 span: None,
6657 inferred_type: None,
6658 }),
6659 Expression::Cast(Box::new(Cast {
6660 this: Expression::boxed_column(Column {
6661 name: Identifier::new(col_name),
6662 table: None,
6663 join_mark: false,
6664 trailing_comments: vec![],
6665 span: None,
6666 inferred_type: None,
6667 }),
6668 to: DataType::Int {
6669 length: None,
6670 integer_spelling: false,
6671 },
6672 trailing_comments: vec![],
6673 double_colon_syntax: false,
6674 format: None,
6675 default: None,
6676 inferred_type: None,
6677 })),
6678 start_expr.clone(),
6679 ],
6680 )));
6681 let dateadd_aliased = Expression::Alias(Box::new(Alias {
6682 this: dateadd_expr,
6683 alias: Identifier::new(col_name),
6684 column_aliases: vec![],
6685 alias_explicit_as: false,
6686 alias_keyword: None,
6687 pre_alias_comments: vec![],
6688 trailing_comments: vec![],
6689 inferred_type: None,
6690 }));
6691
6692 // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
6693 let mut inner_select = Select::new();
6694 inner_select.expressions = vec![dateadd_aliased];
6695 inner_select.from = Some(From {
6696 expressions: vec![flatten_aliased],
6697 });
6698
6699 // Wrap in subquery for the inner part
6700 let inner_subquery = Expression::Subquery(Box::new(Subquery {
6701 this: Expression::Select(Box::new(inner_select)),
6702 alias: None,
6703 column_aliases: vec![],
6704 alias_explicit_as: false,
6705 alias_keyword: None,
6706 order_by: None,
6707 limit: None,
6708 offset: None,
6709 distribute_by: None,
6710 sort_by: None,
6711 cluster_by: None,
6712 lateral: false,
6713 modifiers_inside: false,
6714 trailing_comments: vec![],
6715 inferred_type: None,
6716 }));
6717
6718 // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery)
6719 let star = Expression::Star(Star {
6720 table: None,
6721 except: None,
6722 replace: None,
6723 rename: None,
6724 trailing_comments: vec![],
6725 span: None,
6726 });
6727 let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
6728 this: star,
6729 distinct: false,
6730 filter: None,
6731 order_by: vec![],
6732 name: Some("ARRAY_AGG".to_string()),
6733 ignore_nulls: None,
6734 having_max: None,
6735 limit: None,
6736 inferred_type: None,
6737 }));
6738
6739 let mut outer_select = Select::new();
6740 outer_select.expressions = vec![array_agg];
6741 outer_select.from = Some(From {
6742 expressions: vec![inner_subquery],
6743 });
6744
6745 // Wrap in a subquery
6746 let outer_subquery = Expression::Subquery(Box::new(Subquery {
6747 this: Expression::Select(Box::new(outer_select)),
6748 alias: None,
6749 column_aliases: vec![],
6750 alias_explicit_as: false,
6751 alias_keyword: None,
6752 order_by: None,
6753 limit: None,
6754 offset: None,
6755 distribute_by: None,
6756 sort_by: None,
6757 cluster_by: None,
6758 lateral: false,
6759 modifiers_inside: false,
6760 trailing_comments: vec![],
6761 inferred_type: None,
6762 }));
6763
6764 // ARRAY_SIZE(subquery)
6765 Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
6766 outer_subquery,
6767 ))))
6768 }
6769
6770 /// Extract interval unit string from an optional step expression.
6771 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
6772 use crate::expressions::*;
6773 if let Some(Expression::Interval(ref iv)) = step {
6774 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
6775 return Some(format!("{:?}", unit).to_ascii_uppercase());
6776 }
6777 if let Some(ref this) = iv.this {
6778 if let Expression::Literal(lit) = this {
6779 if let Literal::String(ref s) = lit.as_ref() {
6780 let parts: Vec<&str> = s.split_whitespace().collect();
6781 if parts.len() == 2 {
6782 return Some(parts[1].to_ascii_uppercase());
6783 } else if parts.len() == 1 {
6784 let upper = parts[0].to_ascii_uppercase();
6785 if matches!(
6786 upper.as_str(),
6787 "YEAR"
6788 | "QUARTER"
6789 | "MONTH"
6790 | "WEEK"
6791 | "DAY"
6792 | "HOUR"
6793 | "MINUTE"
6794 | "SECOND"
6795 ) {
6796 return Some(upper);
6797 }
6798 }
6799 }
6800 }
6801 }
6802 }
6803 // Default to DAY if no step or no interval
6804 if step.is_none() {
6805 return Some("DAY".to_string());
6806 }
6807 None
6808 }
6809
/// Re-wrap a fixed set of single-line SQL fragments into multi-line form.
///
/// The rewrite only happens when `sql` contains both marker fragments below; otherwise
/// the input is returned untouched. When it does apply, three exact-match replacements
/// run in order, each swapping a compact fragment for its line-broken equivalent.
fn normalize_snowflake_pretty(mut sql: String) -> String {
    // Both of these must appear somewhere in the text for the rewrite to apply.
    const LATERAL_MARKER: &str = "LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)";
    const RANGE_MARKER: &str = "ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)";

    // (compact fragment, line-broken fragment) pairs, applied top to bottom.
    const REWRAPS: [(&str, &str); 3] = [
        (
            "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
            "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
        ),
        (
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
            "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
        ),
        (
            "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
            "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
        ),
    ];

    if !sql.contains(LATERAL_MARKER) || !sql.contains(RANGE_MARKER) {
        return sql;
    }

    for &(compact, wrapped) in REWRAPS.iter() {
        sql = sql.replace(compact, wrapped);
    }

    sql
}
6832
6833 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
6834 /// This handles cases where the same syntax has different semantics across dialects.
6835 fn cross_dialect_normalize(
6836 expr: Expression,
6837 source: DialectType,
6838 target: DialectType,
6839 ) -> Result<Expression> {
6840 use crate::expressions::{
6841 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
6842 Function, Identifier, IsNull, Literal, Null, Paren,
6843 };
6844
6845 // Helper to tag which kind of transform to apply
6846 #[derive(Debug)]
6847 enum Action {
6848 None,
6849 GreatestLeastNull,
6850 ArrayGenerateRange,
6851 Div0TypedDivision,
6852 ArrayAggCollectList,
6853 ArrayAggWithinGroupFilter,
6854 ArrayAggFilter,
6855 CastTimestampToDatetime,
6856 DateTruncWrapCast,
6857 ToDateToCast,
6858 ConvertTimezoneToExpr,
6859 SetToVariable,
6860 RegexpReplaceSnowflakeToDuckDB,
6861 BigQueryFunctionNormalize,
6862 BigQuerySafeDivide,
6863 BigQueryCastType,
6864 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
6865 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
6866 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
6867 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
6868 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
6869 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
6870 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
6871 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
6872 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
6873 EpochConvert, // Expression::Epoch -> target-specific epoch function
6874 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
6875 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
6876 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
6877 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
6878 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
6879 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
6880 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
6881 TempTableHash, // TSQL #table -> temp table normalization
6882 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
6883 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
6884 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
6885 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
6886 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
6887 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
6888 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6889 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
6890 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
6891 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
6892 DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
6893 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
6894 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
6895 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
6896 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
6897 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
6898 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
6899 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
6900 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
6901 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
6902 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
6903 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
6904 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
6905 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
6906 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
6907 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
6908 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
6909 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
6910 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
6911 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
6912 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
6913 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
6914 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
6915 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
6916 DollarParamConvert, // $foo -> @foo for BigQuery
6917 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
6918 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
6919 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
6920 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
6921 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
6922 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
6923 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
6924 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
6925 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
6926 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
6927 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
6928 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
6929 RespectNullsConvert, // RESPECT NULLS window function handling
6930 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
6931 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
6932 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
6933 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
6934 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
6935 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
6936 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
6937 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
6938 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
6939 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
6940 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
6941 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
6942 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
6943 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
6944 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
6945 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
6946 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
6947 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
6948 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
6949 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
6950 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
6951 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
6952 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
6953 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
6954 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
6955 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
6956 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
6957 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
6958 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
6959 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
6960 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
6961 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
6962 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
6963 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
6964 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
6965 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
6966 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
6967 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
6968 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
6969 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
6970 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
6971 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
6972 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
6973 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
6974 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
6975 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
6976 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
6977 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
6978 DecodeSimplify, // DECODE with null-safe -> simple = comparison
6979 ArraySumConvert, // ARRAY_SUM -> target-specific
6980 ArraySizeConvert, // ARRAY_SIZE -> target-specific
6981 ArrayAnyConvert, // ARRAY_ANY -> target-specific
6982 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
6983 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
6984 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
6985 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
6986 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
6987 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
6988 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
6989 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
6990 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
6991 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
6992 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
6993 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
6994 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
6995 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
6996 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
6997 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
6998 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
6999 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
7000 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
7001 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
7002 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
7003 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
7004 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
7005 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
7006 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
7007 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
7008 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
7009 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
7010 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
7011 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
7012 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
7013 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
7014 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
7015 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
7016 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
7017 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
7018 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
7019 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
7020 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
7021 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
7022 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
7023 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
7024 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
7025 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
7026 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
7027 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
7028 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
7029 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
7030 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
7031 }
7032
7033 // Handle SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake/etc.
7034 let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
7035 Self::transform_select_into(expr, source, target)
7036 } else {
7037 expr
7038 };
7039
7040 // Strip OFFSET ROWS for non-TSQL/Oracle targets
7041 let expr = if !matches!(
7042 target,
7043 DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
7044 ) {
7045 if let Expression::Select(mut select) = expr {
7046 if let Some(ref mut offset) = select.offset {
7047 offset.rows = None;
7048 }
7049 Expression::Select(select)
7050 } else {
7051 expr
7052 }
7053 } else {
7054 expr
7055 };
7056
7057 // Oracle: LIMIT -> FETCH FIRST, OFFSET -> OFFSET ROWS
7058 let expr = if matches!(target, DialectType::Oracle) {
7059 if let Expression::Select(mut select) = expr {
7060 if let Some(limit) = select.limit.take() {
7061 // Convert LIMIT to FETCH FIRST n ROWS ONLY
7062 select.fetch = Some(crate::expressions::Fetch {
7063 direction: "FIRST".to_string(),
7064 count: Some(limit.this),
7065 percent: false,
7066 rows: true,
7067 with_ties: false,
7068 });
7069 }
7070 // Add ROWS to OFFSET if present
7071 if let Some(ref mut offset) = select.offset {
7072 offset.rows = Some(true);
7073 }
7074 Expression::Select(select)
7075 } else {
7076 expr
7077 }
7078 } else {
7079 expr
7080 };
7081
7082 // Handle CreateTable WITH properties transformation before recursive transforms
7083 let expr = if let Expression::CreateTable(mut ct) = expr {
7084 Self::transform_create_table_properties(&mut ct, source, target);
7085
7086 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
7087 // When the PARTITIONED BY clause contains column definitions, merge them into the
7088 // main column list and adjust the PARTITIONED BY clause for the target dialect.
7089 if matches!(
7090 source,
7091 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7092 ) {
7093 let mut partition_col_names: Vec<String> = Vec::new();
7094 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
7095 let mut has_col_def_partitions = false;
7096
7097 // Check if any PARTITIONED BY property contains ColumnDef expressions
7098 for prop in &ct.properties {
7099 if let Expression::PartitionedByProperty(ref pbp) = prop {
7100 if let Expression::Tuple(ref tuple) = *pbp.this {
7101 for expr in &tuple.expressions {
7102 if let Expression::ColumnDef(ref cd) = expr {
7103 has_col_def_partitions = true;
7104 partition_col_names.push(cd.name.name.clone());
7105 partition_col_defs.push(*cd.clone());
7106 }
7107 }
7108 }
7109 }
7110 }
7111
7112 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
7113 // Merge partition columns into main column list
7114 for cd in partition_col_defs {
7115 ct.columns.push(cd);
7116 }
7117
7118 // Replace PARTITIONED BY property with column-name-only version
7119 ct.properties
7120 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
7121
7122 if matches!(
7123 target,
7124 DialectType::Presto | DialectType::Trino | DialectType::Athena
7125 ) {
7126 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
7127 let array_elements: Vec<String> = partition_col_names
7128 .iter()
7129 .map(|n| format!("'{}'", n))
7130 .collect();
7131 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
7132 ct.with_properties
7133 .push(("PARTITIONED_BY".to_string(), array_value));
7134 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
7135 // Spark: PARTITIONED BY (y, z) - just column names
7136 let name_exprs: Vec<Expression> = partition_col_names
7137 .iter()
7138 .map(|n| {
7139 Expression::Column(Box::new(crate::expressions::Column {
7140 name: crate::expressions::Identifier::new(n.clone()),
7141 table: None,
7142 join_mark: false,
7143 trailing_comments: Vec::new(),
7144 span: None,
7145 inferred_type: None,
7146 }))
7147 })
7148 .collect();
7149 ct.properties.insert(
7150 0,
7151 Expression::PartitionedByProperty(Box::new(
7152 crate::expressions::PartitionedByProperty {
7153 this: Box::new(Expression::Tuple(Box::new(
7154 crate::expressions::Tuple {
7155 expressions: name_exprs,
7156 },
7157 ))),
7158 },
7159 )),
7160 );
7161 }
7162 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
7163 }
7164
7165 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
7166 // are handled by transform_create_table_properties which runs first
7167 }
7168
7169 // Strip LOCATION property for Presto/Trino (not supported)
7170 if matches!(
7171 target,
7172 DialectType::Presto | DialectType::Trino | DialectType::Athena
7173 ) {
7174 ct.properties
7175 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
7176 }
7177
7178 // Strip table-level constraints for Spark/Hive/Databricks
7179 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
7180 if matches!(
7181 target,
7182 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7183 ) {
7184 ct.constraints.retain(|c| {
7185 matches!(
7186 c,
7187 crate::expressions::TableConstraint::PrimaryKey { .. }
7188 | crate::expressions::TableConstraint::Like { .. }
7189 )
7190 });
7191 for constraint in &mut ct.constraints {
7192 if let crate::expressions::TableConstraint::PrimaryKey {
7193 columns,
7194 modifiers,
7195 ..
7196 } = constraint
7197 {
7198 // Strip ASC/DESC from column names
7199 for col in columns.iter_mut() {
7200 if col.name.ends_with(" ASC") {
7201 col.name = col.name[..col.name.len() - 4].to_string();
7202 } else if col.name.ends_with(" DESC") {
7203 col.name = col.name[..col.name.len() - 5].to_string();
7204 }
7205 }
7206 // Strip TSQL-specific modifiers
7207 modifiers.clustered = None;
7208 modifiers.with_options.clear();
7209 modifiers.on_filegroup = None;
7210 }
7211 }
7212 }
7213
7214 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
7215 if matches!(target, DialectType::Databricks) {
7216 for col in &mut ct.columns {
7217 if col.auto_increment {
7218 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
7219 col.data_type = crate::expressions::DataType::BigInt { length: None };
7220 }
7221 }
7222 }
7223 }
7224
7225 // Spark/Databricks: INTEGER -> INT in column definitions
7226 // Python sqlglot always outputs INT for Spark/Databricks
7227 if matches!(target, DialectType::Spark | DialectType::Databricks) {
7228 for col in &mut ct.columns {
7229 if let crate::expressions::DataType::Int {
7230 integer_spelling, ..
7231 } = &mut col.data_type
7232 {
7233 *integer_spelling = false;
7234 }
7235 }
7236 }
7237
7238 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
7239 if matches!(target, DialectType::Hive | DialectType::Spark) {
7240 for col in &mut ct.columns {
7241 // If nullable is explicitly true (NULL), change to None (omit it)
7242 if col.nullable == Some(true) {
7243 col.nullable = None;
7244 }
7245 // Also remove from constraints if stored there
7246 col.constraints
7247 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
7248 }
7249 }
7250
7251 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
7252 if ct.on_property.is_some()
7253 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7254 {
7255 ct.on_property = None;
7256 }
7257
7258 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
7259 // Snowflake doesn't support typed arrays in DDL
7260 if matches!(target, DialectType::Snowflake) {
7261 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
7262 if let crate::expressions::DataType::Array { .. } = dt {
7263 *dt = crate::expressions::DataType::Custom {
7264 name: "ARRAY".to_string(),
7265 };
7266 }
7267 }
7268 for col in &mut ct.columns {
7269 strip_array_type_params(&mut col.data_type);
7270 }
7271 }
7272
7273 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
7274 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
7275 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
7276 if matches!(target, DialectType::PostgreSQL) {
7277 for col in &mut ct.columns {
7278 if col.auto_increment && !col.constraint_order.is_empty() {
7279 use crate::expressions::ConstraintType;
7280 let has_explicit_not_null = col
7281 .constraint_order
7282 .iter()
7283 .any(|ct| *ct == ConstraintType::NotNull);
7284
7285 if has_explicit_not_null {
7286 // Source had explicit NOT NULL - preserve original order
7287 // Just ensure nullable is set
7288 if col.nullable != Some(false) {
7289 col.nullable = Some(false);
7290 }
7291 } else {
7292 // Source didn't have explicit NOT NULL - build order with
7293 // AutoIncrement + NotNull first, then remaining constraints
7294 let mut new_order = Vec::new();
7295 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
7296 new_order.push(ConstraintType::AutoIncrement);
7297 new_order.push(ConstraintType::NotNull);
7298 // Add remaining constraints in original order (except AutoIncrement)
7299 for ct_type in &col.constraint_order {
7300 if *ct_type != ConstraintType::AutoIncrement {
7301 new_order.push(ct_type.clone());
7302 }
7303 }
7304 col.constraint_order = new_order;
7305 col.nullable = Some(false);
7306 }
7307 }
7308 }
7309 }
7310
7311 Expression::CreateTable(ct)
7312 } else {
7313 expr
7314 };
7315
7316 // Handle CreateView column stripping for Presto/Trino target
7317 let expr = if let Expression::CreateView(mut cv) = expr {
7318 // Presto/Trino: drop column list when view has a SELECT body
7319 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
7320 {
7321 if !matches!(&cv.query, Expression::Null(_)) {
7322 cv.columns.clear();
7323 }
7324 }
7325 Expression::CreateView(cv)
7326 } else {
7327 expr
7328 };
7329
7330 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
7331 let expr = if !matches!(
7332 target,
7333 DialectType::Presto | DialectType::Trino | DialectType::Athena
7334 ) {
7335 if let Expression::Select(mut select) = expr {
7336 if let Some(ref mut with) = select.with {
7337 for cte in &mut with.ctes {
7338 if let Expression::Values(ref vals) = cte.this {
7339 // Build: SELECT * FROM (VALUES ...) AS _values
7340 let values_subquery =
7341 Expression::Subquery(Box::new(crate::expressions::Subquery {
7342 this: Expression::Values(vals.clone()),
7343 alias: Some(Identifier::new("_values".to_string())),
7344 column_aliases: Vec::new(),
7345 alias_explicit_as: false,
7346 alias_keyword: None,
7347 order_by: None,
7348 limit: None,
7349 offset: None,
7350 distribute_by: None,
7351 sort_by: None,
7352 cluster_by: None,
7353 lateral: false,
7354 modifiers_inside: false,
7355 trailing_comments: Vec::new(),
7356 inferred_type: None,
7357 }));
7358 let mut new_select = crate::expressions::Select::new();
7359 new_select.expressions =
7360 vec![Expression::Star(crate::expressions::Star {
7361 table: None,
7362 except: None,
7363 replace: None,
7364 rename: None,
7365 trailing_comments: Vec::new(),
7366 span: None,
7367 })];
7368 new_select.from = Some(crate::expressions::From {
7369 expressions: vec![values_subquery],
7370 });
7371 cte.this = Expression::Select(Box::new(new_select));
7372 }
7373 }
7374 }
7375 Expression::Select(select)
7376 } else {
7377 expr
7378 }
7379 } else {
7380 expr
7381 };
7382
7383 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
7384 let expr = if matches!(target, DialectType::PostgreSQL) {
7385 if let Expression::CreateIndex(mut ci) = expr {
7386 for col in &mut ci.columns {
7387 if col.nulls_first.is_none() {
7388 col.nulls_first = Some(true);
7389 }
7390 }
7391 Expression::CreateIndex(ci)
7392 } else {
7393 expr
7394 }
7395 } else {
7396 expr
7397 };
7398
7399 transform_recursive(expr, &|e| {
7400 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
7401 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
7402 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7403 if let Expression::Cast(ref c) = e {
7404 // Check if this is a CAST of an array to a struct array type
7405 let is_struct_array_cast =
7406 matches!(&c.to, crate::expressions::DataType::Array { .. });
7407 if is_struct_array_cast {
7408 let has_auto_named_structs = match &c.this {
7409 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
7410 if let Expression::Struct(s) = elem {
7411 s.fields.iter().all(|(name, _)| {
7412 name.as_ref().map_or(true, |n| {
7413 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
7414 })
7415 })
7416 } else {
7417 false
7418 }
7419 }),
7420 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
7421 if let Expression::Struct(s) = elem {
7422 s.fields.iter().all(|(name, _)| {
7423 name.as_ref().map_or(true, |n| {
7424 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
7425 })
7426 })
7427 } else {
7428 false
7429 }
7430 }),
7431 _ => false,
7432 };
7433 if has_auto_named_structs {
7434 let convert_struct_to_row = |elem: Expression| -> Expression {
7435 if let Expression::Struct(s) = elem {
7436 let row_args: Vec<Expression> =
7437 s.fields.into_iter().map(|(_, v)| v).collect();
7438 Expression::Function(Box::new(Function::new(
7439 "ROW".to_string(),
7440 row_args,
7441 )))
7442 } else {
7443 elem
7444 }
7445 };
7446 let mut c_clone = c.as_ref().clone();
7447 match &mut c_clone.this {
7448 Expression::Array(arr) => {
7449 arr.expressions = arr
7450 .expressions
7451 .drain(..)
7452 .map(convert_struct_to_row)
7453 .collect();
7454 }
7455 Expression::ArrayFunc(arr) => {
7456 arr.expressions = arr
7457 .expressions
7458 .drain(..)
7459 .map(convert_struct_to_row)
7460 .collect();
7461 }
7462 _ => {}
7463 }
7464 return Ok(Expression::Cast(Box::new(c_clone)));
7465 }
7466 }
7467 }
7468 }
7469
7470 // BigQuery SELECT AS STRUCT -> DuckDB struct literal {'key': value, ...}
7471 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7472 if let Expression::Select(ref sel) = e {
7473 if sel.kind.as_deref() == Some("STRUCT") {
7474 let mut fields = Vec::new();
7475 for expr in &sel.expressions {
7476 match expr {
7477 Expression::Alias(a) => {
7478 fields.push((Some(a.alias.name.clone()), a.this.clone()));
7479 }
7480 Expression::Column(c) => {
7481 fields.push((Some(c.name.name.clone()), expr.clone()));
7482 }
7483 _ => {
7484 fields.push((None, expr.clone()));
7485 }
7486 }
7487 }
7488 let struct_lit =
7489 Expression::Struct(Box::new(crate::expressions::Struct { fields }));
7490 let mut new_select = sel.as_ref().clone();
7491 new_select.kind = None;
7492 new_select.expressions = vec![struct_lit];
7493 return Ok(Expression::Select(Box::new(new_select)));
7494 }
7495 }
7496 }
7497
7498 // Convert @variable -> ${variable} for Spark/Hive/Databricks
7499 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7500 && matches!(
7501 target,
7502 DialectType::Spark | DialectType::Databricks | DialectType::Hive
7503 )
7504 {
7505 if let Expression::Parameter(ref p) = e {
7506 if p.style == crate::expressions::ParameterStyle::At {
7507 if let Some(ref name) = p.name {
7508 return Ok(Expression::Parameter(Box::new(
7509 crate::expressions::Parameter {
7510 name: Some(name.clone()),
7511 index: p.index,
7512 style: crate::expressions::ParameterStyle::DollarBrace,
7513 quoted: p.quoted,
7514 string_quoted: p.string_quoted,
7515 expression: None,
7516 },
7517 )));
7518 }
7519 }
7520 }
7521 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
7522 if let Expression::Column(ref col) = e {
7523 if col.name.name.starts_with('@') && col.table.is_none() {
7524 let var_name = col.name.name.trim_start_matches('@').to_string();
7525 return Ok(Expression::Parameter(Box::new(
7526 crate::expressions::Parameter {
7527 name: Some(var_name),
7528 index: None,
7529 style: crate::expressions::ParameterStyle::DollarBrace,
7530 quoted: false,
7531 string_quoted: false,
7532 expression: None,
7533 },
7534 )));
7535 }
7536 }
7537 }
7538
7539 // Convert @variable -> variable in SET statements for Spark/Databricks
7540 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7541 && matches!(target, DialectType::Spark | DialectType::Databricks)
7542 {
7543 if let Expression::SetStatement(ref s) = e {
7544 let mut new_items = s.items.clone();
7545 let mut changed = false;
7546 for item in &mut new_items {
7547 // Strip @ from the SET name (Parameter style)
7548 if let Expression::Parameter(ref p) = item.name {
7549 if p.style == crate::expressions::ParameterStyle::At {
7550 if let Some(ref name) = p.name {
7551 item.name = Expression::Identifier(Identifier::new(name));
7552 changed = true;
7553 }
7554 }
7555 }
7556 // Strip @ from the SET name (Identifier style - SET parser)
7557 if let Expression::Identifier(ref id) = item.name {
7558 if id.name.starts_with('@') {
7559 let var_name = id.name.trim_start_matches('@').to_string();
7560 item.name = Expression::Identifier(Identifier::new(&var_name));
7561 changed = true;
7562 }
7563 }
7564 // Strip @ from the SET name (Column style - alternative parsing)
7565 if let Expression::Column(ref col) = item.name {
7566 if col.name.name.starts_with('@') && col.table.is_none() {
7567 let var_name = col.name.name.trim_start_matches('@').to_string();
7568 item.name = Expression::Identifier(Identifier::new(&var_name));
7569 changed = true;
7570 }
7571 }
7572 }
7573 if changed {
7574 let mut new_set = (**s).clone();
7575 new_set.items = new_items;
7576 return Ok(Expression::SetStatement(Box::new(new_set)));
7577 }
7578 }
7579 }
7580
7581 // Strip NOLOCK hint for non-TSQL targets
7582 if matches!(source, DialectType::TSQL | DialectType::Fabric)
7583 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
7584 {
7585 if let Expression::Table(ref tr) = e {
7586 if !tr.hints.is_empty() {
7587 let mut new_tr = tr.clone();
7588 new_tr.hints.clear();
7589 return Ok(Expression::Table(new_tr));
7590 }
7591 }
7592 }
7593
7594 // Snowflake: TRUE IS TRUE -> TRUE, FALSE IS FALSE -> FALSE
7595 // Snowflake simplifies IS TRUE/IS FALSE on boolean literals
7596 if matches!(target, DialectType::Snowflake) {
7597 if let Expression::IsTrue(ref itf) = e {
7598 if let Expression::Boolean(ref b) = itf.this {
7599 if !itf.not {
7600 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7601 value: b.value,
7602 }));
7603 } else {
7604 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7605 value: !b.value,
7606 }));
7607 }
7608 }
7609 }
7610 if let Expression::IsFalse(ref itf) = e {
7611 if let Expression::Boolean(ref b) = itf.this {
7612 if !itf.not {
7613 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7614 value: !b.value,
7615 }));
7616 } else {
7617 return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
7618 value: b.value,
7619 }));
7620 }
7621 }
7622 }
7623 }
7624
7625 // BigQuery: split dotted backtick identifiers in table names
7626 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
7627 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
7628 if let Expression::CreateTable(ref ct) = e {
7629 let mut changed = false;
7630 let mut new_ct = ct.clone();
7631 // Split the table name
7632 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
7633 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
7634 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
7635 let was_quoted = ct.name.name.quoted;
7636 let mk_id = |s: &str| {
7637 if was_quoted {
7638 Identifier::quoted(s)
7639 } else {
7640 Identifier::new(s)
7641 }
7642 };
7643 if parts.len() == 3 {
7644 new_ct.name.catalog = Some(mk_id(parts[0]));
7645 new_ct.name.schema = Some(mk_id(parts[1]));
7646 new_ct.name.name = mk_id(parts[2]);
7647 changed = true;
7648 } else if parts.len() == 2 {
7649 new_ct.name.schema = Some(mk_id(parts[0]));
7650 new_ct.name.name = mk_id(parts[1]);
7651 changed = true;
7652 }
7653 }
7654 // Split the clone source name
7655 if let Some(ref clone_src) = ct.clone_source {
7656 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
7657 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
7658 let was_quoted = clone_src.name.quoted;
7659 let mk_id = |s: &str| {
7660 if was_quoted {
7661 Identifier::quoted(s)
7662 } else {
7663 Identifier::new(s)
7664 }
7665 };
7666 let mut new_src = clone_src.clone();
7667 if parts.len() == 3 {
7668 new_src.catalog = Some(mk_id(parts[0]));
7669 new_src.schema = Some(mk_id(parts[1]));
7670 new_src.name = mk_id(parts[2]);
7671 new_ct.clone_source = Some(new_src);
7672 changed = true;
7673 } else if parts.len() == 2 {
7674 new_src.schema = Some(mk_id(parts[0]));
7675 new_src.name = mk_id(parts[1]);
7676 new_ct.clone_source = Some(new_src);
7677 changed = true;
7678 }
7679 }
7680 }
7681 if changed {
7682 return Ok(Expression::CreateTable(new_ct));
7683 }
7684 }
7685 }
7686
7687 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
7688 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
7689 if matches!(source, DialectType::BigQuery)
7690 && matches!(
7691 target,
7692 DialectType::DuckDB
7693 | DialectType::Presto
7694 | DialectType::Trino
7695 | DialectType::Athena
7696 )
7697 {
7698 if let Expression::Subscript(ref sub) = e {
7699 let (new_index, is_safe) = match &sub.index {
7700 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
7701 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
7702 let Literal::Number(n) = lit.as_ref() else {
7703 unreachable!()
7704 };
7705 if let Ok(val) = n.parse::<i64>() {
7706 (
7707 Some(Expression::Literal(Box::new(Literal::Number(
7708 (val + 1).to_string(),
7709 )))),
7710 false,
7711 )
7712 } else {
7713 (None, false)
7714 }
7715 }
7716 // OFFSET(n) -> n+1 (0-based)
7717 Expression::Function(ref f)
7718 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
7719 {
7720 if let Expression::Literal(lit) = &f.args[0] {
7721 if let Literal::Number(n) = lit.as_ref() {
7722 if let Ok(val) = n.parse::<i64>() {
7723 (
7724 Some(Expression::Literal(Box::new(Literal::Number(
7725 (val + 1).to_string(),
7726 )))),
7727 false,
7728 )
7729 } else {
7730 (
7731 Some(Expression::Add(Box::new(
7732 crate::expressions::BinaryOp::new(
7733 f.args[0].clone(),
7734 Expression::number(1),
7735 ),
7736 ))),
7737 false,
7738 )
7739 }
7740 } else {
7741 (None, false)
7742 }
7743 } else {
7744 (
7745 Some(Expression::Add(Box::new(
7746 crate::expressions::BinaryOp::new(
7747 f.args[0].clone(),
7748 Expression::number(1),
7749 ),
7750 ))),
7751 false,
7752 )
7753 }
7754 }
7755 // ORDINAL(n) -> n (already 1-based)
7756 Expression::Function(ref f)
7757 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
7758 {
7759 (Some(f.args[0].clone()), false)
7760 }
7761 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
7762 Expression::Function(ref f)
7763 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
7764 {
7765 if let Expression::Literal(lit) = &f.args[0] {
7766 if let Literal::Number(n) = lit.as_ref() {
7767 if let Ok(val) = n.parse::<i64>() {
7768 (
7769 Some(Expression::Literal(Box::new(Literal::Number(
7770 (val + 1).to_string(),
7771 )))),
7772 true,
7773 )
7774 } else {
7775 (
7776 Some(Expression::Add(Box::new(
7777 crate::expressions::BinaryOp::new(
7778 f.args[0].clone(),
7779 Expression::number(1),
7780 ),
7781 ))),
7782 true,
7783 )
7784 }
7785 } else {
7786 (None, false)
7787 }
7788 } else {
7789 (
7790 Some(Expression::Add(Box::new(
7791 crate::expressions::BinaryOp::new(
7792 f.args[0].clone(),
7793 Expression::number(1),
7794 ),
7795 ))),
7796 true,
7797 )
7798 }
7799 }
7800 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
7801 Expression::Function(ref f)
7802 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
7803 {
7804 (Some(f.args[0].clone()), true)
7805 }
7806 _ => (None, false),
7807 };
7808 if let Some(idx) = new_index {
7809 if is_safe
7810 && matches!(
7811 target,
7812 DialectType::Presto | DialectType::Trino | DialectType::Athena
7813 )
7814 {
7815 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
7816 return Ok(Expression::Function(Box::new(Function::new(
7817 "ELEMENT_AT".to_string(),
7818 vec![sub.this.clone(), idx],
7819 ))));
7820 } else {
7821 // DuckDB or non-safe: just use subscript with converted index
7822 return Ok(Expression::Subscript(Box::new(
7823 crate::expressions::Subscript {
7824 this: sub.this.clone(),
7825 index: idx,
7826 },
7827 )));
7828 }
7829 }
7830 }
7831 }
7832
7833 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
7834 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
7835 if let Expression::Length(ref uf) = e {
7836 let arg = uf.this.clone();
7837 let typeof_func = Expression::Function(Box::new(Function::new(
7838 "TYPEOF".to_string(),
7839 vec![arg.clone()],
7840 )));
7841 let blob_cast = Expression::Cast(Box::new(Cast {
7842 this: arg.clone(),
7843 to: DataType::VarBinary { length: None },
7844 trailing_comments: vec![],
7845 double_colon_syntax: false,
7846 format: None,
7847 default: None,
7848 inferred_type: None,
7849 }));
7850 let octet_length = Expression::Function(Box::new(Function::new(
7851 "OCTET_LENGTH".to_string(),
7852 vec![blob_cast],
7853 )));
7854 let text_cast = Expression::Cast(Box::new(Cast {
7855 this: arg,
7856 to: DataType::Text,
7857 trailing_comments: vec![],
7858 double_colon_syntax: false,
7859 format: None,
7860 default: None,
7861 inferred_type: None,
7862 }));
7863 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
7864 this: text_cast,
7865 original_name: None,
7866 inferred_type: None,
7867 }));
7868 return Ok(Expression::Case(Box::new(Case {
7869 operand: Some(typeof_func),
7870 whens: vec![(
7871 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
7872 octet_length,
7873 )],
7874 else_: Some(length_text),
7875 comments: Vec::new(),
7876 inferred_type: None,
7877 })));
7878 }
7879 }
7880
7881 // BigQuery UNNEST alias handling (only for non-BigQuery sources):
7882 // UNNEST(...) AS x -> UNNEST(...) (drop unused table alias)
7883 // UNNEST(...) AS x(y) -> UNNEST(...) AS y (use column alias as main alias)
7884 if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
7885 if let Expression::Alias(ref a) = e {
7886 if matches!(&a.this, Expression::Unnest(_)) {
7887 if a.column_aliases.is_empty() {
7888 // Drop the entire alias, return just the UNNEST expression
7889 return Ok(a.this.clone());
7890 } else {
7891 // Use first column alias as the main alias
7892 let mut new_alias = a.as_ref().clone();
7893 new_alias.alias = a.column_aliases[0].clone();
7894 new_alias.column_aliases.clear();
7895 return Ok(Expression::Alias(Box::new(new_alias)));
7896 }
7897 }
7898 }
7899 }
7900
7901 // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
7902 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
7903 if let Expression::In(ref in_expr) = e {
7904 if let Some(ref unnest_inner) = in_expr.unnest {
7905 // Build the function call for the target dialect
7906 let func_expr = if matches!(
7907 target,
7908 DialectType::Hive | DialectType::Spark | DialectType::Databricks
7909 ) {
7910 // Use EXPLODE for Hive/Spark
7911 Expression::Function(Box::new(Function::new(
7912 "EXPLODE".to_string(),
7913 vec![*unnest_inner.clone()],
7914 )))
7915 } else {
7916 // Use UNNEST for Presto/Trino/DuckDB/etc.
7917 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
7918 this: *unnest_inner.clone(),
7919 expressions: Vec::new(),
7920 with_ordinality: false,
7921 alias: None,
7922 offset_alias: None,
7923 }))
7924 };
7925
7926 // Wrap in SELECT
7927 let mut inner_select = crate::expressions::Select::new();
7928 inner_select.expressions = vec![func_expr];
7929
7930 let subquery_expr = Expression::Select(Box::new(inner_select));
7931
7932 return Ok(Expression::In(Box::new(crate::expressions::In {
7933 this: in_expr.this.clone(),
7934 expressions: Vec::new(),
7935 query: Some(subquery_expr),
7936 not: in_expr.not,
7937 global: in_expr.global,
7938 unnest: None,
7939 is_field: false,
7940 })));
7941 }
7942 }
7943 }
7944
7945 // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
7946 // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
7947 if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
7948 if let Expression::Alias(ref a) = e {
7949 if let Expression::Function(ref f) = a.this {
7950 if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
7951 && !a.column_aliases.is_empty()
7952 {
7953 // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
7954 let col_alias = a.column_aliases[0].clone();
7955 let mut inner_select = crate::expressions::Select::new();
7956 inner_select.expressions =
7957 vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
7958 Expression::Identifier(Identifier::new("value".to_string())),
7959 col_alias,
7960 )))];
7961 inner_select.from = Some(crate::expressions::From {
7962 expressions: vec![a.this.clone()],
7963 });
7964 let subquery =
7965 Expression::Subquery(Box::new(crate::expressions::Subquery {
7966 this: Expression::Select(Box::new(inner_select)),
7967 alias: Some(a.alias.clone()),
7968 column_aliases: Vec::new(),
7969 alias_explicit_as: false,
7970 alias_keyword: None,
7971 order_by: None,
7972 limit: None,
7973 offset: None,
7974 lateral: false,
7975 modifiers_inside: false,
7976 trailing_comments: Vec::new(),
7977 distribute_by: None,
7978 sort_by: None,
7979 cluster_by: None,
7980 inferred_type: None,
7981 }));
7982 return Ok(subquery);
7983 }
7984 }
7985 }
7986 }
7987
7988 // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
7989 // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
7990 // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
7991 if matches!(source, DialectType::BigQuery) {
7992 if let Expression::Select(ref s) = e {
7993 if let Some(ref from) = s.from {
7994 if from.expressions.len() >= 2 {
7995 // Collect table names from first expression
7996 let first_tables: Vec<String> = from
7997 .expressions
7998 .iter()
7999 .take(1)
8000 .filter_map(|expr| {
8001 if let Expression::Table(t) = expr {
8002 Some(t.name.name.to_ascii_lowercase())
8003 } else {
8004 None
8005 }
8006 })
8007 .collect();
8008
8009 // Check if any subsequent FROM expressions are schema-qualified with a matching table name
8010 // or have a dotted name matching a table
8011 let mut needs_rewrite = false;
8012 for expr in from.expressions.iter().skip(1) {
8013 if let Expression::Table(t) = expr {
8014 if let Some(ref schema) = t.schema {
8015 if first_tables.contains(&schema.name.to_ascii_lowercase())
8016 {
8017 needs_rewrite = true;
8018 break;
8019 }
8020 }
8021 // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
8022 if t.schema.is_none() && t.name.name.contains('.') {
8023 let parts: Vec<&str> = t.name.name.split('.').collect();
8024 if parts.len() >= 2
8025 && first_tables.contains(&parts[0].to_ascii_lowercase())
8026 {
8027 needs_rewrite = true;
8028 break;
8029 }
8030 }
8031 }
8032 }
8033
8034 if needs_rewrite {
8035 let mut new_select = s.clone();
8036 let mut new_from_exprs = vec![from.expressions[0].clone()];
8037 let mut new_joins = s.joins.clone();
8038
8039 for expr in from.expressions.iter().skip(1) {
8040 if let Expression::Table(ref t) = expr {
8041 if let Some(ref schema) = t.schema {
8042 if first_tables
8043 .contains(&schema.name.to_ascii_lowercase())
8044 {
8045 // This is an array path reference, convert to CROSS JOIN UNNEST
8046 let col_expr = Expression::Column(Box::new(
8047 crate::expressions::Column {
8048 name: t.name.clone(),
8049 table: Some(schema.clone()),
8050 join_mark: false,
8051 trailing_comments: vec![],
8052 span: None,
8053 inferred_type: None,
8054 },
8055 ));
8056 let unnest_expr = Expression::Unnest(Box::new(
8057 crate::expressions::UnnestFunc {
8058 this: col_expr,
8059 expressions: Vec::new(),
8060 with_ordinality: false,
8061 alias: None,
8062 offset_alias: None,
8063 },
8064 ));
8065 let join_this = if let Some(ref alias) = t.alias {
8066 if matches!(
8067 target,
8068 DialectType::Presto
8069 | DialectType::Trino
8070 | DialectType::Athena
8071 ) {
8072 // Presto: UNNEST(x) AS _t0(results)
8073 Expression::Alias(Box::new(
8074 crate::expressions::Alias {
8075 this: unnest_expr,
8076 alias: Identifier::new("_t0"),
8077 column_aliases: vec![alias.clone()],
8078 alias_explicit_as: false,
8079 alias_keyword: None,
8080 pre_alias_comments: vec![],
8081 trailing_comments: vec![],
8082 inferred_type: None,
8083 },
8084 ))
8085 } else {
8086 // BigQuery: UNNEST(x) AS results
8087 Expression::Alias(Box::new(
8088 crate::expressions::Alias {
8089 this: unnest_expr,
8090 alias: alias.clone(),
8091 column_aliases: vec![],
8092 alias_explicit_as: false,
8093 alias_keyword: None,
8094 pre_alias_comments: vec![],
8095 trailing_comments: vec![],
8096 inferred_type: None,
8097 },
8098 ))
8099 }
8100 } else {
8101 unnest_expr
8102 };
8103 new_joins.push(crate::expressions::Join {
8104 kind: crate::expressions::JoinKind::Cross,
8105 this: join_this,
8106 on: None,
8107 using: Vec::new(),
8108 use_inner_keyword: false,
8109 use_outer_keyword: false,
8110 deferred_condition: false,
8111 join_hint: None,
8112 match_condition: None,
8113 pivots: Vec::new(),
8114 comments: Vec::new(),
8115 nesting_group: 0,
8116 directed: false,
8117 });
8118 } else {
8119 new_from_exprs.push(expr.clone());
8120 }
8121 } else if t.schema.is_none() && t.name.name.contains('.') {
8122 // Dotted name in quoted identifier: `Coordinates.position`
8123 let parts: Vec<&str> = t.name.name.split('.').collect();
8124 if parts.len() >= 2
8125 && first_tables
8126 .contains(&parts[0].to_ascii_lowercase())
8127 {
8128 let join_this =
8129 if matches!(target, DialectType::BigQuery) {
8130 // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
8131 Expression::Table(t.clone())
8132 } else {
8133 // Other targets: split into "schema"."name"
8134 let mut new_t = t.clone();
8135 new_t.schema =
8136 Some(Identifier::quoted(parts[0]));
8137 new_t.name = Identifier::quoted(parts[1]);
8138 Expression::Table(new_t)
8139 };
8140 new_joins.push(crate::expressions::Join {
8141 kind: crate::expressions::JoinKind::Cross,
8142 this: join_this,
8143 on: None,
8144 using: Vec::new(),
8145 use_inner_keyword: false,
8146 use_outer_keyword: false,
8147 deferred_condition: false,
8148 join_hint: None,
8149 match_condition: None,
8150 pivots: Vec::new(),
8151 comments: Vec::new(),
8152 nesting_group: 0,
8153 directed: false,
8154 });
8155 } else {
8156 new_from_exprs.push(expr.clone());
8157 }
8158 } else {
8159 new_from_exprs.push(expr.clone());
8160 }
8161 } else {
8162 new_from_exprs.push(expr.clone());
8163 }
8164 }
8165
8166 new_select.from = Some(crate::expressions::From {
8167 expressions: new_from_exprs,
8168 ..from.clone()
8169 });
8170 new_select.joins = new_joins;
8171 return Ok(Expression::Select(new_select));
8172 }
8173 }
8174 }
8175 }
8176 }
8177
8178 // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
8179 if matches!(
8180 target,
8181 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8182 ) {
8183 if let Expression::Select(ref s) = e {
8184 // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
8185 let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
8186 matches!(expr, Expression::Unnest(_))
8187 || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
8188 };
8189 let has_unnest_join = s.joins.iter().any(|j| {
8190 j.kind == crate::expressions::JoinKind::Cross && (
8191 matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
8192 || is_unnest_or_explode_expr(&j.this)
8193 )
8194 });
8195 if has_unnest_join {
8196 let mut select = s.clone();
8197 let mut new_joins = Vec::new();
8198 for join in select.joins.drain(..) {
8199 if join.kind == crate::expressions::JoinKind::Cross {
8200 // Extract the UNNEST/EXPLODE from the join
8201 let (func_expr, table_alias, col_aliases) = match &join.this {
8202 Expression::Alias(a) => {
8203 let ta = if a.alias.is_empty() {
8204 None
8205 } else {
8206 Some(a.alias.clone())
8207 };
8208 let cas = a.column_aliases.clone();
8209 match &a.this {
8210 Expression::Unnest(u) => {
8211 // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
8212 if !u.expressions.is_empty() {
8213 let mut all_args = vec![u.this.clone()];
8214 all_args.extend(u.expressions.clone());
8215 let arrays_zip =
8216 Expression::Function(Box::new(
8217 crate::expressions::Function::new(
8218 "ARRAYS_ZIP".to_string(),
8219 all_args,
8220 ),
8221 ));
8222 let inline = Expression::Function(Box::new(
8223 crate::expressions::Function::new(
8224 "INLINE".to_string(),
8225 vec![arrays_zip],
8226 ),
8227 ));
8228 (Some(inline), ta, a.column_aliases.clone())
8229 } else {
8230 // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
8231 let func_name = if u.with_ordinality {
8232 "POSEXPLODE"
8233 } else {
8234 "EXPLODE"
8235 };
8236 let explode = Expression::Function(Box::new(
8237 crate::expressions::Function::new(
8238 func_name.to_string(),
8239 vec![u.this.clone()],
8240 ),
8241 ));
8242 // For POSEXPLODE, add 'pos' to column aliases
8243 let cas = if u.with_ordinality {
8244 let mut pos_aliases =
8245 vec![Identifier::new(
8246 "pos".to_string(),
8247 )];
8248 pos_aliases
8249 .extend(a.column_aliases.clone());
8250 pos_aliases
8251 } else {
8252 a.column_aliases.clone()
8253 };
8254 (Some(explode), ta, cas)
8255 }
8256 }
8257 Expression::Function(f)
8258 if f.name.eq_ignore_ascii_case("EXPLODE") =>
8259 {
8260 (Some(Expression::Function(f.clone())), ta, cas)
8261 }
8262 _ => (None, None, Vec::new()),
8263 }
8264 }
8265 Expression::Unnest(u) => {
8266 let func_name = if u.with_ordinality {
8267 "POSEXPLODE"
8268 } else {
8269 "EXPLODE"
8270 };
8271 let explode = Expression::Function(Box::new(
8272 crate::expressions::Function::new(
8273 func_name.to_string(),
8274 vec![u.this.clone()],
8275 ),
8276 ));
8277 let ta = u.alias.clone();
8278 let col_aliases = if u.with_ordinality {
8279 vec![Identifier::new("pos".to_string())]
8280 } else {
8281 Vec::new()
8282 };
8283 (Some(explode), ta, col_aliases)
8284 }
8285 _ => (None, None, Vec::new()),
8286 };
8287 if let Some(func) = func_expr {
8288 select.lateral_views.push(crate::expressions::LateralView {
8289 this: func,
8290 table_alias,
8291 column_aliases: col_aliases,
8292 outer: false,
8293 });
8294 } else {
8295 new_joins.push(join);
8296 }
8297 } else {
8298 new_joins.push(join);
8299 }
8300 }
8301 select.joins = new_joins;
8302 return Ok(Expression::Select(select));
8303 }
8304 }
8305 }
8306
8307 // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
8308 // for BigQuery, Presto/Trino, Snowflake
8309 if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
8310 && matches!(
8311 target,
8312 DialectType::BigQuery
8313 | DialectType::Presto
8314 | DialectType::Trino
8315 | DialectType::Snowflake
8316 )
8317 {
8318 if let Expression::Select(ref s) = e {
8319 // Check if any SELECT expressions contain UNNEST
8320 // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
8321 let has_unnest_in_select = s.expressions.iter().any(|expr| {
8322 fn contains_unnest(e: &Expression) -> bool {
8323 match e {
8324 Expression::Unnest(_) => true,
8325 Expression::Function(f)
8326 if f.name.eq_ignore_ascii_case("UNNEST") =>
8327 {
8328 true
8329 }
8330 Expression::Alias(a) => contains_unnest(&a.this),
8331 Expression::Add(op)
8332 | Expression::Sub(op)
8333 | Expression::Mul(op)
8334 | Expression::Div(op) => {
8335 contains_unnest(&op.left) || contains_unnest(&op.right)
8336 }
8337 _ => false,
8338 }
8339 }
8340 contains_unnest(expr)
8341 });
8342
8343 if has_unnest_in_select {
8344 let rewritten = Self::rewrite_unnest_expansion(s, target);
8345 if let Some(new_select) = rewritten {
8346 return Ok(Expression::Select(Box::new(new_select)));
8347 }
8348 }
8349 }
8350 }
8351
8352 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
8353 // BigQuery '\n' -> PostgreSQL literal newline in string
8354 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
8355 {
8356 if let Expression::Literal(ref lit) = e {
8357 if let Literal::String(ref s) = lit.as_ref() {
8358 if s.contains("\\n")
8359 || s.contains("\\t")
8360 || s.contains("\\r")
8361 || s.contains("\\\\")
8362 {
8363 let converted = s
8364 .replace("\\n", "\n")
8365 .replace("\\t", "\t")
8366 .replace("\\r", "\r")
8367 .replace("\\\\", "\\");
8368 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
8369 }
8370 }
8371 }
8372 }
8373
8374 // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
8375 // when source != target (identity tests keep the Literal::Timestamp for native handling)
8376 if source != target {
8377 if let Expression::Literal(ref lit) = e {
8378 if let Literal::Timestamp(ref s) = lit.as_ref() {
8379 let s = s.clone();
8380 // MySQL: TIMESTAMP handling depends on source dialect
8381 // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
8382 // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
8383 if matches!(target, DialectType::MySQL) {
8384 if matches!(source, DialectType::BigQuery) {
8385 // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
8386 return Ok(Expression::Function(Box::new(Function::new(
8387 "TIMESTAMP".to_string(),
8388 vec![Expression::Literal(Box::new(Literal::String(s)))],
8389 ))));
8390 } else {
8391 // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
8392 return Ok(Expression::Cast(Box::new(Cast {
8393 this: Expression::Literal(Box::new(Literal::String(s))),
8394 to: DataType::Custom {
8395 name: "DATETIME".to_string(),
8396 },
8397 trailing_comments: Vec::new(),
8398 double_colon_syntax: false,
8399 format: None,
8400 default: None,
8401 inferred_type: None,
8402 })));
8403 }
8404 }
8405 let dt = match target {
8406 DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
8407 name: "DATETIME".to_string(),
8408 },
8409 DialectType::Snowflake => {
8410 // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
8411 if matches!(source, DialectType::BigQuery) {
8412 DataType::Custom {
8413 name: "TIMESTAMPTZ".to_string(),
8414 }
8415 } else if matches!(
8416 source,
8417 DialectType::PostgreSQL
8418 | DialectType::Redshift
8419 | DialectType::Snowflake
8420 ) {
8421 DataType::Timestamp {
8422 precision: None,
8423 timezone: false,
8424 }
8425 } else {
8426 DataType::Custom {
8427 name: "TIMESTAMPNTZ".to_string(),
8428 }
8429 }
8430 }
8431 DialectType::Spark | DialectType::Databricks => {
8432 // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
8433 if matches!(source, DialectType::BigQuery) {
8434 DataType::Timestamp {
8435 precision: None,
8436 timezone: false,
8437 }
8438 } else {
8439 DataType::Custom {
8440 name: "TIMESTAMP_NTZ".to_string(),
8441 }
8442 }
8443 }
8444 DialectType::ClickHouse => DataType::Nullable {
8445 inner: Box::new(DataType::Custom {
8446 name: "DateTime".to_string(),
8447 }),
8448 },
8449 DialectType::TSQL | DialectType::Fabric => DataType::Custom {
8450 name: "DATETIME2".to_string(),
8451 },
8452 DialectType::DuckDB => {
8453 // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
8454 // or when the timestamp string explicitly has timezone info
8455 if matches!(source, DialectType::BigQuery)
8456 || Self::timestamp_string_has_timezone(&s)
8457 {
8458 DataType::Custom {
8459 name: "TIMESTAMPTZ".to_string(),
8460 }
8461 } else {
8462 DataType::Timestamp {
8463 precision: None,
8464 timezone: false,
8465 }
8466 }
8467 }
8468 _ => DataType::Timestamp {
8469 precision: None,
8470 timezone: false,
8471 },
8472 };
8473 return Ok(Expression::Cast(Box::new(Cast {
8474 this: Expression::Literal(Box::new(Literal::String(s))),
8475 to: dt,
8476 trailing_comments: vec![],
8477 double_colon_syntax: false,
8478 format: None,
8479 default: None,
8480 inferred_type: None,
8481 })));
8482 }
8483 }
8484 }
8485
8486 // PostgreSQL DELETE requires explicit AS for table aliases
8487 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
8488 if let Expression::Delete(ref del) = e {
8489 if del.alias.is_some() && !del.alias_explicit_as {
8490 let mut new_del = del.clone();
8491 new_del.alias_explicit_as = true;
8492 return Ok(Expression::Delete(new_del));
8493 }
8494 }
8495 }
8496
8497 // UNION/INTERSECT/EXCEPT DISTINCT handling:
8498 // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
8499 // while others don't support it (Presto, Spark, DuckDB, etc.)
8500 {
8501 let needs_distinct =
8502 matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
8503 let drop_distinct = matches!(
8504 target,
8505 DialectType::Presto
8506 | DialectType::Trino
8507 | DialectType::Athena
8508 | DialectType::Spark
8509 | DialectType::Databricks
8510 | DialectType::DuckDB
8511 | DialectType::Hive
8512 | DialectType::MySQL
8513 | DialectType::PostgreSQL
8514 | DialectType::SQLite
8515 | DialectType::TSQL
8516 | DialectType::Redshift
8517 | DialectType::Snowflake
8518 | DialectType::Oracle
8519 | DialectType::Teradata
8520 | DialectType::Drill
8521 | DialectType::Doris
8522 | DialectType::StarRocks
8523 );
8524 match &e {
8525 Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
8526 let mut new_u = (**u).clone();
8527 new_u.distinct = true;
8528 return Ok(Expression::Union(Box::new(new_u)));
8529 }
8530 Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
8531 let mut new_i = (**i).clone();
8532 new_i.distinct = true;
8533 return Ok(Expression::Intersect(Box::new(new_i)));
8534 }
8535 Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
8536 let mut new_ex = (**ex).clone();
8537 new_ex.distinct = true;
8538 return Ok(Expression::Except(Box::new(new_ex)));
8539 }
8540 Expression::Union(u) if u.distinct && drop_distinct => {
8541 let mut new_u = (**u).clone();
8542 new_u.distinct = false;
8543 return Ok(Expression::Union(Box::new(new_u)));
8544 }
8545 Expression::Intersect(i) if i.distinct && drop_distinct => {
8546 let mut new_i = (**i).clone();
8547 new_i.distinct = false;
8548 return Ok(Expression::Intersect(Box::new(new_i)));
8549 }
8550 Expression::Except(ex) if ex.distinct && drop_distinct => {
8551 let mut new_ex = (**ex).clone();
8552 new_ex.distinct = false;
8553 return Ok(Expression::Except(Box::new(new_ex)));
8554 }
8555 _ => {}
8556 }
8557 }
8558
8559 // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
8560 if matches!(target, DialectType::ClickHouse) {
8561 if let Expression::Function(ref f) = e {
8562 if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
8563 let mut new_f = f.as_ref().clone();
8564 new_f.name = "map".to_string();
8565 return Ok(Expression::Function(Box::new(new_f)));
8566 }
8567 }
8568 }
8569
8570 // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
8571 if matches!(target, DialectType::ClickHouse) {
8572 if let Expression::Intersect(ref i) = e {
8573 if i.all {
8574 let mut new_i = (**i).clone();
8575 new_i.all = false;
8576 return Ok(Expression::Intersect(Box::new(new_i)));
8577 }
8578 }
8579 }
8580
8581 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
8582 // Only from Generic source, to prevent double-wrapping
8583 if matches!(source, DialectType::Generic) {
8584 if let Expression::Div(ref op) = e {
8585 let cast_type = match target {
8586 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
8587 precision: None,
8588 scale: None,
8589 real_spelling: false,
8590 }),
8591 DialectType::Drill
8592 | DialectType::Trino
8593 | DialectType::Athena
8594 | DialectType::Presto => Some(DataType::Double {
8595 precision: None,
8596 scale: None,
8597 }),
8598 DialectType::PostgreSQL
8599 | DialectType::Redshift
8600 | DialectType::Materialize
8601 | DialectType::Teradata
8602 | DialectType::RisingWave => Some(DataType::Double {
8603 precision: None,
8604 scale: None,
8605 }),
8606 _ => None,
8607 };
8608 if let Some(dt) = cast_type {
8609 let cast_left = Expression::Cast(Box::new(Cast {
8610 this: op.left.clone(),
8611 to: dt,
8612 double_colon_syntax: false,
8613 trailing_comments: Vec::new(),
8614 format: None,
8615 default: None,
8616 inferred_type: None,
8617 }));
8618 let new_op = crate::expressions::BinaryOp {
8619 left: cast_left,
8620 right: op.right.clone(),
8621 left_comments: op.left_comments.clone(),
8622 operator_comments: op.operator_comments.clone(),
8623 trailing_comments: op.trailing_comments.clone(),
8624 inferred_type: None,
8625 };
8626 return Ok(Expression::Div(Box::new(new_op)));
8627 }
8628 }
8629 }
8630
8631 // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
8632 if matches!(target, DialectType::DuckDB) {
8633 if let Expression::CreateDatabase(db) = e {
8634 let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
8635 schema.if_not_exists = db.if_not_exists;
8636 return Ok(Expression::CreateSchema(Box::new(schema)));
8637 }
8638 if let Expression::DropDatabase(db) = e {
8639 let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
8640 schema.if_exists = db.if_exists;
8641 return Ok(Expression::DropSchema(Box::new(schema)));
8642 }
8643 }
8644
8645 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
8646 if matches!(source, DialectType::ClickHouse)
8647 && !matches!(target, DialectType::ClickHouse)
8648 {
8649 if let Expression::Cast(ref c) = e {
8650 if let DataType::Custom { ref name } = c.to {
8651 if name.len() >= 9
8652 && name[..9].eq_ignore_ascii_case("NULLABLE(")
8653 && name.ends_with(")")
8654 {
8655 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
8656 let inner_upper = inner.to_ascii_uppercase();
8657 let new_dt = match inner_upper.as_str() {
8658 "DATETIME" | "DATETIME64" => DataType::Timestamp {
8659 precision: None,
8660 timezone: false,
8661 },
8662 "DATE" => DataType::Date,
8663 "INT64" | "BIGINT" => DataType::BigInt { length: None },
8664 "INT32" | "INT" | "INTEGER" => DataType::Int {
8665 length: None,
8666 integer_spelling: false,
8667 },
8668 "FLOAT64" | "DOUBLE" => DataType::Double {
8669 precision: None,
8670 scale: None,
8671 },
8672 "STRING" => DataType::Text,
8673 _ => DataType::Custom {
8674 name: inner.to_string(),
8675 },
8676 };
8677 let mut new_cast = c.clone();
8678 new_cast.to = new_dt;
8679 return Ok(Expression::Cast(new_cast));
8680 }
8681 }
8682 }
8683 }
8684
8685 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
8686 if matches!(target, DialectType::Snowflake) {
8687 if let Expression::ArrayConcatAgg(ref agg) = e {
8688 let mut agg_clone = agg.as_ref().clone();
8689 agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
8690 let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
8691 let flatten = Expression::Function(Box::new(Function::new(
8692 "ARRAY_FLATTEN".to_string(),
8693 vec![array_agg],
8694 )));
8695 return Ok(flatten);
8696 }
8697 }
8698
8699 // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
8700 if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
8701 if let Expression::ArrayConcatAgg(agg) = e {
8702 let arg = agg.this;
8703 return Ok(Expression::Function(Box::new(Function::new(
8704 "ARRAY_CONCAT_AGG".to_string(),
8705 vec![arg],
8706 ))));
8707 }
8708 }
8709
8710 // Determine what action to take by inspecting e immutably
8711 let action = {
8712 let source_propagates_nulls =
8713 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
8714 let target_ignores_nulls =
8715 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
8716
8717 match &e {
8718 Expression::Function(f) => {
8719 let name = f.name.to_ascii_uppercase();
8720 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
8721 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
8722 if name == "JSON"
8723 && f.args.len() == 1
8724 && matches!(source, DialectType::DuckDB)
8725 && matches!(
8726 target,
8727 DialectType::Presto | DialectType::Trino | DialectType::Athena
8728 )
8729 {
8730 Action::DuckDBJsonFuncToJsonParse
8731 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
8732 // SQL:2016 `x IS JSON` predicate which has matching semantics.
8733 } else if name == "JSON_VALID"
8734 && f.args.len() == 1
8735 && matches!(source, DialectType::DuckDB)
8736 && matches!(
8737 target,
8738 DialectType::Presto | DialectType::Trino | DialectType::Athena
8739 )
8740 {
8741 Action::DuckDBJsonValidToIsJson
8742 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
8743 } else if (name == "DATE_PART" || name == "DATEPART")
8744 && f.args.len() == 2
8745 && matches!(target, DialectType::Snowflake)
8746 && !matches!(source, DialectType::Snowflake)
8747 && matches!(
8748 &f.args[0],
8749 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8750 )
8751 {
8752 Action::DatePartUnquote
8753 } else if source_propagates_nulls
8754 && target_ignores_nulls
8755 && (name == "GREATEST" || name == "LEAST")
8756 && f.args.len() >= 2
8757 {
8758 Action::GreatestLeastNull
8759 } else if matches!(source, DialectType::Snowflake)
8760 && name == "ARRAY_GENERATE_RANGE"
8761 && f.args.len() >= 2
8762 {
8763 Action::ArrayGenerateRange
8764 } else if matches!(source, DialectType::Snowflake)
8765 && matches!(target, DialectType::DuckDB)
8766 && name == "DATE_TRUNC"
8767 && f.args.len() == 2
8768 {
8769 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
8770 // Logic based on Python sqlglot's input_type_preserved flag:
8771 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
8772 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
8773 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
8774 let unit_str = match &f.args[0] {
8775 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
8776 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
8777 Some(s.to_ascii_uppercase())
8778 }
8779 _ => None,
8780 };
8781 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
8782 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
8783 });
8784 match &f.args[1] {
8785 Expression::Cast(c) => match &c.to {
8786 DataType::Time { .. } => Action::DateTruncWrapCast,
8787 DataType::Custom { name }
8788 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
8789 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
8790 {
8791 Action::DateTruncWrapCast
8792 }
8793 DataType::Timestamp { timezone: true, .. } => {
8794 Action::DateTruncWrapCast
8795 }
8796 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
8797 DataType::Timestamp {
8798 timezone: false, ..
8799 } if is_date_unit => Action::DateTruncWrapCast,
8800 _ => Action::None,
8801 },
8802 _ => Action::None,
8803 }
8804 } else if matches!(source, DialectType::Snowflake)
8805 && matches!(target, DialectType::DuckDB)
8806 && name == "TO_DATE"
8807 && f.args.len() == 1
8808 && !matches!(
8809 &f.args[0],
8810 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8811 )
8812 {
8813 Action::ToDateToCast
8814 } else if !matches!(source, DialectType::Redshift)
8815 && matches!(target, DialectType::Redshift)
8816 && name == "CONVERT_TIMEZONE"
8817 && (f.args.len() == 2 || f.args.len() == 3)
8818 {
8819 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
8820 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
8821 // The Redshift parser adds 'UTC' as default source_tz, but when
8822 // transpiling from other dialects, we should preserve the original form.
8823 Action::ConvertTimezoneToExpr
8824 } else if matches!(source, DialectType::Snowflake)
8825 && matches!(target, DialectType::DuckDB)
8826 && name == "REGEXP_REPLACE"
8827 && f.args.len() == 4
8828 && !matches!(
8829 &f.args[3],
8830 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
8831 )
8832 {
8833 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
8834 Action::RegexpReplaceSnowflakeToDuckDB
8835 } else if matches!(source, DialectType::Snowflake)
8836 && matches!(target, DialectType::DuckDB)
8837 && name == "REGEXP_REPLACE"
8838 && f.args.len() == 5
8839 {
8840 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
8841 Action::RegexpReplacePositionSnowflakeToDuckDB
8842 } else if matches!(source, DialectType::Snowflake)
8843 && matches!(target, DialectType::DuckDB)
8844 && name == "REGEXP_SUBSTR"
8845 {
8846 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
8847 Action::RegexpSubstrSnowflakeToDuckDB
8848 } else if matches!(source, DialectType::Snowflake)
8849 && matches!(target, DialectType::Snowflake)
8850 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
8851 && f.args.len() == 6
8852 {
8853 // Snowflake identity: strip trailing group=0
8854 Action::RegexpSubstrSnowflakeIdentity
8855 } else if matches!(source, DialectType::Snowflake)
8856 && matches!(target, DialectType::DuckDB)
8857 && name == "REGEXP_SUBSTR_ALL"
8858 {
8859 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
8860 Action::RegexpSubstrAllSnowflakeToDuckDB
8861 } else if matches!(source, DialectType::Snowflake)
8862 && matches!(target, DialectType::DuckDB)
8863 && name == "REGEXP_COUNT"
8864 {
8865 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
8866 Action::RegexpCountSnowflakeToDuckDB
8867 } else if matches!(source, DialectType::Snowflake)
8868 && matches!(target, DialectType::DuckDB)
8869 && name == "REGEXP_INSTR"
8870 {
8871 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
8872 Action::RegexpInstrSnowflakeToDuckDB
8873 } else if matches!(source, DialectType::BigQuery)
8874 && matches!(target, DialectType::Snowflake)
8875 && name == "REGEXP_EXTRACT_ALL"
8876 {
8877 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
8878 Action::RegexpExtractAllToSnowflake
8879 } else if name == "_BQ_TO_HEX" {
8880 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
8881 Action::BigQueryToHexBare
8882 } else if matches!(source, DialectType::BigQuery)
8883 && !matches!(target, DialectType::BigQuery)
8884 {
8885 // BigQuery-specific functions that need to be converted to standard forms
8886 match name.as_str() {
8887 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
8888 | "DATE_DIFF"
8889 | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
8890 | "DATETIME_ADD" | "DATETIME_SUB"
8891 | "TIME_ADD" | "TIME_SUB"
8892 | "DATE_ADD" | "DATE_SUB"
8893 | "SAFE_DIVIDE"
8894 | "GENERATE_UUID"
8895 | "COUNTIF"
8896 | "EDIT_DISTANCE"
8897 | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
8898 | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
8899 | "TO_HEX"
8900 | "TO_JSON_STRING"
8901 | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
8902 | "DIV"
8903 | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
8904 | "LAST_DAY"
8905 | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
8906 | "REGEXP_CONTAINS"
8907 | "CONTAINS_SUBSTR"
8908 | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
8909 | "SAFE_CAST"
8910 | "GENERATE_DATE_ARRAY"
8911 | "PARSE_DATE" | "PARSE_TIMESTAMP"
8912 | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
8913 | "ARRAY_CONCAT"
8914 | "JSON_QUERY" | "JSON_VALUE_ARRAY"
8915 | "INSTR"
8916 | "MD5" | "SHA1" | "SHA256" | "SHA512"
8917 | "GENERATE_UUID()" // just in case
8918 | "REGEXP_EXTRACT_ALL"
8919 | "REGEXP_EXTRACT"
8920 | "INT64"
8921 | "ARRAY_CONCAT_AGG"
8922 | "DATE_DIFF(" // just in case
8923 | "TO_HEX_MD5" // internal
8924 | "MOD"
8925 | "CONCAT"
8926 | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
8927 | "STRUCT"
8928 | "ROUND"
8929 | "MAKE_INTERVAL"
8930 | "ARRAY_TO_STRING"
8931 | "PERCENTILE_CONT"
8932 => Action::BigQueryFunctionNormalize,
8933 "ARRAY" if matches!(target, DialectType::Snowflake)
8934 && f.args.len() == 1
8935 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
8936 => Action::BigQueryArraySelectAsStructToSnowflake,
8937 _ => Action::None,
8938 }
8939 } else if matches!(source, DialectType::BigQuery)
8940 && matches!(target, DialectType::BigQuery)
8941 {
8942 // BigQuery -> BigQuery normalizations
8943 match name.as_str() {
8944 "TIMESTAMP_DIFF"
8945 | "DATETIME_DIFF"
8946 | "TIME_DIFF"
8947 | "DATE_DIFF"
8948 | "DATE_ADD"
8949 | "TO_HEX"
8950 | "CURRENT_TIMESTAMP"
8951 | "CURRENT_DATE"
8952 | "CURRENT_TIME"
8953 | "CURRENT_DATETIME"
8954 | "GENERATE_DATE_ARRAY"
8955 | "INSTR"
8956 | "FORMAT_DATETIME"
8957 | "DATETIME"
8958 | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
8959 _ => Action::None,
8960 }
8961 } else {
8962 // Generic function normalization for non-BigQuery sources
8963 match name.as_str() {
8964 "ARBITRARY" | "AGGREGATE"
8965 | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
8966 | "STRUCT_EXTRACT"
8967 | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
8968 | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
8969 | "SUBSTRINGINDEX"
8970 | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
8971 | "UNICODE"
8972 | "XOR"
8973 | "ARRAY_REVERSE_SORT"
8974 | "ENCODE" | "DECODE"
8975 | "QUANTILE"
8976 | "EPOCH" | "EPOCH_MS"
8977 | "HASHBYTES"
8978 | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
8979 | "APPROX_DISTINCT"
8980 | "DATE_PARSE" | "FORMAT_DATETIME"
8981 | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
8982 | "RLIKE"
8983 | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
8984 | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
8985 | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
8986 | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
8987 | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
8988 | "MAP" | "MAP_FROM_ENTRIES"
8989 | "COLLECT_LIST" | "COLLECT_SET"
8990 | "ISNAN" | "IS_NAN"
8991 | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
8992 | "FORMAT_NUMBER"
8993 | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
8994 | "ELEMENT_AT"
8995 | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
8996 | "SPLIT_PART"
8997 // GENERATE_SERIES: handled separately below
8998 | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
8999 | "JSON_QUERY" | "JSON_VALUE"
9000 | "JSON_SEARCH"
9001 | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
9002 | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
9003 | "CURDATE" | "CURTIME"
9004 | "ARRAY_TO_STRING"
9005 | "ARRAY_SORT" | "SORT_ARRAY"
9006 | "LEFT" | "RIGHT"
9007 | "MAP_FROM_ARRAYS"
9008 | "LIKE" | "ILIKE"
9009 | "ARRAY_CONCAT" | "LIST_CONCAT"
9010 | "QUANTILE_CONT" | "QUANTILE_DISC"
9011 | "PERCENTILE_CONT" | "PERCENTILE_DISC"
9012 | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
9013 | "LOCATE" | "STRPOS" | "INSTR"
9014 | "CHAR"
9015 // CONCAT: handled separately for COALESCE wrapping
9016 | "ARRAY_JOIN"
9017 | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
9018 | "ISNULL"
9019 | "MONTHNAME"
9020 | "TO_TIMESTAMP"
9021 | "TO_DATE"
9022 | "TO_JSON"
9023 | "REGEXP_SPLIT"
9024 | "SPLIT"
9025 | "FORMATDATETIME"
9026 | "ARRAYJOIN"
9027 | "SPLITBYSTRING" | "SPLITBYREGEXP"
9028 | "NVL"
9029 | "TO_CHAR"
9030 | "DBMS_RANDOM.VALUE"
9031 | "REGEXP_LIKE"
9032 | "REPLICATE"
9033 | "LEN"
9034 | "COUNT_BIG"
9035 | "DATEFROMPARTS"
9036 | "DATETIMEFROMPARTS"
9037 | "CONVERT" | "TRY_CONVERT"
9038 | "STRFTIME" | "STRPTIME"
9039 | "DATE_FORMAT" | "FORMAT_DATE"
9040 | "PARSE_TIMESTAMP" | "PARSE_DATE"
9041 | "FROM_BASE64" | "TO_BASE64"
9042 | "GETDATE"
9043 | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
9044 | "TO_UTF8" | "FROM_UTF8"
9045 | "STARTS_WITH" | "STARTSWITH"
9046 | "APPROX_COUNT_DISTINCT"
9047 | "JSON_FORMAT"
9048 | "SYSDATE"
9049 | "LOGICAL_OR" | "LOGICAL_AND"
9050 | "MONTHS_ADD"
9051 | "SCHEMA_NAME"
9052 | "STRTOL"
9053 | "EDITDIST3"
9054 | "FORMAT"
9055 | "LIST_CONTAINS" | "LIST_HAS"
9056 | "VARIANCE" | "STDDEV"
9057 | "ISINF"
9058 | "TO_UNIXTIME"
9059 | "FROM_UNIXTIME"
9060 | "DATEPART" | "DATE_PART"
9061 | "DATENAME"
9062 | "STRING_AGG"
9063 | "JSON_ARRAYAGG"
9064 | "APPROX_QUANTILE"
9065 | "MAKE_DATE"
9066 | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
9067 | "RANGE"
9068 | "TRY_ELEMENT_AT"
9069 | "STR_TO_MAP"
9070 | "STRING"
9071 | "STR_TO_TIME"
9072 | "CURRENT_SCHEMA"
9073 | "LTRIM" | "RTRIM"
9074 | "UUID"
9075 | "FARM_FINGERPRINT"
9076 | "JSON_KEYS"
9077 | "WEEKOFYEAR"
9078 | "CONCAT_WS"
9079 | "TRY_DIVIDE"
9080 | "ARRAY_SLICE"
9081 | "ARRAY_PREPEND"
9082 | "ARRAY_REMOVE"
9083 | "GENERATE_DATE_ARRAY"
9084 | "PARSE_JSON"
9085 | "JSON_REMOVE"
9086 | "JSON_SET"
9087 | "LEVENSHTEIN"
9088 | "CURRENT_VERSION"
9089 | "ARRAY_MAX"
9090 | "ARRAY_MIN"
9091 | "JAROWINKLER_SIMILARITY"
9092 | "CURRENT_SCHEMAS"
9093 | "TO_VARIANT"
9094 | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
9095 | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
9096 => Action::GenericFunctionNormalize,
9097 // Canonical date functions -> dialect-specific
9098 "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
9099 "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
9100 "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
9101 "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
9102 "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
9103 "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
9104 "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
9105 "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
9106 "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
9107 "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
9108 "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
9109 "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
9110 "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
9111 "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
9112 "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
9113 "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
9114 "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
9115 // STR_TO_DATE(x, fmt) -> dialect-specific
9116 "STR_TO_DATE" if f.args.len() == 2
9117 && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
9118 "STR_TO_DATE" => Action::GenericFunctionNormalize,
9119 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
9120 "TS_OR_DS_ADD" if f.args.len() == 3
9121 && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
9122 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
9123 "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
9124 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
9125 "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
9126 // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
9127 "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
9128 // IS_ASCII(x) -> dialect-specific
9129 "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
9130 // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
9131 "STR_POSITION" => Action::StrPositionConvert,
9132 // ARRAY_SUM -> dialect-specific
9133 "ARRAY_SUM" => Action::ArraySumConvert,
9134 // ARRAY_SIZE -> dialect-specific (Drill only)
9135 "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
9136 // ARRAY_ANY -> dialect-specific
9137 "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
9138 // Functions needing specific cross-dialect transforms
9139 "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
9140 "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
9141 && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
9142 "ARRAY" if matches!(source, DialectType::BigQuery)
9143 && matches!(target, DialectType::Snowflake)
9144 && f.args.len() == 1
9145 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
9146 "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
9147 "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
9148 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
9149 // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/StarRocks/Spark/Databricks/MySQL
9150 "DATE_TRUNC" if f.args.len() == 2
9151 && matches!(source, DialectType::Generic)
9152 && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
9153 | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
9154 // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
9155 "TIMESTAMP_TRUNC" if f.args.len() >= 2
9156 && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
9157 "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
9158 // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
9159 "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9160 && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
9161 // GENERATE_SERIES with interval normalization for PG target
9162 "GENERATE_SERIES" if f.args.len() >= 3
9163 && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9164 && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
9165 "GENERATE_SERIES" => Action::None, // passthrough for other cases
9166 // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
9167 "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
9168 && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
9169 "CONCAT" => Action::GenericFunctionNormalize,
9170 // DIV(a, b) -> target-specific integer division
9171 "DIV" if f.args.len() == 2
9172 && matches!(source, DialectType::PostgreSQL)
9173 && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
9174 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
9175 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
9176 && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
9177 // JSONB_EXISTS -> JSON_EXISTS for DuckDB
9178 "JSONB_EXISTS" if f.args.len() == 2
9179 && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
9180 // DATE_BIN -> TIME_BUCKET for DuckDB
9181 "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
9182 // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
9183 "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
9184 // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
9185 "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
9186 // ClickHouse any -> ANY_VALUE for other dialects
9187 "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
9188 _ => Action::None,
9189 }
9190 }
9191 }
9192 Expression::AggregateFunction(af) => {
9193 let name = af.name.to_ascii_uppercase();
9194 match name.as_str() {
9195 "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
9196 "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
9197 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
9198 "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
9199 if matches!(target, DialectType::DuckDB) =>
9200 {
9201 Action::JsonObjectAggConvert
9202 }
9203 "ARRAY_AGG"
9204 if matches!(
9205 target,
9206 DialectType::Hive
9207 | DialectType::Spark
9208 | DialectType::Databricks
9209 ) =>
9210 {
9211 Action::ArrayAggToCollectList
9212 }
9213 "MAX_BY" | "MIN_BY"
9214 if matches!(
9215 target,
9216 DialectType::ClickHouse
9217 | DialectType::Spark
9218 | DialectType::Databricks
9219 | DialectType::DuckDB
9220 ) =>
9221 {
9222 Action::MaxByMinByConvert
9223 }
9224 "COLLECT_LIST"
9225 if matches!(
9226 target,
9227 DialectType::Presto | DialectType::Trino | DialectType::DuckDB
9228 ) =>
9229 {
9230 Action::CollectListToArrayAgg
9231 }
9232 "COLLECT_SET"
9233 if matches!(
9234 target,
9235 DialectType::Presto
9236 | DialectType::Trino
9237 | DialectType::Snowflake
9238 | DialectType::DuckDB
9239 ) =>
9240 {
9241 Action::CollectSetConvert
9242 }
9243 "PERCENTILE"
9244 if matches!(
9245 target,
9246 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9247 ) =>
9248 {
9249 Action::PercentileConvert
9250 }
9251 // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
9252 "CORR"
9253 if matches!(target, DialectType::DuckDB)
9254 && matches!(source, DialectType::Snowflake) =>
9255 {
9256 Action::CorrIsnanWrap
9257 }
9258 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
9259 "APPROX_QUANTILES"
9260 if matches!(source, DialectType::BigQuery)
9261 && matches!(target, DialectType::DuckDB) =>
9262 {
9263 Action::BigQueryApproxQuantiles
9264 }
9265 // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
9266 "PERCENTILE_CONT"
9267 if matches!(source, DialectType::BigQuery)
9268 && matches!(target, DialectType::DuckDB)
9269 && af.args.len() >= 2 =>
9270 {
9271 Action::BigQueryPercentileContToDuckDB
9272 }
9273 _ => Action::None,
9274 }
9275 }
9276 Expression::JSONArrayAgg(_) => match target {
9277 DialectType::PostgreSQL => Action::GenericFunctionNormalize,
9278 _ => Action::None,
9279 },
9280 Expression::ToNumber(tn) => {
9281 // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets
9282 if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
9283 match target {
9284 DialectType::Oracle
9285 | DialectType::Snowflake
9286 | DialectType::Teradata => Action::None,
9287 _ => Action::GenericFunctionNormalize,
9288 }
9289 } else {
9290 Action::None
9291 }
9292 }
9293 Expression::Nvl2(_) => {
9294 // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
9295 // Keep as NVL2 for dialects that support it natively
9296 match target {
9297 DialectType::Oracle
9298 | DialectType::Snowflake
9299 | DialectType::Teradata
9300 | DialectType::Spark
9301 | DialectType::Databricks
9302 | DialectType::Redshift => Action::None,
9303 _ => Action::Nvl2Expand,
9304 }
9305 }
9306 Expression::Decode(_) | Expression::DecodeCase(_) => {
9307 // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
9308 // Keep as DECODE for Oracle/Snowflake
9309 match target {
9310 DialectType::Oracle | DialectType::Snowflake => Action::None,
9311 _ => Action::DecodeSimplify,
9312 }
9313 }
9314 Expression::Coalesce(ref cf) => {
9315 // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect
9316 // BigQuery keeps IFNULL natively when source is also BigQuery
9317 if cf.original_name.as_deref() == Some("IFNULL")
9318 && !(matches!(source, DialectType::BigQuery)
9319 && matches!(target, DialectType::BigQuery))
9320 {
9321 Action::IfnullToCoalesce
9322 } else {
9323 Action::None
9324 }
9325 }
9326 Expression::IfFunc(if_func) => {
9327 if matches!(source, DialectType::Snowflake)
9328 && matches!(
9329 target,
9330 DialectType::Presto | DialectType::Trino | DialectType::SQLite
9331 )
9332 && matches!(if_func.false_value, Some(Expression::Div(_)))
9333 {
9334 Action::Div0TypedDivision
9335 } else {
9336 Action::None
9337 }
9338 }
9339 Expression::ToJson(_) => match target {
9340 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
9341 DialectType::BigQuery => Action::ToJsonConvert,
9342 DialectType::DuckDB => Action::ToJsonConvert,
9343 _ => Action::None,
9344 },
9345 Expression::ArrayAgg(ref agg) => {
9346 if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
9347 Action::ArrayAggToGroupConcat
9348 } else if matches!(
9349 target,
9350 DialectType::Hive | DialectType::Spark | DialectType::Databricks
9351 ) {
9352 // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
9353 Action::ArrayAggToCollectList
9354 } else if matches!(
9355 source,
9356 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9357 ) && matches!(target, DialectType::DuckDB)
9358 && agg.filter.is_some()
9359 {
9360 // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
9361 // Need to add NOT x IS NULL to existing filter
9362 Action::ArrayAggNullFilter
9363 } else if matches!(target, DialectType::DuckDB)
9364 && agg.ignore_nulls == Some(true)
9365 && !agg.order_by.is_empty()
9366 {
9367 // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
9368 Action::ArrayAggIgnoreNullsDuckDB
9369 } else if !matches!(source, DialectType::Snowflake) {
9370 Action::None
9371 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
9372 let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
9373 || agg.name.is_none();
9374 if is_array_agg {
9375 Action::ArrayAggCollectList
9376 } else {
9377 Action::None
9378 }
9379 } else if matches!(
9380 target,
9381 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9382 ) && agg.filter.is_none()
9383 {
9384 Action::ArrayAggFilter
9385 } else {
9386 Action::None
9387 }
9388 }
9389 Expression::WithinGroup(wg) => {
9390 if matches!(source, DialectType::Snowflake)
9391 && matches!(
9392 target,
9393 DialectType::DuckDB | DialectType::Presto | DialectType::Trino
9394 )
9395 && matches!(wg.this, Expression::ArrayAgg(_))
9396 {
9397 Action::ArrayAggWithinGroupFilter
9398 } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
9399 || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
9400 || matches!(&wg.this, Expression::StringAgg(_))
9401 {
9402 Action::StringAggConvert
9403 } else if matches!(
9404 target,
9405 DialectType::Presto
9406 | DialectType::Trino
9407 | DialectType::Athena
9408 | DialectType::Spark
9409 | DialectType::Databricks
9410 ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
9411 || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
9412 || matches!(&wg.this, Expression::PercentileCont(_)))
9413 {
9414 Action::PercentileContConvert
9415 } else {
9416 Action::None
9417 }
9418 }
9419 // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
9420 // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
9421 // DATETIME is the timezone-unaware type
9422 Expression::Cast(ref c) => {
9423 if c.format.is_some()
9424 && (matches!(source, DialectType::BigQuery)
9425 || matches!(source, DialectType::Teradata))
9426 {
9427 Action::BigQueryCastFormat
9428 } else if matches!(target, DialectType::BigQuery)
9429 && !matches!(source, DialectType::BigQuery)
9430 && matches!(
9431 c.to,
9432 DataType::Timestamp {
9433 timezone: false,
9434 ..
9435 }
9436 )
9437 {
9438 Action::CastTimestampToDatetime
9439 } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
9440 && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
9441 && matches!(
9442 c.to,
9443 DataType::Timestamp {
9444 timezone: false,
9445 ..
9446 }
9447 )
9448 {
9449 // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
9450 // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
9451 Action::CastTimestampToDatetime
9452 } else if matches!(
9453 source,
9454 DialectType::Hive | DialectType::Spark | DialectType::Databricks
9455 ) && matches!(
9456 target,
9457 DialectType::Presto
9458 | DialectType::Trino
9459 | DialectType::Athena
9460 | DialectType::DuckDB
9461 | DialectType::Snowflake
9462 | DialectType::BigQuery
9463 | DialectType::Databricks
9464 | DialectType::TSQL
9465 ) {
9466 Action::HiveCastToTryCast
9467 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
9468 && matches!(target, DialectType::MySQL | DialectType::StarRocks)
9469 {
9470 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
9471 Action::CastTimestamptzToFunc
9472 } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
9473 && matches!(
9474 target,
9475 DialectType::Hive
9476 | DialectType::Spark
9477 | DialectType::Databricks
9478 | DialectType::BigQuery
9479 )
9480 {
9481 // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
9482 Action::CastTimestampStripTz
9483 } else if matches!(&c.to, DataType::Json)
9484 && matches!(source, DialectType::DuckDB)
9485 && matches!(target, DialectType::Snowflake)
9486 {
9487 Action::DuckDBCastJsonToVariant
9488 } else if matches!(&c.to, DataType::Json)
9489 && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
9490 && matches!(
9491 target,
9492 DialectType::Presto
9493 | DialectType::Trino
9494 | DialectType::Athena
9495 | DialectType::Snowflake
9496 )
9497 {
9498 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
9499 // Only when the input is a string literal (JSON 'value' syntax)
9500 Action::JsonLiteralToJsonParse
9501 } else if matches!(&c.to, DataType::Json)
9502 && matches!(source, DialectType::DuckDB)
9503 && matches!(
9504 target,
9505 DialectType::Presto | DialectType::Trino | DialectType::Athena
9506 )
9507 {
9508 // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
9509 // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
9510 // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
9511 // in the target to preserve DuckDB's parse semantics.
9512 Action::JsonLiteralToJsonParse
9513 } else if matches!(&c.to, DataType::Json | DataType::JsonB)
9514 && matches!(target, DialectType::Spark | DialectType::Databricks)
9515 {
9516 // CAST(x AS JSON) -> TO_JSON(x) for Spark
9517 Action::CastToJsonForSpark
9518 } else if (matches!(
9519 &c.to,
9520 DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
9521 )) && matches!(
9522 target,
9523 DialectType::Spark | DialectType::Databricks
9524 ) && (matches!(&c.this, Expression::ParseJson(_))
9525 || matches!(
9526 &c.this,
9527 Expression::Function(f)
9528 if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
9529 || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
9530 || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
9531 ))
9532 {
9533 // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
9534 // -> FROM_JSON(..., type_string) for Spark
9535 Action::CastJsonToFromJson
9536 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
9537 && matches!(
9538 c.to,
9539 DataType::Timestamp {
9540 timezone: false,
9541 ..
9542 }
9543 )
9544 && matches!(source, DialectType::DuckDB)
9545 {
9546 Action::StrftimeCastTimestamp
9547 } else if matches!(source, DialectType::DuckDB)
9548 && matches!(
9549 c.to,
9550 DataType::Decimal {
9551 precision: None,
9552 ..
9553 }
9554 )
9555 {
9556 Action::DecimalDefaultPrecision
9557 } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
9558 && matches!(c.to, DataType::Char { length: None })
9559 && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
9560 {
9561 // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
9562 Action::MysqlCastCharToText
9563 } else if matches!(
9564 source,
9565 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9566 ) && matches!(
9567 target,
9568 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9569 ) && Self::has_varchar_char_type(&c.to)
9570 {
9571 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
9572 Action::SparkCastVarcharToString
9573 } else {
9574 Action::None
9575 }
9576 }
9577 Expression::SafeCast(ref c) => {
9578 if c.format.is_some()
9579 && matches!(source, DialectType::BigQuery)
9580 && !matches!(target, DialectType::BigQuery)
9581 {
9582 Action::BigQueryCastFormat
9583 } else {
9584 Action::None
9585 }
9586 }
9587 Expression::TryCast(ref c) => {
9588 if matches!(&c.to, DataType::Json)
9589 && matches!(source, DialectType::DuckDB)
9590 && matches!(
9591 target,
9592 DialectType::Presto | DialectType::Trino | DialectType::Athena
9593 )
9594 {
9595 // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
9596 // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
9597 // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
9598 // to preserve DuckDB's parse-or-null semantics.
9599 Action::DuckDBTryCastJsonToTryJsonParse
9600 } else {
9601 Action::None
9602 }
9603 }
9604 Expression::JSONArray(ref ja)
9605 if matches!(target, DialectType::Snowflake)
9606 && ja.null_handling.is_none()
9607 && ja.return_type.is_none()
9608 && ja.strict.is_none() =>
9609 {
9610 Action::GenericFunctionNormalize
9611 }
9612 Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
9613 Action::GenericFunctionNormalize
9614 }
9615 // For DuckDB: DATE_TRUNC should preserve the input type
9616 Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
9617 if matches!(source, DialectType::Snowflake)
9618 && matches!(target, DialectType::DuckDB)
9619 {
9620 Action::DateTruncWrapCast
9621 } else {
9622 Action::None
9623 }
9624 }
9625 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
9626 Expression::SetStatement(s) => {
9627 if matches!(target, DialectType::DuckDB)
9628 && !matches!(source, DialectType::TSQL | DialectType::Fabric)
9629 && s.items.iter().any(|item| item.kind.is_none())
9630 {
9631 Action::SetToVariable
9632 } else {
9633 Action::None
9634 }
9635 }
9636 // Cross-dialect NULL ordering normalization.
9637 // When nulls_first is not specified, fill in the source dialect's implied
9638 // default so the target generator can correctly add/strip NULLS FIRST/LAST.
9639 Expression::Ordered(o) => {
9640 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
9641 if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
9642 Action::MysqlNullsOrdering
9643 } else {
9644 // Skip targets that don't support NULLS FIRST/LAST syntax unless
9645 // the generator can preserve semantics with a CASE sort key.
9646 let target_rewrites_nulls =
9647 matches!(target, DialectType::TSQL | DialectType::Fabric);
9648 let target_supports_nulls = !matches!(
9649 target,
9650 DialectType::MySQL
9651 | DialectType::TSQL
9652 | DialectType::Fabric
9653 | DialectType::StarRocks
9654 | DialectType::Doris
9655 );
9656 if o.nulls_first.is_none()
9657 && source != target
9658 && (target_supports_nulls || target_rewrites_nulls)
9659 {
9660 Action::NullsOrdering
9661 } else {
9662 Action::None
9663 }
9664 }
9665 }
9666 // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
9667 Expression::DataType(dt) => {
9668 if matches!(source, DialectType::BigQuery)
9669 && !matches!(target, DialectType::BigQuery)
9670 {
9671 match dt {
9672 DataType::Custom { ref name }
9673 if name.eq_ignore_ascii_case("INT64")
9674 || name.eq_ignore_ascii_case("FLOAT64")
9675 || name.eq_ignore_ascii_case("BOOL")
9676 || name.eq_ignore_ascii_case("BYTES")
9677 || name.eq_ignore_ascii_case("NUMERIC")
9678 || name.eq_ignore_ascii_case("STRING")
9679 || name.eq_ignore_ascii_case("DATETIME") =>
9680 {
9681 Action::BigQueryCastType
9682 }
9683 _ => Action::None,
9684 }
9685 } else if matches!(source, DialectType::TSQL) {
9686 // For TSQL source -> any target (including TSQL itself for REAL)
9687 match dt {
9688 // REAL -> FLOAT even for TSQL->TSQL
9689 DataType::Custom { ref name }
9690 if name.eq_ignore_ascii_case("REAL") =>
9691 {
9692 Action::TSQLTypeNormalize
9693 }
9694 DataType::Float {
9695 real_spelling: true,
9696 ..
9697 } => Action::TSQLTypeNormalize,
9698 // Other TSQL type normalizations only for non-TSQL targets
9699 DataType::Custom { ref name }
9700 if !matches!(target, DialectType::TSQL)
9701 && (name.eq_ignore_ascii_case("MONEY")
9702 || name.eq_ignore_ascii_case("SMALLMONEY")
9703 || name.eq_ignore_ascii_case("DATETIME2")
9704 || name.eq_ignore_ascii_case("IMAGE")
9705 || name.eq_ignore_ascii_case("BIT")
9706 || name.eq_ignore_ascii_case("ROWVERSION")
9707 || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
9708 || name.eq_ignore_ascii_case("DATETIMEOFFSET")
9709 || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
9710 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
9711 || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
9712 {
9713 Action::TSQLTypeNormalize
9714 }
9715 DataType::Float {
9716 precision: Some(_), ..
9717 } if !matches!(target, DialectType::TSQL) => {
9718 Action::TSQLTypeNormalize
9719 }
9720 DataType::TinyInt { .. }
9721 if !matches!(target, DialectType::TSQL) =>
9722 {
9723 Action::TSQLTypeNormalize
9724 }
9725 // INTEGER -> INT for Databricks/Spark targets
9726 DataType::Int {
9727 integer_spelling: true,
9728 ..
9729 } if matches!(
9730 target,
9731 DialectType::Databricks | DialectType::Spark
9732 ) =>
9733 {
9734 Action::TSQLTypeNormalize
9735 }
9736 _ => Action::None,
9737 }
9738 } else if (matches!(source, DialectType::Oracle)
9739 || matches!(source, DialectType::Generic))
9740 && !matches!(target, DialectType::Oracle)
9741 {
9742 match dt {
9743 DataType::Custom { ref name }
9744 if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
9745 || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
9746 || name.eq_ignore_ascii_case("VARCHAR2")
9747 || name.eq_ignore_ascii_case("NVARCHAR2") =>
9748 {
9749 Action::OracleVarchar2ToVarchar
9750 }
9751 _ => Action::None,
9752 }
9753 } else if matches!(target, DialectType::Snowflake)
9754 && !matches!(source, DialectType::Snowflake)
9755 {
9756 // When target is Snowflake but source is NOT Snowflake,
9757 // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
9758 // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
9759 // should keep their FLOAT spelling.
9760 match dt {
9761 DataType::Float { .. } => Action::SnowflakeFloatProtect,
9762 _ => Action::None,
9763 }
9764 } else {
9765 Action::None
9766 }
9767 }
9768 // LOWER patterns from BigQuery TO_HEX conversions:
9769 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
9770 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
9771 Expression::Lower(uf) => {
9772 if matches!(source, DialectType::BigQuery) {
9773 match &uf.this {
9774 Expression::Lower(_) => Action::BigQueryToHexLower,
9775 Expression::Function(f)
9776 if f.name == "TO_HEX"
9777 && matches!(target, DialectType::BigQuery) =>
9778 {
9779 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
9780 Action::BigQueryToHexLower
9781 }
9782 _ => Action::None,
9783 }
9784 } else {
9785 Action::None
9786 }
9787 }
9788 // UPPER patterns from BigQuery TO_HEX conversions:
9789 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
9790 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
9791 Expression::Upper(uf) => {
9792 if matches!(source, DialectType::BigQuery) {
9793 match &uf.this {
9794 Expression::Lower(_) => Action::BigQueryToHexUpper,
9795 _ => Action::None,
9796 }
9797 } else {
9798 Action::None
9799 }
9800 }
9801 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
9802 // Snowflake supports LAST_DAY with unit, so keep it there
9803 Expression::LastDay(ld) => {
9804 if matches!(source, DialectType::BigQuery)
9805 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
9806 && ld.unit.is_some()
9807 {
9808 Action::BigQueryLastDayStripUnit
9809 } else {
9810 Action::None
9811 }
9812 }
9813 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
9814 Expression::SafeDivide(_) => {
9815 if matches!(source, DialectType::BigQuery)
9816 && !matches!(target, DialectType::BigQuery)
9817 {
9818 Action::BigQuerySafeDivide
9819 } else {
9820 Action::None
9821 }
9822 }
9823 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
9824 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
9825 Expression::AnyValue(ref agg) => {
9826 if matches!(source, DialectType::BigQuery)
9827 && matches!(target, DialectType::DuckDB)
9828 && agg.having_max.is_some()
9829 {
9830 Action::BigQueryAnyValueHaving
9831 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
9832 && !matches!(source, DialectType::Spark | DialectType::Databricks)
9833 && agg.ignore_nulls.is_none()
9834 {
9835 Action::AnyValueIgnoreNulls
9836 } else {
9837 Action::None
9838 }
9839 }
9840 Expression::Any(ref q) => {
9841 if matches!(source, DialectType::PostgreSQL)
9842 && matches!(
9843 target,
9844 DialectType::Spark | DialectType::Databricks | DialectType::Hive
9845 )
9846 && q.op.is_some()
9847 && !matches!(
9848 q.subquery,
9849 Expression::Select(_) | Expression::Subquery(_)
9850 )
9851 {
9852 Action::AnyToExists
9853 } else {
9854 Action::None
9855 }
9856 }
9857 // RegexpLike rewrites; arm order matters: the Snowflake -> DuckDB arm must come before the general DuckDB arm
9858 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
9859 Expression::RegexpLike(_)
9860 if matches!(source, DialectType::Snowflake)
9861 && matches!(target, DialectType::DuckDB) =>
9862 {
9863 Action::RlikeSnowflakeToDuckDB
9864 }
9865 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
9866 Expression::RegexpLike(_)
9867 if !matches!(source, DialectType::DuckDB)
9868 && matches!(target, DialectType::DuckDB) =>
9869 {
9870 Action::RegexpLikeToDuckDB
9871 }
9872 // RegexpLike -> Exasol: anchor pattern with .*...*
9873 Expression::RegexpLike(_)
9874 if matches!(target, DialectType::Exasol) =>
9875 {
9876 Action::RegexpLikeExasolAnchor
9877 }
9878 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
9879 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
9880 Expression::Div(ref op)
9881 if matches!(
9882 source,
9883 DialectType::MySQL
9884 | DialectType::DuckDB
9885 | DialectType::SingleStore
9886 | DialectType::TiDB
9887 | DialectType::ClickHouse
9888 | DialectType::Doris
9889 ) && matches!(
9890 target,
9891 DialectType::PostgreSQL
9892 | DialectType::Redshift
9893 | DialectType::Drill
9894 | DialectType::Trino
9895 | DialectType::Presto
9896 | DialectType::Athena
9897 | DialectType::TSQL
9898 | DialectType::Teradata
9899 | DialectType::SQLite
9900 | DialectType::BigQuery
9901 | DialectType::Snowflake
9902 | DialectType::Databricks
9903 | DialectType::Oracle
9904 | DialectType::Materialize
9905 | DialectType::RisingWave
9906 ) =>
9907 {
9908 // Only wrap if RHS is not already NULLIF
9909 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
9910 {
9911 Action::MySQLSafeDivide
9912 } else {
9913 Action::None
9914 }
9915 }
9916 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
9917 // For TSQL/Fabric, convert to sp_rename instead
9918 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
9919 if let Some(crate::expressions::AlterTableAction::RenameTable(
9920 ref new_tbl,
9921 )) = at.actions.first()
9922 {
9923 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
9924 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
9925 Action::AlterTableToSpRename
9926 } else if new_tbl.schema.is_some()
9927 && matches!(
9928 target,
9929 DialectType::BigQuery
9930 | DialectType::Doris
9931 | DialectType::StarRocks
9932 | DialectType::DuckDB
9933 | DialectType::PostgreSQL
9934 | DialectType::Redshift
9935 )
9936 {
9937 Action::AlterTableRenameStripSchema
9938 } else {
9939 Action::None
9940 }
9941 } else {
9942 Action::None
9943 }
9944 }
9945 // EPOCH(x) expression -> target-specific epoch conversion
9946 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
9947 Action::EpochConvert
9948 }
9949 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
9950 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
9951 Action::EpochMsConvert
9952 }
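9953 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite; the same StringAggConvert action also covers SingleStore/Doris/StarRocks and Spark/Databricks targets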
9954 Expression::StringAgg(_) => {
9955 if matches!(
9956 target,
9957 DialectType::MySQL
9958 | DialectType::SingleStore
9959 | DialectType::Doris
9960 | DialectType::StarRocks
9961 | DialectType::SQLite
9962 ) {
9963 Action::StringAggConvert
9964 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
9965 Action::StringAggConvert
9966 } else {
9967 Action::None
9968 }
9969 }
9970 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
9971 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
9972 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
9973 Expression::GroupConcat(_) => Action::GroupConcatConvert,
9974 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
9975 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
9976 Expression::Cardinality(_)
9977 if matches!(source, DialectType::DuckDB)
9978 && matches!(target, DialectType::DuckDB) =>
9979 {
9980 Action::None
9981 }
9982 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
9983 Action::ArrayLengthConvert
9984 }
9985 Expression::ArraySize(_) => {
9986 if matches!(target, DialectType::Drill) {
9987 Action::ArraySizeDrill
9988 } else {
9989 Action::ArrayLengthConvert
9990 }
9991 }
9992 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
9993 Expression::ArrayRemove(_) => match target {
9994 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
9995 Action::ArrayRemoveConvert
9996 }
9997 _ => Action::None,
9998 },
9999 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
10000 Expression::ArrayReverse(_) => match target {
10001 DialectType::ClickHouse => Action::ArrayReverseConvert,
10002 _ => Action::None,
10003 },
10004 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
10005 Expression::JsonKeys(_) => match target {
10006 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
10007 Action::JsonKeysConvert
10008 }
10009 _ => Action::None,
10010 },
10011 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
10012 Expression::ParseJson(_) => match target {
10013 DialectType::SQLite
10014 | DialectType::Doris
10015 | DialectType::MySQL
10016 | DialectType::StarRocks => Action::ParseJsonStrip,
10017 _ => Action::None,
10018 },
10019 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
10020 Expression::WeekOfYear(_)
10021 if matches!(target, DialectType::Snowflake)
10022 && !matches!(source, DialectType::Snowflake) =>
10023 {
10024 Action::WeekOfYearToWeekIso
10025 }
10026 // NVL: clear original_name so generator uses dialect-specific function names
10027 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
10028 // XOR: expand for dialects that don't support the XOR keyword
10029 Expression::Xor(_) => {
10030 let target_supports_xor = matches!(
10031 target,
10032 DialectType::MySQL
10033 | DialectType::SingleStore
10034 | DialectType::Doris
10035 | DialectType::StarRocks
10036 );
10037 if !target_supports_xor {
10038 Action::XorExpand
10039 } else {
10040 Action::None
10041 }
10042 }
10043 // TSQL #table -> temp table normalization (CREATE TABLE)
10044 Expression::CreateTable(ct)
10045 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10046 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10047 && ct.name.name.name.starts_with('#') =>
10048 {
10049 Action::TempTableHash
10050 }
10051 // TSQL #table -> strip # from table references in SELECT/etc.
10052 Expression::Table(tr)
10053 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10054 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10055 && tr.name.name.starts_with('#') =>
10056 {
10057 Action::TempTableHash
10058 }
10059 // TSQL #table -> strip # from DROP TABLE names
10060 Expression::DropTable(ref dt)
10061 if matches!(source, DialectType::TSQL | DialectType::Fabric)
10062 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
10063 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
10064 {
10065 Action::TempTableHash
10066 }
10067 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
10068 Expression::JsonExtract(_)
10069 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10070 {
10071 Action::JsonExtractToTsql
10072 }
10073 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
10074 Expression::JsonExtractScalar(_)
10075 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10076 {
10077 Action::JsonExtractToTsql
10078 }
10079 // JSON_EXTRACT -> JSONExtractString for ClickHouse
10080 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
10081 Action::JsonExtractToClickHouse
10082 }
10083 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
10084 Expression::JsonExtractScalar(_)
10085 if matches!(target, DialectType::ClickHouse) =>
10086 {
10087 Action::JsonExtractToClickHouse
10088 }
10089 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
10090 Expression::JsonExtract(ref f)
10091 if !f.arrow_syntax
10092 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
10093 {
10094 Action::JsonExtractToArrow
10095 }
10096 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
10097 Expression::JsonExtract(ref f)
10098 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
10099 && !matches!(
10100 source,
10101 DialectType::PostgreSQL
10102 | DialectType::Redshift
10103 | DialectType::Materialize
10104 )
10105 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
10106 {
10107 Action::JsonExtractToGetJsonObject
10108 }
10109 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
10110 Expression::JsonExtract(_)
10111 if matches!(
10112 target,
10113 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10114 ) =>
10115 {
10116 Action::JsonExtractToGetJsonObject
10117 }
10118 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Redshift, Snowflake, SQLite, DuckDB
10119 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
10120 Expression::JsonExtractScalar(ref f)
10121 if !f.arrow_syntax
10122 && !f.hash_arrow_syntax
10123 && matches!(
10124 target,
10125 DialectType::PostgreSQL
10126 | DialectType::Redshift
10127 | DialectType::Snowflake
10128 | DialectType::SQLite
10129 | DialectType::DuckDB
10130 ) =>
10131 {
10132 Action::JsonExtractScalarConvert
10133 }
10134 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
10135 Expression::JsonExtractScalar(_)
10136 if matches!(
10137 target,
10138 DialectType::Hive | DialectType::Spark | DialectType::Databricks
10139 ) =>
10140 {
10141 Action::JsonExtractScalarToGetJsonObject
10142 }
10143 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
10144 Expression::JsonExtract(ref f)
10145 if !f.arrow_syntax
10146 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
10147 {
10148 Action::JsonPathNormalize
10149 }
10150 // JsonQuery (parsed JSON_QUERY) -> target-specific
10151 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
10152 // JsonValue (parsed JSON_VALUE) -> target-specific
10153 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
10154 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
10155 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
10156 Expression::AtTimeZone(_)
10157 if matches!(
10158 target,
10159 DialectType::Presto
10160 | DialectType::Trino
10161 | DialectType::Athena
10162 | DialectType::Spark
10163 | DialectType::Databricks
10164 | DialectType::BigQuery
10165 | DialectType::Snowflake
10166 ) =>
10167 {
10168 Action::AtTimeZoneConvert
10169 }
10170 // DAY_OF_WEEK -> dialect-specific
10171 Expression::DayOfWeek(_)
10172 if matches!(
10173 target,
10174 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
10175 ) =>
10176 {
10177 Action::DayOfWeekConvert
10178 }
10179 // CURRENT_USER -> CURRENT_USER() for Snowflake
10180 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
10181 Action::CurrentUserParens
10182 }
10183 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
10184 Expression::ElementAt(_)
10185 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
10186 {
10187 Action::ElementAtConvert
10188 }
10189 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
10190 Expression::ArrayFunc(ref arr)
10191 if !arr.bracket_notation
10192 && matches!(
10193 target,
10194 DialectType::Spark
10195 | DialectType::Databricks
10196 | DialectType::Hive
10197 | DialectType::BigQuery
10198 | DialectType::DuckDB
10199 | DialectType::Snowflake
10200 | DialectType::Presto
10201 | DialectType::Trino
10202 | DialectType::Athena
10203 | DialectType::ClickHouse
10204 | DialectType::StarRocks
10205 ) =>
10206 {
10207 Action::ArraySyntaxConvert
10208 }
10209 // VARIANCE expression -> varSamp for ClickHouse
10210 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
10211 Action::VarianceToClickHouse
10212 }
10213 // STDDEV expression -> stddevSamp for ClickHouse
10214 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
10215 Action::StddevToClickHouse
10216 }
10217 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
10218 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
10219 Action::ApproxQuantileConvert
10220 }
10221 // MonthsBetween -> target-specific
10222 Expression::MonthsBetween(_)
10223 if !matches!(
10224 target,
10225 DialectType::Spark | DialectType::Databricks | DialectType::Hive
10226 ) =>
10227 {
10228 Action::MonthsBetweenConvert
10229 }
10230 // AddMonths -> target-specific DATEADD/DATE_ADD
10231 Expression::AddMonths(_) => Action::AddMonthsConvert,
10232 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
10233 Expression::MapFromArrays(_)
10234 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
10235 {
10236 Action::MapFromArraysConvert
10237 }
10238 // CURRENT_USER -> CURRENT_USER() for Spark
10239 Expression::CurrentUser(_)
10240 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
10241 {
10242 Action::CurrentUserSparkParens
10243 }
10244 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
10245 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
10246 if matches!(
10247 source,
10248 DialectType::Spark | DialectType::Databricks | DialectType::Hive
10249 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
10250 && matches!(
10251 target,
10252 DialectType::DuckDB
10253 | DialectType::Presto
10254 | DialectType::Trino
10255 | DialectType::Athena
10256 | DialectType::PostgreSQL
10257 | DialectType::Redshift
10258 ) =>
10259 {
10260 Action::SparkDateFuncCast
10261 }
10262 // $parameter -> @parameter for BigQuery
10263 Expression::Parameter(ref p)
10264 if matches!(target, DialectType::BigQuery)
10265 && matches!(source, DialectType::DuckDB)
10266 && (p.style == crate::expressions::ParameterStyle::Dollar
10267 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
10268 {
10269 Action::DollarParamConvert
10270 }
10271 // EscapeString literal: normalize literal newlines to \n
10272 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
10273 =>
10274 {
10275 Action::EscapeStringNormalize
10276 }
10277 // straight_join: keep lowercase for DuckDB, quote for MySQL
10278 Expression::Column(ref col)
10279 if col.name.name == "STRAIGHT_JOIN"
10280 && col.table.is_none()
10281 && matches!(source, DialectType::DuckDB)
10282 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
10283 {
10284 Action::StraightJoinCase
10285 }
10286 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
10287 // INTERVAL format for Snowflake/PostgreSQL/Redshift targets: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
10288 Expression::Interval(ref iv)
10289 if matches!(
10290 target,
10291 DialectType::Snowflake
10292 | DialectType::PostgreSQL
10293 | DialectType::Redshift
10294 ) && iv.unit.is_some()
10295 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
10296 {
10297 Action::SnowflakeIntervalFormat
10298 }
10299 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
10300 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
10301 if let Some(ref sample) = ts.sample {
10302 if !sample.explicit_method {
10303 Action::TablesampleReservoir
10304 } else {
10305 Action::None
10306 }
10307 } else {
10308 Action::None
10309 }
10310 }
10311 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
10312 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
10313 Expression::TableSample(ref ts)
10314 if matches!(target, DialectType::Snowflake)
10315 && !matches!(source, DialectType::Snowflake)
10316 && ts.sample.is_some() =>
10317 {
10318 if let Some(ref sample) = ts.sample {
10319 if !sample.explicit_method {
10320 Action::TablesampleSnowflakeStrip
10321 } else {
10322 Action::None
10323 }
10324 } else {
10325 Action::None
10326 }
10327 }
10328 Expression::Table(ref t)
10329 if matches!(target, DialectType::Snowflake)
10330 && !matches!(source, DialectType::Snowflake)
10331 && t.table_sample.is_some() =>
10332 {
10333 if let Some(ref sample) = t.table_sample {
10334 if !sample.explicit_method {
10335 Action::TablesampleSnowflakeStrip
10336 } else {
10337 Action::None
10338 }
10339 } else {
10340 Action::None
10341 }
10342 }
10343 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
10344 Expression::AlterTable(ref at)
10345 if matches!(target, DialectType::TSQL | DialectType::Fabric)
10346 && !at.actions.is_empty()
10347 && matches!(
10348 at.actions.first(),
10349 Some(crate::expressions::AlterTableAction::RenameTable(_))
10350 ) =>
10351 {
10352 Action::AlterTableToSpRename
10353 }
10354 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark/Databricks
10355 Expression::Subscript(ref sub)
10356 if matches!(
10357 target,
10358 DialectType::BigQuery
10359 | DialectType::Hive
10360 | DialectType::Spark
10361 | DialectType::Databricks
10362 ) && matches!(
10363 source,
10364 DialectType::DuckDB
10365 | DialectType::PostgreSQL
10366 | DialectType::Presto
10367 | DialectType::Trino
10368 | DialectType::Redshift
10369 | DialectType::ClickHouse
10370 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
10371 {
10372 Action::ArrayIndexConvert
10373 }
10374 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
10375 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
10376 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
10377 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
10378 Expression::WindowFunction(ref wf) => {
10379 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
10380 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
10381 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
10382 if matches!(target, DialectType::BigQuery)
10383 && !is_row_number
10384 && !wf.over.order_by.is_empty()
10385 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
10386 {
10387 Action::BigQueryNullsOrdering
10388 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
10389 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
10390 } else {
10391 let source_nulls_last = matches!(source, DialectType::DuckDB);
10392 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
10393 matches!(
10394 f.kind,
10395 crate::expressions::WindowFrameKind::Range
10396 | crate::expressions::WindowFrameKind::Groups
10397 )
10398 });
10399 if source_nulls_last
10400 && matches!(target, DialectType::MySQL)
10401 && !wf.over.order_by.is_empty()
10402 && wf.over.order_by.iter().any(|o| !o.desc)
10403 && !has_range_frame
10404 {
10405 Action::MysqlNullsLastRewrite
10406 } else {
10407 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
10408 let is_ranking_window_func = matches!(
10409 &wf.this,
10410 Expression::FirstValue(_)
10411 | Expression::LastValue(_)
10412 | Expression::NthValue(_)
10413 );
10414 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
10415 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
10416 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
10417 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
10418 && f.exclude.is_none()
10419 });
10420 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
10421 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
10422 // Strip the default frame for Snowflake target
10423 Action::SnowflakeWindowFrameStrip
10424 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
10425 // Add default frame for non-Snowflake target
10426 Action::SnowflakeWindowFrameAdd
10427 } else {
10428 match &wf.this {
10429 Expression::FirstValue(ref vf)
10430 | Expression::LastValue(ref vf)
10431 if vf.ignore_nulls == Some(false) =>
10432 {
10433 match target {
10434 DialectType::SQLite => Action::RespectNullsConvert,
10435 _ => Action::None,
10436 }
10437 }
10438 _ => Action::None,
10439 }
10440 }
10441 } else {
10442 match &wf.this {
10443 Expression::FirstValue(ref vf)
10444 | Expression::LastValue(ref vf)
10445 if vf.ignore_nulls == Some(false) =>
10446 {
10447 // RESPECT NULLS
10448 match target {
10449 DialectType::SQLite | DialectType::PostgreSQL => {
10450 Action::RespectNullsConvert
10451 }
10452 _ => Action::None,
10453 }
10454 }
10455 _ => Action::None,
10456 }
10457 }
10458 }
10459 }
10460 }
10461 // CREATE TABLE a LIKE b -> dialect-specific transformations
10462 Expression::CreateTable(ref ct)
10463 if ct.columns.is_empty()
10464 && ct.constraints.iter().any(|c| {
10465 matches!(c, crate::expressions::TableConstraint::Like { .. })
10466 })
10467 && matches!(
10468 target,
10469 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
10470 ) =>
10471 {
10472 Action::CreateTableLikeToCtas
10473 }
10474 Expression::CreateTable(ref ct)
10475 if ct.columns.is_empty()
10476 && ct.constraints.iter().any(|c| {
10477 matches!(c, crate::expressions::TableConstraint::Like { .. })
10478 })
10479 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
10480 {
10481 Action::CreateTableLikeToSelectInto
10482 }
10483 Expression::CreateTable(ref ct)
10484 if ct.columns.is_empty()
10485 && ct.constraints.iter().any(|c| {
10486 matches!(c, crate::expressions::TableConstraint::Like { .. })
10487 })
10488 && matches!(target, DialectType::ClickHouse) =>
10489 {
10490 Action::CreateTableLikeToAs
10491 }
10492 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
10493 Expression::CreateTable(ref ct)
10494 if matches!(target, DialectType::DuckDB)
10495 && matches!(
10496 source,
10497 DialectType::DuckDB
10498 | DialectType::Spark
10499 | DialectType::Databricks
10500 | DialectType::Hive
10501 ) =>
10502 {
10503 let has_comment = ct.columns.iter().any(|c| {
10504 c.comment.is_some()
10505 || c.constraints.iter().any(|con| {
10506 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
10507 })
10508 });
10509 let has_props = !ct.properties.is_empty();
10510 if has_comment || has_props {
10511 Action::CreateTableStripComment
10512 } else {
10513 Action::None
10514 }
10515 }
10516 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
10517 Expression::Array(_)
10518 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
10519 {
10520 Action::ArrayConcatBracketConvert
10521 }
10522 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
10523 Expression::ArrayFunc(ref arr)
10524 if arr.bracket_notation
10525 && matches!(source, DialectType::BigQuery)
10526 && matches!(target, DialectType::Redshift) =>
10527 {
10528 Action::ArrayConcatBracketConvert
10529 }
10530 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
10531 Expression::BitwiseOrAgg(ref f)
10532 | Expression::BitwiseAndAgg(ref f)
10533 | Expression::BitwiseXorAgg(ref f) => {
10534 if matches!(target, DialectType::DuckDB) {
10535 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
10536 if let Expression::Cast(ref c) = f.this {
10537 match &c.to {
10538 DataType::Float { .. }
10539 | DataType::Double { .. }
10540 | DataType::Decimal { .. } => Action::BitAggFloatCast,
10541 DataType::Custom { ref name }
10542 if name.eq_ignore_ascii_case("REAL") =>
10543 {
10544 Action::BitAggFloatCast
10545 }
10546 _ => Action::None,
10547 }
10548 } else {
10549 Action::None
10550 }
10551 } else if matches!(target, DialectType::Snowflake) {
10552 Action::BitAggSnowflakeRename
10553 } else {
10554 Action::None
10555 }
10556 }
10557 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
10558 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
10559 Action::FilterToIff
10560 }
10561 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
10562 Expression::Avg(ref f)
10563 | Expression::Sum(ref f)
10564 | Expression::Min(ref f)
10565 | Expression::Max(ref f)
10566 | Expression::CountIf(ref f)
10567 | Expression::Stddev(ref f)
10568 | Expression::StddevPop(ref f)
10569 | Expression::StddevSamp(ref f)
10570 | Expression::Variance(ref f)
10571 | Expression::VarPop(ref f)
10572 | Expression::VarSamp(ref f)
10573 | Expression::Median(ref f)
10574 | Expression::Mode(ref f)
10575 | Expression::First(ref f)
10576 | Expression::Last(ref f)
10577 | Expression::ApproxDistinct(ref f)
10578 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
10579 {
10580 Action::AggFilterToIff
10581 }
10582 Expression::Count(ref c)
10583 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
10584 {
10585 Action::AggFilterToIff
10586 }
10587 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
10588 Expression::Count(ref c)
10589 if c.distinct
10590 && matches!(&c.this, Some(Expression::Tuple(_)))
10591 && matches!(
10592 target,
10593 DialectType::Presto
10594 | DialectType::Trino
10595 | DialectType::DuckDB
10596 | DialectType::PostgreSQL
10597 ) =>
10598 {
10599 Action::CountDistinctMultiArg
10600 }
10601 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
10602 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
10603 Action::JsonToGetPath
10604 }
10605 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
10606 Expression::Struct(_)
10607 if matches!(
10608 target,
10609 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10610 ) && matches!(source, DialectType::DuckDB) =>
10611 {
10612 Action::StructToRow
10613 }
10614 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
10615 Expression::MapFunc(ref m)
10616 if m.curly_brace_syntax
10617 && matches!(
10618 target,
10619 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
10620 )
10621 && matches!(source, DialectType::DuckDB) =>
10622 {
10623 Action::StructToRow
10624 }
10625 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino/Athena
10626 Expression::ApproxCountDistinct(_)
10627 if matches!(
10628 target,
10629 DialectType::Presto | DialectType::Trino | DialectType::Athena
10630 ) =>
10631 {
10632 Action::ApproxCountDistinctToApproxDistinct
10633 }
10634 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
10635 Expression::ArrayContains(_)
10636 if matches!(
10637 target,
10638 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
10639 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
10640 {
10641 Action::ArrayContainsConvert
10642 }
10643 // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
10644 Expression::ArrayContains(_)
10645 if matches!(target, DialectType::DuckDB)
10646 && matches!(source, DialectType::Snowflake) =>
10647 {
10648 Action::ArrayContainsDuckDBConvert
10649 }
10650 // ARRAY_EXCEPT -> target-specific conversion
10651 Expression::ArrayExcept(_)
10652 if matches!(
10653 target,
10654 DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
10655 ) =>
10656 {
10657 Action::ArrayExceptConvert
10658 }
10659 // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
10660 Expression::ArrayPosition(_)
10661 if matches!(target, DialectType::Snowflake)
10662 && !matches!(source, DialectType::Snowflake) =>
10663 {
10664 Action::ArrayPositionSnowflakeSwap
10665 }
10666 // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
10667 Expression::ArrayPosition(_)
10668 if matches!(target, DialectType::DuckDB)
10669 && matches!(source, DialectType::Snowflake) =>
10670 {
10671 Action::SnowflakeArrayPositionToDuckDB
10672 }
10673 // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
10674 Expression::ArrayDistinct(_)
10675 if matches!(target, DialectType::ClickHouse) =>
10676 {
10677 Action::ArrayDistinctClickHouse
10678 }
10679 // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
10680 Expression::ArrayDistinct(_)
10681 if matches!(target, DialectType::DuckDB)
10682 && matches!(source, DialectType::Snowflake) =>
10683 {
10684 Action::ArrayDistinctConvert
10685 }
10686 // StrPosition with position -> complex expansion for Presto/DuckDB
10687 // STRPOS doesn't support a position arg in these dialects
10688 Expression::StrPosition(ref sp)
10689 if sp.position.is_some()
10690 && matches!(
10691 target,
10692 DialectType::Presto
10693 | DialectType::Trino
10694 | DialectType::Athena
10695 | DialectType::DuckDB
10696 ) =>
10697 {
10698 Action::StrPositionExpand
10699 }
10700 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
10701 Expression::First(ref f)
10702 if f.ignore_nulls == Some(true)
10703 && matches!(target, DialectType::DuckDB) =>
10704 {
10705 Action::FirstToAnyValue
10706 }
10707 // BEGIN -> START TRANSACTION for Presto/Trino
10708 Expression::Command(ref cmd)
10709 if cmd.this.eq_ignore_ascii_case("BEGIN")
10710 && matches!(
10711 target,
10712 DialectType::Presto | DialectType::Trino | DialectType::Athena
10713 ) =>
10714 {
10715 // Handled inline below
10716 Action::None // We'll handle it directly
10717 }
10718 // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
10719 // PostgreSQL # is parsed as BitwiseXor (which is correct).
10720 // a || b (Concat operator) -> CONCAT function for Presto/Trino
10721 Expression::Concat(ref _op)
10722 if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
10723 && matches!(target, DialectType::Presto | DialectType::Trino) =>
10724 {
10725 Action::PipeConcatToConcat
10726 }
10727 _ => Action::None,
10728 }
10729 };
10730
10731 match action {
10732 Action::None => {
10733 // Handle inline transforms that don't need a dedicated action
10734 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
10735 if let Some(rewritten) = Self::rewrite_tsql_interval_arithmetic(&e) {
10736 return Ok(rewritten);
10737 }
10738 }
10739
10740 // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
10741 if let Expression::Between(ref b) = e {
10742 if let Some(sym) = b.symmetric {
10743 let keeps_symmetric =
10744 matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
10745 if !keeps_symmetric {
10746 if sym {
10747 // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
10748 let b = if let Expression::Between(b) = e {
10749 *b
10750 } else {
10751 unreachable!()
10752 };
10753 let between1 = Expression::Between(Box::new(
10754 crate::expressions::Between {
10755 this: b.this.clone(),
10756 low: b.low.clone(),
10757 high: b.high.clone(),
10758 not: b.not,
10759 symmetric: None,
10760 },
10761 ));
10762 let between2 = Expression::Between(Box::new(
10763 crate::expressions::Between {
10764 this: b.this,
10765 low: b.high,
10766 high: b.low,
10767 not: b.not,
10768 symmetric: None,
10769 },
10770 ));
10771 return Ok(Expression::Paren(Box::new(
10772 crate::expressions::Paren {
10773 this: Expression::Or(Box::new(
10774 crate::expressions::BinaryOp::new(
10775 between1, between2,
10776 ),
10777 )),
10778 trailing_comments: vec![],
10779 },
10780 )));
10781 } else {
10782 // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
10783 let b = if let Expression::Between(b) = e {
10784 *b
10785 } else {
10786 unreachable!()
10787 };
10788 return Ok(Expression::Between(Box::new(
10789 crate::expressions::Between {
10790 this: b.this,
10791 low: b.low,
10792 high: b.high,
10793 not: b.not,
10794 symmetric: None,
10795 },
10796 )));
10797 }
10798 }
10799 }
10800 }
10801
10802 // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
10803 if let Expression::ILike(ref _like) = e {
10804 if matches!(target, DialectType::StarRocks | DialectType::Doris) {
10805 let like = if let Expression::ILike(l) = e {
10806 *l
10807 } else {
10808 unreachable!()
10809 };
10810 let lower_left = Expression::Function(Box::new(Function::new(
10811 "LOWER".to_string(),
10812 vec![like.left],
10813 )));
10814 let lower_right = Expression::Function(Box::new(Function::new(
10815 "LOWER".to_string(),
10816 vec![like.right],
10817 )));
10818 return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
10819 left: lower_left,
10820 right: lower_right,
10821 escape: like.escape,
10822 quantifier: like.quantifier,
10823 inferred_type: None,
10824 })));
10825 }
10826 }
10827
10828 // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
10829 if let Expression::MethodCall(ref mc) = e {
10830 if matches!(source, DialectType::Oracle)
10831 && mc.method.name.eq_ignore_ascii_case("VALUE")
10832 && mc.args.is_empty()
10833 {
10834 let is_dbms_random = match &mc.this {
10835 Expression::Identifier(id) => {
10836 id.name.eq_ignore_ascii_case("DBMS_RANDOM")
10837 }
10838 Expression::Column(col) => {
10839 col.table.is_none()
10840 && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
10841 }
10842 _ => false,
10843 };
10844 if is_dbms_random {
10845 let func_name = match target {
10846 DialectType::PostgreSQL
10847 | DialectType::Redshift
10848 | DialectType::DuckDB
10849 | DialectType::SQLite => "RANDOM",
10850 DialectType::Oracle => "DBMS_RANDOM.VALUE",
10851 _ => "RAND",
10852 };
10853 return Ok(Expression::Function(Box::new(Function::new(
10854 func_name.to_string(),
10855 vec![],
10856 ))));
10857 }
10858 }
10859 }
10860 // TRIM without explicit position -> add BOTH for ClickHouse
10861 if let Expression::Trim(ref trim) = e {
10862 if matches!(target, DialectType::ClickHouse)
10863 && trim.sql_standard_syntax
10864 && trim.characters.is_some()
10865 && !trim.position_explicit
10866 {
10867 let mut new_trim = (**trim).clone();
10868 new_trim.position_explicit = true;
10869 return Ok(Expression::Trim(Box::new(new_trim)));
10870 }
10871 }
10872 // BEGIN -> START TRANSACTION for Presto/Trino/Athena
10873 if let Expression::Transaction(ref txn) = e {
10874 if matches!(
10875 target,
10876 DialectType::Presto | DialectType::Trino | DialectType::Athena
10877 ) {
10878 // Convert BEGIN to START TRANSACTION by setting mark to "START"
10879 let mut txn = txn.clone();
10880 txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
10881 "START".to_string(),
10882 ))));
10883 return Ok(Expression::Transaction(Box::new(*txn)));
10884 }
10885 }
10886 // IS TRUE/FALSE -> simplified forms for Presto/Trino/Athena
10887 if matches!(
10888 target,
10889 DialectType::Presto | DialectType::Trino | DialectType::Athena
10890 ) {
10891 match &e {
10892 Expression::IsTrue(itf) if !itf.not => {
10893 // x IS TRUE -> x
10894 return Ok(itf.this.clone());
10895 }
10896 Expression::IsTrue(itf) if itf.not => {
10897 // x IS NOT TRUE -> NOT x
10898 return Ok(Expression::Not(Box::new(
10899 crate::expressions::UnaryOp {
10900 this: itf.this.clone(),
10901 inferred_type: None,
10902 },
10903 )));
10904 }
10905 Expression::IsFalse(itf) if !itf.not => {
10906 // x IS FALSE -> NOT x
10907 return Ok(Expression::Not(Box::new(
10908 crate::expressions::UnaryOp {
10909 this: itf.this.clone(),
10910 inferred_type: None,
10911 },
10912 )));
10913 }
10914 Expression::IsFalse(itf) if itf.not => {
10915 // x IS NOT FALSE -> NOT NOT x
10916 let not_x =
10917 Expression::Not(Box::new(crate::expressions::UnaryOp {
10918 this: itf.this.clone(),
10919 inferred_type: None,
10920 }));
10921 return Ok(Expression::Not(Box::new(
10922 crate::expressions::UnaryOp {
10923 this: not_x,
10924 inferred_type: None,
10925 },
10926 )));
10927 }
10928 _ => {}
10929 }
10930 }
10931 // x IS NOT FALSE -> NOT x IS FALSE for Redshift
10932 if matches!(target, DialectType::Redshift) {
10933 if let Expression::IsFalse(ref itf) = e {
10934 if itf.not {
10935 return Ok(Expression::Not(Box::new(
10936 crate::expressions::UnaryOp {
10937 this: Expression::IsFalse(Box::new(
10938 crate::expressions::IsTrueFalse {
10939 this: itf.this.clone(),
10940 not: false,
10941 },
10942 )),
10943 inferred_type: None,
10944 },
10945 )));
10946 }
10947 }
10948 }
10949 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
10950 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
10951 if let Expression::Function(ref f) = e {
10952 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
10953 && matches!(source, DialectType::Snowflake)
10954 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
10955 {
10956 if f.args.len() == 3 {
10957 let mut args = f.args.clone();
10958 args.push(Expression::string("g"));
10959 return Ok(Expression::Function(Box::new(Function::new(
10960 "REGEXP_REPLACE".to_string(),
10961 args,
10962 ))));
10963 } else if f.args.len() == 4 {
10964 // 4th arg might be position, add 'g' as 5th
10965 let mut args = f.args.clone();
10966 args.push(Expression::string("g"));
10967 return Ok(Expression::Function(Box::new(Function::new(
10968 "REGEXP_REPLACE".to_string(),
10969 args,
10970 ))));
10971 }
10972 }
10973 }
10974 Ok(e)
10975 }
10976
10977 Action::GreatestLeastNull => {
10978 let f = if let Expression::Function(f) = e {
10979 *f
10980 } else {
10981 unreachable!("action only triggered for Function expressions")
10982 };
10983 let mut null_checks: Vec<Expression> = f
10984 .args
10985 .iter()
10986 .map(|a| {
10987 Expression::IsNull(Box::new(IsNull {
10988 this: a.clone(),
10989 not: false,
10990 postfix_form: false,
10991 }))
10992 })
10993 .collect();
10994 let condition = if null_checks.len() == 1 {
10995 null_checks.remove(0)
10996 } else {
10997 let first = null_checks.remove(0);
10998 null_checks.into_iter().fold(first, |acc, check| {
10999 Expression::Or(Box::new(BinaryOp::new(acc, check)))
11000 })
11001 };
11002 Ok(Expression::Case(Box::new(Case {
11003 operand: None,
11004 whens: vec![(condition, Expression::Null(Null))],
11005 else_: Some(Expression::Function(Box::new(Function::new(
11006 f.name, f.args,
11007 )))),
11008 comments: Vec::new(),
11009 inferred_type: None,
11010 })))
11011 }
11012
11013 Action::ArrayGenerateRange => {
11014 let f = if let Expression::Function(f) = e {
11015 *f
11016 } else {
11017 unreachable!("action only triggered for Function expressions")
11018 };
11019 let start = f.args[0].clone();
11020 let end = f.args[1].clone();
11021 let step = f.args.get(2).cloned();
11022
11023 // Helper: compute end - 1 for converting exclusive→inclusive end.
11024 // When end is a literal number, simplify to a computed literal.
11025 fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
11026 // Try to simplify literal numbers
11027 match end {
11028 Expression::Literal(lit)
11029 if matches!(lit.as_ref(), Literal::Number(_)) =>
11030 {
11031 let Literal::Number(n) = lit.as_ref() else {
11032 unreachable!()
11033 };
11034 if let Ok(val) = n.parse::<i64>() {
11035 return Expression::number(val - 1);
11036 }
11037 }
11038 Expression::Neg(u) => {
11039 if let Expression::Literal(lit) = &u.this {
11040 if let Literal::Number(n) = lit.as_ref() {
11041 if let Ok(val) = n.parse::<i64>() {
11042 return Expression::number(-val - 1);
11043 }
11044 }
11045 }
11046 }
11047 _ => {}
11048 }
11049 // Non-literal: produce end - 1 expression
11050 Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
11051 }
11052
11053 match target {
11054 // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
11055 // so no adjustment needed — just rename the function.
11056 DialectType::Snowflake => {
11057 let mut args = vec![start, end];
11058 if let Some(s) = step {
11059 args.push(s);
11060 }
11061 Ok(Expression::Function(Box::new(Function::new(
11062 "ARRAY_GENERATE_RANGE".to_string(),
11063 args,
11064 ))))
11065 }
11066 DialectType::DuckDB => {
11067 let mut args = vec![start, end];
11068 if let Some(s) = step {
11069 args.push(s);
11070 }
11071 Ok(Expression::Function(Box::new(Function::new(
11072 "RANGE".to_string(),
11073 args,
11074 ))))
11075 }
11076 // These dialects use inclusive end, so convert exclusive→inclusive.
11077 // Presto/Trino: simplify literal numbers (3 → 2).
11078 DialectType::Presto | DialectType::Trino => {
11079 let end_inclusive = exclusive_to_inclusive_end(&end);
11080 let mut args = vec![start, end_inclusive];
11081 if let Some(s) = step {
11082 args.push(s);
11083 }
11084 Ok(Expression::Function(Box::new(Function::new(
11085 "SEQUENCE".to_string(),
11086 args,
11087 ))))
11088 }
11089 // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
11090 DialectType::PostgreSQL | DialectType::Redshift => {
11091 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
11092 end.clone(),
11093 Expression::number(1),
11094 )));
11095 let mut args = vec![start, end_minus_1];
11096 if let Some(s) = step {
11097 args.push(s);
11098 }
11099 Ok(Expression::Function(Box::new(Function::new(
11100 "GENERATE_SERIES".to_string(),
11101 args,
11102 ))))
11103 }
11104 DialectType::BigQuery => {
11105 let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
11106 end.clone(),
11107 Expression::number(1),
11108 )));
11109 let mut args = vec![start, end_minus_1];
11110 if let Some(s) = step {
11111 args.push(s);
11112 }
11113 Ok(Expression::Function(Box::new(Function::new(
11114 "GENERATE_ARRAY".to_string(),
11115 args,
11116 ))))
11117 }
11118 _ => Ok(Expression::Function(Box::new(Function::new(
11119 f.name, f.args,
11120 )))),
11121 }
11122 }
11123
11124 Action::Div0TypedDivision => {
11125 let if_func = if let Expression::IfFunc(f) = e {
11126 *f
11127 } else {
11128 unreachable!("action only triggered for IfFunc expressions")
11129 };
11130 if let Some(Expression::Div(div)) = if_func.false_value {
11131 let cast_type = if matches!(target, DialectType::SQLite) {
11132 DataType::Float {
11133 precision: None,
11134 scale: None,
11135 real_spelling: true,
11136 }
11137 } else {
11138 DataType::Double {
11139 precision: None,
11140 scale: None,
11141 }
11142 };
11143 let casted_left = Expression::Cast(Box::new(Cast {
11144 this: div.left,
11145 to: cast_type,
11146 trailing_comments: vec![],
11147 double_colon_syntax: false,
11148 format: None,
11149 default: None,
11150 inferred_type: None,
11151 }));
11152 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
11153 condition: if_func.condition,
11154 true_value: if_func.true_value,
11155 false_value: Some(Expression::Div(Box::new(BinaryOp::new(
11156 casted_left,
11157 div.right,
11158 )))),
11159 original_name: if_func.original_name,
11160 inferred_type: None,
11161 })))
11162 } else {
11163 // Not actually a Div, reconstruct
11164 Ok(Expression::IfFunc(Box::new(if_func)))
11165 }
11166 }
11167
11168 Action::ArrayAggCollectList => {
11169 let agg = if let Expression::ArrayAgg(a) = e {
11170 *a
11171 } else {
11172 unreachable!("action only triggered for ArrayAgg expressions")
11173 };
11174 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11175 name: Some("COLLECT_LIST".to_string()),
11176 ..agg
11177 })))
11178 }
11179
11180 Action::ArrayAggToGroupConcat => {
11181 let agg = if let Expression::ArrayAgg(a) = e {
11182 *a
11183 } else {
11184 unreachable!("action only triggered for ArrayAgg expressions")
11185 };
11186 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11187 name: Some("GROUP_CONCAT".to_string()),
11188 ..agg
11189 })))
11190 }
11191
11192 Action::ArrayAggWithinGroupFilter => {
11193 let wg = if let Expression::WithinGroup(w) = e {
11194 *w
11195 } else {
11196 unreachable!("action only triggered for WithinGroup expressions")
11197 };
11198 if let Expression::ArrayAgg(inner_agg) = wg.this {
11199 let col = inner_agg.this.clone();
11200 let filter = Expression::IsNull(Box::new(IsNull {
11201 this: col,
11202 not: true,
11203 postfix_form: false,
11204 }));
11205 // For DuckDB, add explicit NULLS FIRST for DESC ordering
11206 let order_by = if matches!(target, DialectType::DuckDB) {
11207 wg.order_by
11208 .into_iter()
11209 .map(|mut o| {
11210 if o.desc && o.nulls_first.is_none() {
11211 o.nulls_first = Some(true);
11212 }
11213 o
11214 })
11215 .collect()
11216 } else {
11217 wg.order_by
11218 };
11219 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11220 this: inner_agg.this,
11221 distinct: inner_agg.distinct,
11222 filter: Some(filter),
11223 order_by,
11224 name: inner_agg.name,
11225 ignore_nulls: inner_agg.ignore_nulls,
11226 having_max: inner_agg.having_max,
11227 limit: inner_agg.limit,
11228 inferred_type: None,
11229 })))
11230 } else {
11231 Ok(Expression::WithinGroup(Box::new(wg)))
11232 }
11233 }
11234
11235 Action::ArrayAggFilter => {
11236 let agg = if let Expression::ArrayAgg(a) = e {
11237 *a
11238 } else {
11239 unreachable!("action only triggered for ArrayAgg expressions")
11240 };
11241 let col = agg.this.clone();
11242 let filter = Expression::IsNull(Box::new(IsNull {
11243 this: col,
11244 not: true,
11245 postfix_form: false,
11246 }));
11247 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11248 filter: Some(filter),
11249 ..agg
11250 })))
11251 }
11252
11253 Action::ArrayAggNullFilter => {
11254 // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
11255 // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
11256 let agg = if let Expression::ArrayAgg(a) = e {
11257 *a
11258 } else {
11259 unreachable!("action only triggered for ArrayAgg expressions")
11260 };
11261 let col = agg.this.clone();
11262 let not_null = Expression::IsNull(Box::new(IsNull {
11263 this: col,
11264 not: true,
11265 postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
11266 }));
11267 let new_filter = if let Some(existing_filter) = agg.filter {
11268 // AND the NOT IS NULL with existing filter
11269 Expression::And(Box::new(crate::expressions::BinaryOp::new(
11270 existing_filter,
11271 not_null,
11272 )))
11273 } else {
11274 not_null
11275 };
11276 Ok(Expression::ArrayAgg(Box::new(AggFunc {
11277 filter: Some(new_filter),
11278 ..agg
11279 })))
11280 }
11281
11282 Action::BigQueryArraySelectAsStructToSnowflake => {
11283 // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
11284 // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
11285 if let Expression::Function(mut f) = e {
11286 let is_match = f.args.len() == 1
11287 && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
11288 if is_match {
11289 let inner_select = match f.args.remove(0) {
11290 Expression::Select(s) => *s,
11291 _ => unreachable!(
11292 "argument already verified to be a Select expression"
11293 ),
11294 };
11295 // Build OBJECT_CONSTRUCT args from SELECT expressions
11296 let mut oc_args = Vec::new();
11297 for expr in &inner_select.expressions {
11298 match expr {
11299 Expression::Alias(a) => {
11300 let key = Expression::Literal(Box::new(Literal::String(
11301 a.alias.name.clone(),
11302 )));
11303 let value = a.this.clone();
11304 oc_args.push(key);
11305 oc_args.push(value);
11306 }
11307 Expression::Column(c) => {
11308 let key = Expression::Literal(Box::new(Literal::String(
11309 c.name.name.clone(),
11310 )));
11311 oc_args.push(key);
11312 oc_args.push(expr.clone());
11313 }
11314 _ => {
11315 oc_args.push(expr.clone());
11316 }
11317 }
11318 }
11319 let object_construct = Expression::Function(Box::new(Function::new(
11320 "OBJECT_CONSTRUCT".to_string(),
11321 oc_args,
11322 )));
11323 let array_agg = Expression::Function(Box::new(Function::new(
11324 "ARRAY_AGG".to_string(),
11325 vec![object_construct],
11326 )));
11327 let mut new_select = crate::expressions::Select::new();
11328 new_select.expressions = vec![array_agg];
11329 new_select.from = inner_select.from.clone();
11330 new_select.where_clause = inner_select.where_clause.clone();
11331 new_select.group_by = inner_select.group_by.clone();
11332 new_select.having = inner_select.having.clone();
11333 new_select.joins = inner_select.joins.clone();
11334 Ok(Expression::Subquery(Box::new(
11335 crate::expressions::Subquery {
11336 this: Expression::Select(Box::new(new_select)),
11337 alias: None,
11338 column_aliases: Vec::new(),
11339 alias_explicit_as: false,
11340 alias_keyword: None,
11341 order_by: None,
11342 limit: None,
11343 offset: None,
11344 distribute_by: None,
11345 sort_by: None,
11346 cluster_by: None,
11347 lateral: false,
11348 modifiers_inside: false,
11349 trailing_comments: Vec::new(),
11350 inferred_type: None,
11351 },
11352 )))
11353 } else {
11354 Ok(Expression::Function(f))
11355 }
11356 } else {
11357 Ok(e)
11358 }
11359 }
11360
11361 Action::BigQueryPercentileContToDuckDB => {
11362 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
11363 if let Expression::AggregateFunction(mut af) = e {
11364 af.name = "QUANTILE_CONT".to_string();
11365 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
11366 // Keep only first 2 args
11367 if af.args.len() > 2 {
11368 af.args.truncate(2);
11369 }
11370 Ok(Expression::AggregateFunction(af))
11371 } else {
11372 Ok(e)
11373 }
11374 }
11375
11376 Action::ArrayAggIgnoreNullsDuckDB => {
11377 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
11378 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
11379 let mut agg = if let Expression::ArrayAgg(a) = e {
11380 *a
11381 } else {
11382 unreachable!("action only triggered for ArrayAgg expressions")
11383 };
11384 agg.ignore_nulls = None; // Strip IGNORE NULLS
11385 if !agg.order_by.is_empty() {
11386 agg.order_by[0].nulls_first = Some(true);
11387 }
11388 Ok(Expression::ArrayAgg(Box::new(agg)))
11389 }
11390
11391 Action::CountDistinctMultiArg => {
11392 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
11393 if let Expression::Count(c) = e {
11394 if let Some(Expression::Tuple(t)) = c.this {
11395 let args = t.expressions;
11396 // Build CASE expression:
11397 // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
11398 let mut whens = Vec::new();
11399 for arg in &args {
11400 whens.push((
11401 Expression::IsNull(Box::new(IsNull {
11402 this: arg.clone(),
11403 not: false,
11404 postfix_form: false,
11405 })),
11406 Expression::Null(crate::expressions::Null),
11407 ));
11408 }
11409 // Build the tuple for ELSE
11410 let tuple_expr =
11411 Expression::Tuple(Box::new(crate::expressions::Tuple {
11412 expressions: args,
11413 }));
11414 let case_expr = Expression::Case(Box::new(crate::expressions::Case {
11415 operand: None,
11416 whens,
11417 else_: Some(tuple_expr),
11418 comments: Vec::new(),
11419 inferred_type: None,
11420 }));
11421 Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
11422 this: Some(case_expr),
11423 star: false,
11424 distinct: true,
11425 filter: c.filter,
11426 ignore_nulls: c.ignore_nulls,
11427 original_name: c.original_name,
11428 inferred_type: None,
11429 })))
11430 } else {
11431 Ok(Expression::Count(c))
11432 }
11433 } else {
11434 Ok(e)
11435 }
11436 }
11437
11438 Action::CastTimestampToDatetime => {
11439 let c = if let Expression::Cast(c) = e {
11440 *c
11441 } else {
11442 unreachable!("action only triggered for Cast expressions")
11443 };
11444 Ok(Expression::Cast(Box::new(Cast {
11445 to: DataType::Custom {
11446 name: "DATETIME".to_string(),
11447 },
11448 ..c
11449 })))
11450 }
11451
11452 Action::CastTimestampStripTz => {
11453 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
11454 let c = if let Expression::Cast(c) = e {
11455 *c
11456 } else {
11457 unreachable!("action only triggered for Cast expressions")
11458 };
11459 Ok(Expression::Cast(Box::new(Cast {
11460 to: DataType::Timestamp {
11461 precision: None,
11462 timezone: false,
11463 },
11464 ..c
11465 })))
11466 }
11467
11468 Action::CastTimestamptzToFunc => {
11469 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
11470 let c = if let Expression::Cast(c) = e {
11471 *c
11472 } else {
11473 unreachable!("action only triggered for Cast expressions")
11474 };
11475 Ok(Expression::Function(Box::new(Function::new(
11476 "TIMESTAMP".to_string(),
11477 vec![c.this],
11478 ))))
11479 }
11480
11481 Action::ToDateToCast => {
11482 // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
11483 if let Expression::Function(f) = e {
11484 let arg = f.args.into_iter().next().unwrap();
11485 Ok(Expression::Cast(Box::new(Cast {
11486 this: arg,
11487 to: DataType::Date,
11488 double_colon_syntax: false,
11489 trailing_comments: vec![],
11490 format: None,
11491 default: None,
11492 inferred_type: None,
11493 })))
11494 } else {
11495 Ok(e)
11496 }
11497 }
11498 Action::DateTruncWrapCast => {
11499 // Handle both Expression::DateTrunc/TimestampTrunc and
11500 // Expression::Function("DATE_TRUNC", [unit, expr])
11501 match e {
11502 Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
11503 let input_type = match &d.this {
11504 Expression::Cast(c) => Some(c.to.clone()),
11505 _ => None,
11506 };
11507 if let Some(cast_type) = input_type {
11508 let is_time = matches!(cast_type, DataType::Time { .. });
11509 if is_time {
11510 let date_expr = Expression::Cast(Box::new(Cast {
11511 this: Expression::Literal(Box::new(
11512 crate::expressions::Literal::String(
11513 "1970-01-01".to_string(),
11514 ),
11515 )),
11516 to: DataType::Date,
11517 double_colon_syntax: false,
11518 trailing_comments: vec![],
11519 format: None,
11520 default: None,
11521 inferred_type: None,
11522 }));
11523 let add_expr =
11524 Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
11525 let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
11526 this: add_expr,
11527 unit: d.unit,
11528 }));
11529 Ok(Expression::Cast(Box::new(Cast {
11530 this: inner,
11531 to: cast_type,
11532 double_colon_syntax: false,
11533 trailing_comments: vec![],
11534 format: None,
11535 default: None,
11536 inferred_type: None,
11537 })))
11538 } else {
11539 let inner = Expression::DateTrunc(Box::new(*d));
11540 Ok(Expression::Cast(Box::new(Cast {
11541 this: inner,
11542 to: cast_type,
11543 double_colon_syntax: false,
11544 trailing_comments: vec![],
11545 format: None,
11546 default: None,
11547 inferred_type: None,
11548 })))
11549 }
11550 } else {
11551 Ok(Expression::DateTrunc(d))
11552 }
11553 }
11554 Expression::Function(f) if f.args.len() == 2 => {
11555 // Function-based DATE_TRUNC(unit, expr)
11556 let input_type = match &f.args[1] {
11557 Expression::Cast(c) => Some(c.to.clone()),
11558 _ => None,
11559 };
11560 if let Some(cast_type) = input_type {
11561 let is_time = matches!(cast_type, DataType::Time { .. });
11562 if is_time {
11563 let date_expr = Expression::Cast(Box::new(Cast {
11564 this: Expression::Literal(Box::new(
11565 crate::expressions::Literal::String(
11566 "1970-01-01".to_string(),
11567 ),
11568 )),
11569 to: DataType::Date,
11570 double_colon_syntax: false,
11571 trailing_comments: vec![],
11572 format: None,
11573 default: None,
11574 inferred_type: None,
11575 }));
11576 let mut args = f.args;
11577 let unit_arg = args.remove(0);
11578 let time_expr = args.remove(0);
11579 let add_expr = Expression::Add(Box::new(BinaryOp::new(
11580 date_expr, time_expr,
11581 )));
11582 let inner = Expression::Function(Box::new(Function::new(
11583 "DATE_TRUNC".to_string(),
11584 vec![unit_arg, add_expr],
11585 )));
11586 Ok(Expression::Cast(Box::new(Cast {
11587 this: inner,
11588 to: cast_type,
11589 double_colon_syntax: false,
11590 trailing_comments: vec![],
11591 format: None,
11592 default: None,
11593 inferred_type: None,
11594 })))
11595 } else {
11596 // Wrap the function in CAST
11597 Ok(Expression::Cast(Box::new(Cast {
11598 this: Expression::Function(f),
11599 to: cast_type,
11600 double_colon_syntax: false,
11601 trailing_comments: vec![],
11602 format: None,
11603 default: None,
11604 inferred_type: None,
11605 })))
11606 }
11607 } else {
11608 Ok(Expression::Function(f))
11609 }
11610 }
11611 other => Ok(other),
11612 }
11613 }
11614
11615 Action::RegexpReplaceSnowflakeToDuckDB => {
11616 // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
11617 if let Expression::Function(f) = e {
11618 let mut args = f.args;
11619 let subject = args.remove(0);
11620 let pattern = args.remove(0);
11621 let replacement = args.remove(0);
11622 Ok(Expression::Function(Box::new(Function::new(
11623 "REGEXP_REPLACE".to_string(),
11624 vec![
11625 subject,
11626 pattern,
11627 replacement,
11628 Expression::Literal(Box::new(crate::expressions::Literal::String(
11629 "g".to_string(),
11630 ))),
11631 ],
11632 ))))
11633 } else {
11634 Ok(e)
11635 }
11636 }
11637
11638 Action::RegexpReplacePositionSnowflakeToDuckDB => {
11639 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
11640 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
11641 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
11642 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
11643 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
11644 if let Expression::Function(f) = e {
11645 let mut args = f.args;
11646 let subject = args.remove(0);
11647 let pattern = args.remove(0);
11648 let replacement = args.remove(0);
11649 let position = args.remove(0);
11650 let occurrence = args.remove(0);
11651
11652 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11653 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11654 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11655
11656 if is_pos_1 && is_occ_1 {
11657 // REGEXP_REPLACE(s, p, r) - single replace, no flags
11658 Ok(Expression::Function(Box::new(Function::new(
11659 "REGEXP_REPLACE".to_string(),
11660 vec![subject, pattern, replacement],
11661 ))))
11662 } else if is_pos_1 && is_occ_0 {
11663 // REGEXP_REPLACE(s, p, r, 'g') - global replace
11664 Ok(Expression::Function(Box::new(Function::new(
11665 "REGEXP_REPLACE".to_string(),
11666 vec![
11667 subject,
11668 pattern,
11669 replacement,
11670 Expression::Literal(Box::new(Literal::String("g".to_string()))),
11671 ],
11672 ))))
11673 } else {
11674 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
11675 // Pre-compute pos-1 when position is a numeric literal
11676 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
11677 if let Literal::Number(ref n) = lit.as_ref() {
11678 if let Ok(val) = n.parse::<i64>() {
11679 Expression::number(val - 1)
11680 } else {
11681 Expression::Sub(Box::new(BinaryOp::new(
11682 position.clone(),
11683 Expression::number(1),
11684 )))
11685 }
11686 } else {
11687 position.clone()
11688 }
11689 } else {
11690 Expression::Sub(Box::new(BinaryOp::new(
11691 position.clone(),
11692 Expression::number(1),
11693 )))
11694 };
11695 let prefix = Expression::Function(Box::new(Function::new(
11696 "SUBSTRING".to_string(),
11697 vec![subject.clone(), Expression::number(1), pos_minus_1],
11698 )));
11699 let suffix_subject = Expression::Function(Box::new(Function::new(
11700 "SUBSTRING".to_string(),
11701 vec![subject, position],
11702 )));
11703 let mut replace_args = vec![suffix_subject, pattern, replacement];
11704 if is_occ_0 {
11705 replace_args.push(Expression::Literal(Box::new(Literal::String(
11706 "g".to_string(),
11707 ))));
11708 }
11709 let replace_expr = Expression::Function(Box::new(Function::new(
11710 "REGEXP_REPLACE".to_string(),
11711 replace_args,
11712 )));
11713 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
11714 this: Box::new(prefix),
11715 expression: Box::new(replace_expr),
11716 safe: None,
11717 })))
11718 }
11719 } else {
11720 Ok(e)
11721 }
11722 }
11723
11724 Action::RegexpSubstrSnowflakeToDuckDB => {
11725 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
11726 if let Expression::Function(f) = e {
11727 let mut args = f.args;
11728 let arg_count = args.len();
11729 match arg_count {
11730 // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
11731 0..=2 => Ok(Expression::Function(Box::new(Function::new(
11732 "REGEXP_EXTRACT".to_string(),
11733 args,
11734 )))),
11735 // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
11736 3 => {
11737 let subject = args.remove(0);
11738 let pattern = args.remove(0);
11739 let position = args.remove(0);
11740 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11741 if is_pos_1 {
11742 Ok(Expression::Function(Box::new(Function::new(
11743 "REGEXP_EXTRACT".to_string(),
11744 vec![subject, pattern],
11745 ))))
11746 } else {
11747 let substring_expr =
11748 Expression::Function(Box::new(Function::new(
11749 "SUBSTRING".to_string(),
11750 vec![subject, position],
11751 )));
11752 let nullif_expr =
11753 Expression::Function(Box::new(Function::new(
11754 "NULLIF".to_string(),
11755 vec![
11756 substring_expr,
11757 Expression::Literal(Box::new(Literal::String(
11758 String::new(),
11759 ))),
11760 ],
11761 )));
11762 Ok(Expression::Function(Box::new(Function::new(
11763 "REGEXP_EXTRACT".to_string(),
11764 vec![nullif_expr, pattern],
11765 ))))
11766 }
11767 }
11768 // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
11769 4 => {
11770 let subject = args.remove(0);
11771 let pattern = args.remove(0);
11772 let position = args.remove(0);
11773 let occurrence = args.remove(0);
11774 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11775 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11776
11777 let effective_subject = if is_pos_1 {
11778 subject
11779 } else {
11780 let substring_expr =
11781 Expression::Function(Box::new(Function::new(
11782 "SUBSTRING".to_string(),
11783 vec![subject, position],
11784 )));
11785 Expression::Function(Box::new(Function::new(
11786 "NULLIF".to_string(),
11787 vec![
11788 substring_expr,
11789 Expression::Literal(Box::new(Literal::String(
11790 String::new(),
11791 ))),
11792 ],
11793 )))
11794 };
11795
11796 if is_occ_1 {
11797 Ok(Expression::Function(Box::new(Function::new(
11798 "REGEXP_EXTRACT".to_string(),
11799 vec![effective_subject, pattern],
11800 ))))
11801 } else {
11802 // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
11803 let extract_all =
11804 Expression::Function(Box::new(Function::new(
11805 "REGEXP_EXTRACT_ALL".to_string(),
11806 vec![effective_subject, pattern],
11807 )));
11808 Ok(Expression::Function(Box::new(Function::new(
11809 "ARRAY_EXTRACT".to_string(),
11810 vec![extract_all, occurrence],
11811 ))))
11812 }
11813 }
11814 // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
11815 5 => {
11816 let subject = args.remove(0);
11817 let pattern = args.remove(0);
11818 let _position = args.remove(0);
11819 let _occurrence = args.remove(0);
11820 let _flags = args.remove(0);
11821 // Strip 'e' flag, convert to REGEXP_EXTRACT
11822 Ok(Expression::Function(Box::new(Function::new(
11823 "REGEXP_EXTRACT".to_string(),
11824 vec![subject, pattern],
11825 ))))
11826 }
11827 // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
11828 _ => {
11829 let subject = args.remove(0);
11830 let pattern = args.remove(0);
11831 let _position = args.remove(0);
11832 let _occurrence = args.remove(0);
11833 let _flags = args.remove(0);
11834 let group = args.remove(0);
11835 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11836 if is_group_0 {
11837 // Strip group=0 (default)
11838 Ok(Expression::Function(Box::new(Function::new(
11839 "REGEXP_EXTRACT".to_string(),
11840 vec![subject, pattern],
11841 ))))
11842 } else {
11843 Ok(Expression::Function(Box::new(Function::new(
11844 "REGEXP_EXTRACT".to_string(),
11845 vec![subject, pattern, group],
11846 ))))
11847 }
11848 }
11849 }
11850 } else {
11851 Ok(e)
11852 }
11853 }
11854
11855 Action::RegexpSubstrSnowflakeIdentity => {
11856 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
11857 // Strip trailing group=0
11858 if let Expression::Function(f) = e {
11859 let func_name = f.name.clone();
11860 let mut args = f.args;
11861 if args.len() == 6 {
11862 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11863 if is_group_0 {
11864 args.truncate(5);
11865 }
11866 }
11867 Ok(Expression::Function(Box::new(Function::new(
11868 func_name, args,
11869 ))))
11870 } else {
11871 Ok(e)
11872 }
11873 }
11874
11875 Action::RegexpSubstrAllSnowflakeToDuckDB => {
11876 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
11877 if let Expression::Function(f) = e {
11878 let mut args = f.args;
11879 let arg_count = args.len();
11880 match arg_count {
11881 // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
11882 0..=2 => Ok(Expression::Function(Box::new(Function::new(
11883 "REGEXP_EXTRACT_ALL".to_string(),
11884 args,
11885 )))),
11886 // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
11887 3 => {
11888 let subject = args.remove(0);
11889 let pattern = args.remove(0);
11890 let position = args.remove(0);
11891 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11892 if is_pos_1 {
11893 Ok(Expression::Function(Box::new(Function::new(
11894 "REGEXP_EXTRACT_ALL".to_string(),
11895 vec![subject, pattern],
11896 ))))
11897 } else {
11898 let substring_expr =
11899 Expression::Function(Box::new(Function::new(
11900 "SUBSTRING".to_string(),
11901 vec![subject, position],
11902 )));
11903 Ok(Expression::Function(Box::new(Function::new(
11904 "REGEXP_EXTRACT_ALL".to_string(),
11905 vec![substring_expr, pattern],
11906 ))))
11907 }
11908 }
11909 // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
11910 4 => {
11911 let subject = args.remove(0);
11912 let pattern = args.remove(0);
11913 let position = args.remove(0);
11914 let occurrence = args.remove(0);
11915 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11916 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
11917
11918 let effective_subject = if is_pos_1 {
11919 subject
11920 } else {
11921 Expression::Function(Box::new(Function::new(
11922 "SUBSTRING".to_string(),
11923 vec![subject, position],
11924 )))
11925 };
11926
11927 if is_occ_1 {
11928 Ok(Expression::Function(Box::new(Function::new(
11929 "REGEXP_EXTRACT_ALL".to_string(),
11930 vec![effective_subject, pattern],
11931 ))))
11932 } else {
11933 // REGEXP_EXTRACT_ALL(s, p)[occ:]
11934 let extract_all =
11935 Expression::Function(Box::new(Function::new(
11936 "REGEXP_EXTRACT_ALL".to_string(),
11937 vec![effective_subject, pattern],
11938 )));
11939 Ok(Expression::ArraySlice(Box::new(
11940 crate::expressions::ArraySlice {
11941 this: extract_all,
11942 start: Some(occurrence),
11943 end: None,
11944 },
11945 )))
11946 }
11947 }
11948 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
11949 5 => {
11950 let subject = args.remove(0);
11951 let pattern = args.remove(0);
11952 let _position = args.remove(0);
11953 let _occurrence = args.remove(0);
11954 let _flags = args.remove(0);
11955 Ok(Expression::Function(Box::new(Function::new(
11956 "REGEXP_EXTRACT_ALL".to_string(),
11957 vec![subject, pattern],
11958 ))))
11959 }
11960 // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
11961 _ => {
11962 let subject = args.remove(0);
11963 let pattern = args.remove(0);
11964 let _position = args.remove(0);
11965 let _occurrence = args.remove(0);
11966 let _flags = args.remove(0);
11967 let group = args.remove(0);
11968 let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
11969 if is_group_0 {
11970 Ok(Expression::Function(Box::new(Function::new(
11971 "REGEXP_EXTRACT_ALL".to_string(),
11972 vec![subject, pattern],
11973 ))))
11974 } else {
11975 Ok(Expression::Function(Box::new(Function::new(
11976 "REGEXP_EXTRACT_ALL".to_string(),
11977 vec![subject, pattern, group],
11978 ))))
11979 }
11980 }
11981 }
11982 } else {
11983 Ok(e)
11984 }
11985 }
11986
11987 Action::RegexpCountSnowflakeToDuckDB => {
11988 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
11989 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
11990 if let Expression::Function(f) = e {
11991 let mut args = f.args;
11992 let arg_count = args.len();
11993 let subject = args.remove(0);
11994 let pattern = args.remove(0);
11995
11996 // Handle position arg
11997 let effective_subject = if arg_count >= 3 {
11998 let position = args.remove(0);
11999 Expression::Function(Box::new(Function::new(
12000 "SUBSTRING".to_string(),
12001 vec![subject, position],
12002 )))
12003 } else {
12004 subject
12005 };
12006
12007 // Handle flags arg -> embed as (?flags) prefix in pattern
12008 let effective_pattern = if arg_count >= 4 {
12009 let flags = args.remove(0);
12010 match &flags {
12011 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
12012 {
12013 let Literal::String(f_str) = lit.as_ref() else {
12014 unreachable!()
12015 };
12016 // Always use concatenation: '(?flags)' || pattern
12017 let prefix = Expression::Literal(Box::new(Literal::String(
12018 format!("(?{})", f_str),
12019 )));
12020 Expression::DPipe(Box::new(crate::expressions::DPipe {
12021 this: Box::new(prefix),
12022 expression: Box::new(pattern.clone()),
12023 safe: None,
12024 }))
12025 }
12026 _ => pattern.clone(),
12027 }
12028 } else {
12029 pattern.clone()
12030 };
12031
12032 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
12033 let extract_all = Expression::Function(Box::new(Function::new(
12034 "REGEXP_EXTRACT_ALL".to_string(),
12035 vec![effective_subject, effective_pattern.clone()],
12036 )));
12037 let length_expr =
12038 Expression::Length(Box::new(crate::expressions::UnaryFunc {
12039 this: extract_all,
12040 original_name: None,
12041 inferred_type: None,
12042 }));
12043 let condition = Expression::Eq(Box::new(BinaryOp::new(
12044 effective_pattern,
12045 Expression::Literal(Box::new(Literal::String(String::new()))),
12046 )));
12047 Ok(Expression::Case(Box::new(Case {
12048 operand: None,
12049 whens: vec![(condition, Expression::number(0))],
12050 else_: Some(length_expr),
12051 comments: vec![],
12052 inferred_type: None,
12053 })))
12054 } else {
12055 Ok(e)
12056 }
12057 }
12058
12059 Action::RegexpInstrSnowflakeToDuckDB => {
12060 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
12061 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
12062 // WHEN p = '' THEN 0
12063 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
12064 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
12065 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
12066 // + pos_offset
12067 // END
12068 if let Expression::Function(f) = e {
12069 let mut args = f.args;
12070 let subject = args.remove(0);
12071 let pattern = if !args.is_empty() {
12072 args.remove(0)
12073 } else {
12074 Expression::Literal(Box::new(Literal::String(String::new())))
12075 };
12076
12077 // Collect all original args for NULL checks
12078 let position = if !args.is_empty() {
12079 Some(args.remove(0))
12080 } else {
12081 None
12082 };
12083 let occurrence = if !args.is_empty() {
12084 Some(args.remove(0))
12085 } else {
12086 None
12087 };
12088 let option = if !args.is_empty() {
12089 Some(args.remove(0))
12090 } else {
12091 None
12092 };
12093 let flags = if !args.is_empty() {
12094 Some(args.remove(0))
12095 } else {
12096 None
12097 };
12098 let _group = if !args.is_empty() {
12099 Some(args.remove(0))
12100 } else {
12101 None
12102 };
12103
12104 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
12105 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
12106
12107 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
12108 let mut null_checks: Vec<Expression> = vec![
12109 Expression::Is(Box::new(BinaryOp::new(
12110 subject.clone(),
12111 Expression::Null(Null),
12112 ))),
12113 Expression::Is(Box::new(BinaryOp::new(
12114 pattern.clone(),
12115 Expression::Null(Null),
12116 ))),
12117 ];
12118 // Add NULL checks for all provided optional args
12119 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
12120 if let Some(arg) = opt_arg {
12121 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
12122 (*arg).clone(),
12123 Expression::Null(Null),
12124 ))));
12125 }
12126 }
12127 // Chain with OR
12128 let null_condition = null_checks
12129 .into_iter()
12130 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
12131 .unwrap();
12132
12133 // Effective subject (apply position offset)
12134 let effective_subject = if is_pos_1 {
12135 subject.clone()
12136 } else {
12137 let pos = position.clone().unwrap_or(Expression::number(1));
12138 Expression::Function(Box::new(Function::new(
12139 "SUBSTRING".to_string(),
12140 vec![subject.clone(), pos],
12141 )))
12142 };
12143
12144 // Effective pattern (apply flags if present)
12145 let effective_pattern = if let Some(ref fl) = flags {
12146 if let Expression::Literal(lit) = fl {
12147 if let Literal::String(f_str) = lit.as_ref() {
12148 if !f_str.is_empty() {
12149 let prefix = Expression::Literal(Box::new(
12150 Literal::String(format!("(?{})", f_str)),
12151 ));
12152 Expression::DPipe(Box::new(crate::expressions::DPipe {
12153 this: Box::new(prefix),
12154 expression: Box::new(pattern.clone()),
12155 safe: None,
12156 }))
12157 } else {
12158 pattern.clone()
12159 }
12160 } else {
12161 fl.clone()
12162 }
12163 } else {
12164 pattern.clone()
12165 }
12166 } else {
12167 pattern.clone()
12168 };
12169
12170 // WHEN pattern = '' THEN 0
12171 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
12172 effective_pattern.clone(),
12173 Expression::Literal(Box::new(Literal::String(String::new()))),
12174 )));
12175
12176 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
12177 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
12178 Expression::Length(Box::new(crate::expressions::UnaryFunc {
12179 this: Expression::Function(Box::new(Function::new(
12180 "REGEXP_EXTRACT_ALL".to_string(),
12181 vec![effective_subject.clone(), effective_pattern.clone()],
12182 ))),
12183 original_name: None,
12184 inferred_type: None,
12185 })),
12186 occurrence_expr.clone(),
12187 )));
12188
12189 // Helper: build LENGTH lambda for LIST_TRANSFORM
12190 let make_len_lambda = || {
12191 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
12192 parameters: vec![crate::expressions::Identifier::new("x")],
12193 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
12194 this: Expression::Identifier(
12195 crate::expressions::Identifier::new("x"),
12196 ),
12197 original_name: None,
12198 inferred_type: None,
12199 })),
12200 colon: false,
12201 parameter_types: vec![],
12202 }))
12203 };
12204
12205 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
12206 let split_sliced =
12207 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
12208 this: Expression::Function(Box::new(Function::new(
12209 "STRING_SPLIT_REGEX".to_string(),
12210 vec![effective_subject.clone(), effective_pattern.clone()],
12211 ))),
12212 start: Some(Expression::number(1)),
12213 end: Some(occurrence_expr.clone()),
12214 }));
12215 let split_sum = Expression::Function(Box::new(Function::new(
12216 "COALESCE".to_string(),
12217 vec![
12218 Expression::Function(Box::new(Function::new(
12219 "LIST_SUM".to_string(),
12220 vec![Expression::Function(Box::new(Function::new(
12221 "LIST_TRANSFORM".to_string(),
12222 vec![split_sliced, make_len_lambda()],
12223 )))],
12224 ))),
12225 Expression::number(0),
12226 ],
12227 )));
12228
12229 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
12230 let extract_sliced =
12231 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
12232 this: Expression::Function(Box::new(Function::new(
12233 "REGEXP_EXTRACT_ALL".to_string(),
12234 vec![effective_subject.clone(), effective_pattern.clone()],
12235 ))),
12236 start: Some(Expression::number(1)),
12237 end: Some(Expression::Sub(Box::new(BinaryOp::new(
12238 occurrence_expr.clone(),
12239 Expression::number(1),
12240 )))),
12241 }));
12242 let extract_sum = Expression::Function(Box::new(Function::new(
12243 "COALESCE".to_string(),
12244 vec![
12245 Expression::Function(Box::new(Function::new(
12246 "LIST_SUM".to_string(),
12247 vec![Expression::Function(Box::new(Function::new(
12248 "LIST_TRANSFORM".to_string(),
12249 vec![extract_sliced, make_len_lambda()],
12250 )))],
12251 ))),
12252 Expression::number(0),
12253 ],
12254 )));
12255
12256 // Position offset: pos - 1 when pos > 1, else 0
12257 let pos_offset: Expression = if !is_pos_1 {
12258 let pos = position.clone().unwrap_or(Expression::number(1));
12259 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
12260 } else {
12261 Expression::number(0)
12262 };
12263
12264 // ELSE: 1 + split_sum + extract_sum + pos_offset
12265 let else_expr = Expression::Add(Box::new(BinaryOp::new(
12266 Expression::Add(Box::new(BinaryOp::new(
12267 Expression::Add(Box::new(BinaryOp::new(
12268 Expression::number(1),
12269 split_sum,
12270 ))),
12271 extract_sum,
12272 ))),
12273 pos_offset,
12274 )));
12275
12276 Ok(Expression::Case(Box::new(Case {
12277 operand: None,
12278 whens: vec![
12279 (null_condition, Expression::Null(Null)),
12280 (empty_pattern_check, Expression::number(0)),
12281 (match_count_check, Expression::number(0)),
12282 ],
12283 else_: Some(else_expr),
12284 comments: vec![],
12285 inferred_type: None,
12286 })))
12287 } else {
12288 Ok(e)
12289 }
12290 }
12291
12292 Action::RlikeSnowflakeToDuckDB => {
12293 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
12294 // Both do full-string matching, so no anchoring needed
12295 let (subject, pattern, flags) = match e {
12296 Expression::RegexpLike(ref rl) => {
12297 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
12298 }
12299 Expression::Function(ref f) if f.args.len() >= 2 => {
12300 let s = f.args[0].clone();
12301 let p = f.args[1].clone();
12302 let fl = f.args.get(2).cloned();
12303 (s, p, fl)
12304 }
12305 _ => return Ok(e),
12306 };
12307
12308 let mut result_args = vec![subject, pattern];
12309 if let Some(fl) = flags {
12310 result_args.push(fl);
12311 }
12312 Ok(Expression::Function(Box::new(Function::new(
12313 "REGEXP_FULL_MATCH".to_string(),
12314 result_args,
12315 ))))
12316 }
12317
12318 Action::RegexpExtractAllToSnowflake => {
12319 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
12320 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
12321 if let Expression::Function(f) = e {
12322 let mut args = f.args;
12323 if args.len() >= 2 {
12324 let str_expr = args.remove(0);
12325 let pattern = args.remove(0);
12326
12327 let has_groups = match &pattern {
12328 Expression::Literal(lit)
12329 if matches!(lit.as_ref(), Literal::String(_)) =>
12330 {
12331 let Literal::String(s) = lit.as_ref() else {
12332 unreachable!()
12333 };
12334 s.contains('(') && s.contains(')')
12335 }
12336 _ => false,
12337 };
12338
12339 if has_groups {
12340 Ok(Expression::Function(Box::new(Function::new(
12341 "REGEXP_SUBSTR_ALL".to_string(),
12342 vec![
12343 str_expr,
12344 pattern,
12345 Expression::number(1),
12346 Expression::number(1),
12347 Expression::Literal(Box::new(Literal::String(
12348 "c".to_string(),
12349 ))),
12350 Expression::number(1),
12351 ],
12352 ))))
12353 } else {
12354 Ok(Expression::Function(Box::new(Function::new(
12355 "REGEXP_SUBSTR_ALL".to_string(),
12356 vec![str_expr, pattern],
12357 ))))
12358 }
12359 } else {
12360 Ok(Expression::Function(Box::new(Function::new(
12361 "REGEXP_SUBSTR_ALL".to_string(),
12362 args,
12363 ))))
12364 }
12365 } else {
12366 Ok(e)
12367 }
12368 }
12369
12370 Action::SetToVariable => {
12371 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
12372 if let Expression::SetStatement(mut s) = e {
12373 for item in &mut s.items {
12374 if item.kind.is_none() {
12375 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
12376 let already_variable = match &item.name {
12377 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
12378 _ => false,
12379 };
12380 if already_variable {
12381 // Extract the actual name and set kind
12382 if let Expression::Identifier(ref mut id) = item.name {
12383 let actual_name = id.name["VARIABLE ".len()..].to_string();
12384 id.name = actual_name;
12385 }
12386 }
12387 item.kind = Some("VARIABLE".to_string());
12388 }
12389 }
12390 Ok(Expression::SetStatement(s))
12391 } else {
12392 Ok(e)
12393 }
12394 }
12395
12396 Action::ConvertTimezoneToExpr => {
12397 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
12398 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
12399 if let Expression::Function(f) = e {
12400 if f.args.len() == 2 {
12401 let mut args = f.args;
12402 let target_tz = args.remove(0);
12403 let timestamp = args.remove(0);
12404 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
12405 source_tz: None,
12406 target_tz: Some(Box::new(target_tz)),
12407 timestamp: Some(Box::new(timestamp)),
12408 options: vec![],
12409 })))
12410 } else if f.args.len() == 3 {
12411 let mut args = f.args;
12412 let source_tz = args.remove(0);
12413 let target_tz = args.remove(0);
12414 let timestamp = args.remove(0);
12415 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
12416 source_tz: Some(Box::new(source_tz)),
12417 target_tz: Some(Box::new(target_tz)),
12418 timestamp: Some(Box::new(timestamp)),
12419 options: vec![],
12420 })))
12421 } else {
12422 Ok(Expression::Function(f))
12423 }
12424 } else {
12425 Ok(e)
12426 }
12427 }
12428
12429 Action::BigQueryCastType => {
12430 // Convert BigQuery types to standard SQL types
12431 if let Expression::DataType(dt) = e {
12432 match dt {
12433 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
12434 Ok(Expression::DataType(DataType::BigInt { length: None }))
12435 }
12436 DataType::Custom { ref name }
12437 if name.eq_ignore_ascii_case("FLOAT64") =>
12438 {
12439 Ok(Expression::DataType(DataType::Double {
12440 precision: None,
12441 scale: None,
12442 }))
12443 }
12444 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
12445 Ok(Expression::DataType(DataType::Boolean))
12446 }
12447 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
12448 Ok(Expression::DataType(DataType::VarBinary { length: None }))
12449 }
12450 DataType::Custom { ref name }
12451 if name.eq_ignore_ascii_case("NUMERIC") =>
12452 {
12453 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
12454 // default precision (18, 3) being added to bare DECIMAL
12455 if matches!(target, DialectType::DuckDB) {
12456 Ok(Expression::DataType(DataType::Custom {
12457 name: "DECIMAL".to_string(),
12458 }))
12459 } else {
12460 Ok(Expression::DataType(DataType::Decimal {
12461 precision: None,
12462 scale: None,
12463 }))
12464 }
12465 }
12466 DataType::Custom { ref name }
12467 if name.eq_ignore_ascii_case("STRING") =>
12468 {
12469 Ok(Expression::DataType(DataType::String { length: None }))
12470 }
12471 DataType::Custom { ref name }
12472 if name.eq_ignore_ascii_case("DATETIME") =>
12473 {
12474 Ok(Expression::DataType(DataType::Timestamp {
12475 precision: None,
12476 timezone: false,
12477 }))
12478 }
12479 _ => Ok(Expression::DataType(dt)),
12480 }
12481 } else {
12482 Ok(e)
12483 }
12484 }
12485
12486 Action::BigQuerySafeDivide => {
12487 // Convert SafeDivide expression to IF/CASE form for most targets
12488 if let Expression::SafeDivide(sd) = e {
12489 let x = *sd.this;
12490 let y = *sd.expression;
12491 // Wrap x and y in parens if they're complex expressions
12492 let y_ref = match &y {
12493 Expression::Column(_)
12494 | Expression::Literal(_)
12495 | Expression::Identifier(_) => y.clone(),
12496 _ => Expression::Paren(Box::new(Paren {
12497 this: y.clone(),
12498 trailing_comments: vec![],
12499 })),
12500 };
12501 let x_ref = match &x {
12502 Expression::Column(_)
12503 | Expression::Literal(_)
12504 | Expression::Identifier(_) => x.clone(),
12505 _ => Expression::Paren(Box::new(Paren {
12506 this: x.clone(),
12507 trailing_comments: vec![],
12508 })),
12509 };
12510 let condition = Expression::Neq(Box::new(BinaryOp::new(
12511 y_ref.clone(),
12512 Expression::number(0),
12513 )));
12514 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
12515
12516 if matches!(target, DialectType::Spark | DialectType::Databricks) {
12517 Ok(Expression::Function(Box::new(Function::new(
12518 "TRY_DIVIDE".to_string(),
12519 vec![x, y],
12520 ))))
12521 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
12522 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
12523 let cast_x = Expression::Cast(Box::new(Cast {
12524 this: match &x {
12525 Expression::Column(_)
12526 | Expression::Literal(_)
12527 | Expression::Identifier(_) => x,
12528 _ => Expression::Paren(Box::new(Paren {
12529 this: x,
12530 trailing_comments: vec![],
12531 })),
12532 },
12533 to: DataType::Double {
12534 precision: None,
12535 scale: None,
12536 },
12537 trailing_comments: vec![],
12538 double_colon_syntax: false,
12539 format: None,
12540 default: None,
12541 inferred_type: None,
12542 }));
12543 let cast_div = Expression::Div(Box::new(BinaryOp::new(
12544 cast_x,
12545 match &y {
12546 Expression::Column(_)
12547 | Expression::Literal(_)
12548 | Expression::Identifier(_) => y,
12549 _ => Expression::Paren(Box::new(Paren {
12550 this: y,
12551 trailing_comments: vec![],
12552 })),
12553 },
12554 )));
12555 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12556 condition,
12557 true_value: cast_div,
12558 false_value: Some(Expression::Null(Null)),
12559 original_name: None,
12560 inferred_type: None,
12561 })))
12562 } else if matches!(target, DialectType::PostgreSQL) {
12563 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
12564 let cast_x = Expression::Cast(Box::new(Cast {
12565 this: match &x {
12566 Expression::Column(_)
12567 | Expression::Literal(_)
12568 | Expression::Identifier(_) => x,
12569 _ => Expression::Paren(Box::new(Paren {
12570 this: x,
12571 trailing_comments: vec![],
12572 })),
12573 },
12574 to: DataType::Custom {
12575 name: "DOUBLE PRECISION".to_string(),
12576 },
12577 trailing_comments: vec![],
12578 double_colon_syntax: false,
12579 format: None,
12580 default: None,
12581 inferred_type: None,
12582 }));
12583 let y_paren = match &y {
12584 Expression::Column(_)
12585 | Expression::Literal(_)
12586 | Expression::Identifier(_) => y,
12587 _ => Expression::Paren(Box::new(Paren {
12588 this: y,
12589 trailing_comments: vec![],
12590 })),
12591 };
12592 let cast_div =
12593 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
12594 Ok(Expression::Case(Box::new(Case {
12595 operand: None,
12596 whens: vec![(condition, cast_div)],
12597 else_: Some(Expression::Null(Null)),
12598 comments: Vec::new(),
12599 inferred_type: None,
12600 })))
12601 } else if matches!(target, DialectType::DuckDB) {
12602 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
12603 Ok(Expression::Case(Box::new(Case {
12604 operand: None,
12605 whens: vec![(condition, div_expr)],
12606 else_: Some(Expression::Null(Null)),
12607 comments: Vec::new(),
12608 inferred_type: None,
12609 })))
12610 } else if matches!(target, DialectType::Snowflake) {
12611 // Snowflake: IFF(y <> 0, x / y, NULL)
12612 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12613 condition,
12614 true_value: div_expr,
12615 false_value: Some(Expression::Null(Null)),
12616 original_name: Some("IFF".to_string()),
12617 inferred_type: None,
12618 })))
12619 } else {
12620 // All others: IF(y <> 0, x / y, NULL)
12621 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
12622 condition,
12623 true_value: div_expr,
12624 false_value: Some(Expression::Null(Null)),
12625 original_name: None,
12626 inferred_type: None,
12627 })))
12628 }
12629 } else {
12630 Ok(e)
12631 }
12632 }
12633
12634 Action::BigQueryLastDayStripUnit => {
12635 if let Expression::LastDay(mut ld) = e {
12636 ld.unit = None; // Strip the unit (MONTH is default)
12637 match target {
12638 DialectType::PostgreSQL => {
12639 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
12640 let date_trunc = Expression::Function(Box::new(Function::new(
12641 "DATE_TRUNC".to_string(),
12642 vec![
12643 Expression::Literal(Box::new(
12644 crate::expressions::Literal::String(
12645 "MONTH".to_string(),
12646 ),
12647 )),
12648 ld.this.clone(),
12649 ],
12650 )));
12651 let plus_month =
12652 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
12653 date_trunc,
12654 Expression::Interval(Box::new(
12655 crate::expressions::Interval {
12656 this: Some(Expression::Literal(Box::new(
12657 crate::expressions::Literal::String(
12658 "1 MONTH".to_string(),
12659 ),
12660 ))),
12661 unit: None,
12662 },
12663 )),
12664 )));
12665 let minus_day =
12666 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
12667 plus_month,
12668 Expression::Interval(Box::new(
12669 crate::expressions::Interval {
12670 this: Some(Expression::Literal(Box::new(
12671 crate::expressions::Literal::String(
12672 "1 DAY".to_string(),
12673 ),
12674 ))),
12675 unit: None,
12676 },
12677 )),
12678 )));
12679 Ok(Expression::Cast(Box::new(Cast {
12680 this: minus_day,
12681 to: DataType::Date,
12682 trailing_comments: vec![],
12683 double_colon_syntax: false,
12684 format: None,
12685 default: None,
12686 inferred_type: None,
12687 })))
12688 }
12689 DialectType::Presto => {
12690 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
12691 Ok(Expression::Function(Box::new(Function::new(
12692 "LAST_DAY_OF_MONTH".to_string(),
12693 vec![ld.this],
12694 ))))
12695 }
12696 DialectType::ClickHouse => {
12697 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
12698 // Need to wrap the DATE type in Nullable
12699 let nullable_date = match ld.this {
12700 Expression::Cast(mut c) => {
12701 c.to = DataType::Nullable {
12702 inner: Box::new(DataType::Date),
12703 };
12704 Expression::Cast(c)
12705 }
12706 other => other,
12707 };
12708 ld.this = nullable_date;
12709 Ok(Expression::LastDay(ld))
12710 }
12711 _ => Ok(Expression::LastDay(ld)),
12712 }
12713 } else {
12714 Ok(e)
12715 }
12716 }
12717
12718 Action::BigQueryCastFormat => {
12719 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
12720 // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
12721 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
12722 let (this, to, format_expr, is_safe) = match e {
12723 Expression::Cast(ref c) if c.format.is_some() => (
12724 c.this.clone(),
12725 c.to.clone(),
12726 c.format.as_ref().unwrap().as_ref().clone(),
12727 false,
12728 ),
12729 Expression::SafeCast(ref c) if c.format.is_some() => (
12730 c.this.clone(),
12731 c.to.clone(),
12732 c.format.as_ref().unwrap().as_ref().clone(),
12733 true,
12734 ),
12735 _ => return Ok(e),
12736 };
12737 // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
12738 if matches!(target, DialectType::BigQuery) {
12739 match &to {
12740 DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
12741 // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
12742 return Ok(e);
12743 }
12744 _ => {}
12745 }
12746 }
12747 // Extract timezone from format if AT TIME ZONE is present
12748 let (actual_format_expr, timezone) = match &format_expr {
12749 Expression::AtTimeZone(ref atz) => {
12750 (atz.this.clone(), Some(atz.zone.clone()))
12751 }
12752 _ => (format_expr.clone(), None),
12753 };
12754 let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
12755 match target {
12756 DialectType::BigQuery => {
12757 // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
12758 // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
12759 let func_name = match &to {
12760 DataType::Date => "PARSE_DATE",
12761 DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
12762 DataType::Time { .. } => "PARSE_TIMESTAMP",
12763 _ => "PARSE_TIMESTAMP",
12764 };
12765 let mut func_args = vec![strftime_fmt, this];
12766 if let Some(tz) = timezone {
12767 func_args.push(tz);
12768 }
12769 Ok(Expression::Function(Box::new(Function::new(
12770 func_name.to_string(),
12771 func_args,
12772 ))))
12773 }
12774 DialectType::DuckDB => {
12775 // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
12776 // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
12777 let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
12778 let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
12779 let parse_call = Expression::Function(Box::new(Function::new(
12780 parse_fn_name.to_string(),
12781 vec![this, duck_fmt],
12782 )));
12783 Ok(Expression::Cast(Box::new(Cast {
12784 this: parse_call,
12785 to,
12786 trailing_comments: vec![],
12787 double_colon_syntax: false,
12788 format: None,
12789 default: None,
12790 inferred_type: None,
12791 })))
12792 }
12793 _ => Ok(e),
12794 }
12795 }
12796
12797 Action::BigQueryFunctionNormalize => {
12798 Self::normalize_bigquery_function(e, source, target)
12799 }
12800
12801 Action::BigQueryToHexBare => {
12802 // Not used anymore - handled directly in normalize_bigquery_function
12803 Ok(e)
12804 }
12805
12806 Action::BigQueryToHexLower => {
12807 if let Expression::Lower(uf) = e {
12808 match uf.this {
12809 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
12810 Expression::Function(f)
12811 if matches!(target, DialectType::BigQuery)
12812 && f.name == "TO_HEX" =>
12813 {
12814 Ok(Expression::Function(f))
12815 }
12816 // LOWER(LOWER(HEX/TO_HEX(x))) patterns
12817 Expression::Lower(inner_uf) => {
12818 if matches!(target, DialectType::BigQuery) {
12819 // BQ->BQ: extract TO_HEX
12820 if let Expression::Function(f) = inner_uf.this {
12821 Ok(Expression::Function(Box::new(Function::new(
12822 "TO_HEX".to_string(),
12823 f.args,
12824 ))))
12825 } else {
12826 Ok(Expression::Lower(inner_uf))
12827 }
12828 } else {
12829 // Flatten: LOWER(LOWER(x)) -> LOWER(x)
12830 Ok(Expression::Lower(inner_uf))
12831 }
12832 }
12833 other => {
12834 Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
12835 this: other,
12836 original_name: None,
12837 inferred_type: None,
12838 })))
12839 }
12840 }
12841 } else {
12842 Ok(e)
12843 }
12844 }
12845
12846 Action::BigQueryToHexUpper => {
12847 // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
12848 // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
12849 if let Expression::Upper(uf) = e {
12850 if let Expression::Lower(inner_uf) = uf.this {
12851 // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
12852 if matches!(target, DialectType::BigQuery) {
12853 // Restore TO_HEX name in inner function
12854 if let Expression::Function(f) = inner_uf.this {
12855 let restored = Expression::Function(Box::new(Function::new(
12856 "TO_HEX".to_string(),
12857 f.args,
12858 )));
12859 Ok(Expression::Upper(Box::new(
12860 crate::expressions::UnaryFunc::new(restored),
12861 )))
12862 } else {
12863 Ok(Expression::Upper(inner_uf))
12864 }
12865 } else {
12866 // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
12867 Ok(inner_uf.this)
12868 }
12869 } else {
12870 Ok(Expression::Upper(uf))
12871 }
12872 } else {
12873 Ok(e)
12874 }
12875 }
12876
12877 Action::BigQueryAnyValueHaving => {
12878 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
12879 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
12880 if let Expression::AnyValue(agg) = e {
12881 if let Some((having_expr, is_max)) = agg.having_max {
12882 let func_name = if is_max {
12883 "ARG_MAX_NULL"
12884 } else {
12885 "ARG_MIN_NULL"
12886 };
12887 Ok(Expression::Function(Box::new(Function::new(
12888 func_name.to_string(),
12889 vec![agg.this, *having_expr],
12890 ))))
12891 } else {
12892 Ok(Expression::AnyValue(agg))
12893 }
12894 } else {
12895 Ok(e)
12896 }
12897 }
12898
12899 Action::BigQueryApproxQuantiles => {
12900 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
12901 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
12902 if let Expression::AggregateFunction(agg) = e {
12903 if agg.args.len() >= 2 {
12904 let x_expr = agg.args[0].clone();
12905 let n_expr = &agg.args[1];
12906
12907 // Extract the numeric value from n_expr
12908 let n = match n_expr {
12909 Expression::Literal(lit)
12910 if matches!(
12911 lit.as_ref(),
12912 crate::expressions::Literal::Number(_)
12913 ) =>
12914 {
12915 let crate::expressions::Literal::Number(s) = lit.as_ref()
12916 else {
12917 unreachable!()
12918 };
12919 s.parse::<usize>().unwrap_or(2)
12920 }
12921 _ => 2,
12922 };
12923
12924 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
12925 let mut quantiles = Vec::new();
12926 for i in 0..=n {
12927 let q = i as f64 / n as f64;
12928 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
12929 if q == 0.0 {
12930 quantiles.push(Expression::number(0));
12931 } else if q == 1.0 {
12932 quantiles.push(Expression::number(1));
12933 } else {
12934 quantiles.push(Expression::Literal(Box::new(
12935 crate::expressions::Literal::Number(format!("{}", q)),
12936 )));
12937 }
12938 }
12939
12940 let array_expr =
12941 Expression::Array(Box::new(crate::expressions::Array {
12942 expressions: quantiles,
12943 }));
12944
12945 // Preserve DISTINCT modifier
12946 let mut new_func = Function::new(
12947 "APPROX_QUANTILE".to_string(),
12948 vec![x_expr, array_expr],
12949 );
12950 new_func.distinct = agg.distinct;
12951 Ok(Expression::Function(Box::new(new_func)))
12952 } else {
12953 Ok(Expression::AggregateFunction(agg))
12954 }
12955 } else {
12956 Ok(e)
12957 }
12958 }
12959
12960 Action::GenericFunctionNormalize => {
12961 // Helper closure to convert ARBITRARY to target-specific function
12962 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
12963 let name = match target {
12964 DialectType::ClickHouse => "any",
12965 DialectType::TSQL | DialectType::SQLite => "MAX",
12966 DialectType::Hive => "FIRST",
12967 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12968 "ARBITRARY"
12969 }
12970 _ => "ANY_VALUE",
12971 };
12972 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
12973 }
12974
12975 if let Expression::Function(f) = e {
12976 let name = f.name.to_ascii_uppercase();
12977 match name.as_str() {
12978 "ARBITRARY" if f.args.len() == 1 => {
12979 let arg = f.args.into_iter().next().unwrap();
12980 Ok(convert_arbitrary(arg, target))
12981 }
12982 "TO_NUMBER" if f.args.len() == 1 => {
12983 let arg = f.args.into_iter().next().unwrap();
12984 match target {
12985 DialectType::Oracle | DialectType::Snowflake => {
12986 Ok(Expression::Function(Box::new(Function::new(
12987 "TO_NUMBER".to_string(),
12988 vec![arg],
12989 ))))
12990 }
12991 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
12992 this: arg,
12993 to: crate::expressions::DataType::Double {
12994 precision: None,
12995 scale: None,
12996 },
12997 double_colon_syntax: false,
12998 trailing_comments: Vec::new(),
12999 format: None,
13000 default: None,
13001 inferred_type: None,
13002 }))),
13003 }
13004 }
13005 "AGGREGATE" if f.args.len() >= 3 => match target {
13006 DialectType::DuckDB
13007 | DialectType::Hive
13008 | DialectType::Presto
13009 | DialectType::Trino => Ok(Expression::Function(Box::new(
13010 Function::new("REDUCE".to_string(), f.args),
13011 ))),
13012 _ => Ok(Expression::Function(f)),
13013 },
13014 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
13015 "REGEXP_MATCHES" if f.args.len() >= 2 => {
13016 if matches!(target, DialectType::DuckDB) {
13017 Ok(Expression::Function(f))
13018 } else {
13019 let mut args = f.args;
13020 let this = args.remove(0);
13021 let pattern = args.remove(0);
13022 let flags = if args.is_empty() {
13023 None
13024 } else {
13025 Some(args.remove(0))
13026 };
13027 Ok(Expression::RegexpLike(Box::new(
13028 crate::expressions::RegexpFunc {
13029 this,
13030 pattern,
13031 flags,
13032 },
13033 )))
13034 }
13035 }
13036 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
13037 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
13038 if matches!(target, DialectType::DuckDB) {
13039 Ok(Expression::Function(f))
13040 } else {
13041 let mut args = f.args;
13042 let this = args.remove(0);
13043 let pattern = args.remove(0);
13044 let flags = if args.is_empty() {
13045 None
13046 } else {
13047 Some(args.remove(0))
13048 };
13049 Ok(Expression::RegexpLike(Box::new(
13050 crate::expressions::RegexpFunc {
13051 this,
13052 pattern,
13053 flags,
13054 },
13055 )))
13056 }
13057 }
13058 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
13059 "STRUCT_EXTRACT" if f.args.len() == 2 => {
13060 let mut args = f.args;
13061 let this = args.remove(0);
13062 let field_expr = args.remove(0);
13063 // Extract string literal to get field name
13064 let field_name = match &field_expr {
13065 Expression::Literal(lit)
13066 if matches!(
13067 lit.as_ref(),
13068 crate::expressions::Literal::String(_)
13069 ) =>
13070 {
13071 let crate::expressions::Literal::String(s) = lit.as_ref()
13072 else {
13073 unreachable!()
13074 };
13075 s.clone()
13076 }
13077 Expression::Identifier(id) => id.name.clone(),
13078 _ => {
13079 return Ok(Expression::Function(Box::new(Function::new(
13080 "STRUCT_EXTRACT".to_string(),
13081 vec![this, field_expr],
13082 ))))
13083 }
13084 };
13085 Ok(Expression::StructExtract(Box::new(
13086 crate::expressions::StructExtractFunc {
13087 this,
13088 field: crate::expressions::Identifier::new(field_name),
13089 },
13090 )))
13091 }
13092 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
13093 "LIST_FILTER" if f.args.len() == 2 => {
13094 let name = match target {
13095 DialectType::DuckDB => "LIST_FILTER",
13096 _ => "FILTER",
13097 };
13098 Ok(Expression::Function(Box::new(Function::new(
13099 name.to_string(),
13100 f.args,
13101 ))))
13102 }
13103 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
13104 "LIST_TRANSFORM" if f.args.len() == 2 => {
13105 let name = match target {
13106 DialectType::DuckDB => "LIST_TRANSFORM",
13107 _ => "TRANSFORM",
13108 };
13109 Ok(Expression::Function(Box::new(Function::new(
13110 name.to_string(),
13111 f.args,
13112 ))))
13113 }
13114 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
13115 "LIST_SORT" if f.args.len() >= 1 => {
13116 let name = match target {
13117 DialectType::DuckDB => "LIST_SORT",
13118 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
13119 _ => "SORT_ARRAY",
13120 };
13121 Ok(Expression::Function(Box::new(Function::new(
13122 name.to_string(),
13123 f.args,
13124 ))))
13125 }
13126 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
13127 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
13128 match target {
13129 DialectType::DuckDB => Ok(Expression::Function(Box::new(
13130 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
13131 ))),
13132 DialectType::Spark
13133 | DialectType::Databricks
13134 | DialectType::Hive => {
13135 let mut args = f.args;
13136 args.push(Expression::Identifier(
13137 crate::expressions::Identifier::new("FALSE"),
13138 ));
13139 Ok(Expression::Function(Box::new(Function::new(
13140 "SORT_ARRAY".to_string(),
13141 args,
13142 ))))
13143 }
13144 DialectType::Presto
13145 | DialectType::Trino
13146 | DialectType::Athena => {
13147 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
13148 let arr = f.args.into_iter().next().unwrap();
13149 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
13150 parameters: vec![
13151 crate::expressions::Identifier::new("a"),
13152 crate::expressions::Identifier::new("b"),
13153 ],
13154 body: Expression::Case(Box::new(Case {
13155 operand: None,
13156 whens: vec![
13157 (
13158 Expression::Lt(Box::new(BinaryOp::new(
13159 Expression::Identifier(crate::expressions::Identifier::new("a")),
13160 Expression::Identifier(crate::expressions::Identifier::new("b")),
13161 ))),
13162 Expression::number(1),
13163 ),
13164 (
13165 Expression::Gt(Box::new(BinaryOp::new(
13166 Expression::Identifier(crate::expressions::Identifier::new("a")),
13167 Expression::Identifier(crate::expressions::Identifier::new("b")),
13168 ))),
13169 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
13170 ),
13171 ],
13172 else_: Some(Expression::number(0)),
13173 comments: Vec::new(),
13174 inferred_type: None,
13175 })),
13176 colon: false,
13177 parameter_types: Vec::new(),
13178 }));
13179 Ok(Expression::Function(Box::new(Function::new(
13180 "ARRAY_SORT".to_string(),
13181 vec![arr, lambda],
13182 ))))
13183 }
13184 _ => Ok(Expression::Function(Box::new(Function::new(
13185 "LIST_REVERSE_SORT".to_string(),
13186 f.args,
13187 )))),
13188 }
13189 }
13190 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
13191 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
13192 let mut args = f.args;
13193 args.push(Expression::string(","));
13194 let name = match target {
13195 DialectType::DuckDB => "STR_SPLIT",
13196 DialectType::Presto | DialectType::Trino => "SPLIT",
13197 DialectType::Spark
13198 | DialectType::Databricks
13199 | DialectType::Hive => "SPLIT",
13200 DialectType::PostgreSQL => "STRING_TO_ARRAY",
13201 DialectType::Redshift => "SPLIT_TO_ARRAY",
13202 _ => "SPLIT",
13203 };
13204 Ok(Expression::Function(Box::new(Function::new(
13205 name.to_string(),
13206 args,
13207 ))))
13208 }
13209 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
13210 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
13211 let name = match target {
13212 DialectType::DuckDB => "STR_SPLIT",
13213 DialectType::Presto | DialectType::Trino => "SPLIT",
13214 DialectType::Spark
13215 | DialectType::Databricks
13216 | DialectType::Hive => "SPLIT",
13217 DialectType::PostgreSQL => "STRING_TO_ARRAY",
13218 DialectType::Redshift => "SPLIT_TO_ARRAY",
13219 _ => "SPLIT",
13220 };
13221 Ok(Expression::Function(Box::new(Function::new(
13222 name.to_string(),
13223 f.args,
13224 ))))
13225 }
13226 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
13227 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
13228 let name = match target {
13229 DialectType::DuckDB => "STR_SPLIT",
13230 DialectType::Presto | DialectType::Trino => "SPLIT",
13231 DialectType::Spark
13232 | DialectType::Databricks
13233 | DialectType::Hive => "SPLIT",
13234 DialectType::Doris | DialectType::StarRocks => {
13235 "SPLIT_BY_STRING"
13236 }
13237 DialectType::PostgreSQL | DialectType::Redshift => {
13238 "STRING_TO_ARRAY"
13239 }
13240 _ => "SPLIT",
13241 };
13242 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
13243 if matches!(
13244 target,
13245 DialectType::Spark
13246 | DialectType::Databricks
13247 | DialectType::Hive
13248 ) {
13249 let mut args = f.args;
13250 let x = args.remove(0);
13251 let sep = args.remove(0);
13252 // Wrap separator in CONCAT('\\Q', sep, '\\E')
13253 let escaped_sep =
13254 Expression::Function(Box::new(Function::new(
13255 "CONCAT".to_string(),
13256 vec![
13257 Expression::string("\\Q"),
13258 sep,
13259 Expression::string("\\E"),
13260 ],
13261 )));
13262 Ok(Expression::Function(Box::new(Function::new(
13263 name.to_string(),
13264 vec![x, escaped_sep],
13265 ))))
13266 } else {
13267 Ok(Expression::Function(Box::new(Function::new(
13268 name.to_string(),
13269 f.args,
13270 ))))
13271 }
13272 }
13273 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
13274 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
13275 let name = match target {
13276 DialectType::DuckDB => "STR_SPLIT_REGEX",
13277 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
13278 DialectType::Spark
13279 | DialectType::Databricks
13280 | DialectType::Hive => "SPLIT",
13281 _ => "REGEXP_SPLIT",
13282 };
13283 Ok(Expression::Function(Box::new(Function::new(
13284 name.to_string(),
13285 f.args,
13286 ))))
13287 }
13288 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
13289 "SPLIT"
13290 if f.args.len() == 2
13291 && matches!(source, DialectType::Snowflake)
13292 && matches!(target, DialectType::DuckDB) =>
13293 {
13294 let mut args = f.args;
13295 let str_arg = args.remove(0);
13296 let delim_arg = args.remove(0);
13297
13298 // STR_SPLIT(str, delim) as the base
13299 let base_func = Expression::Function(Box::new(Function::new(
13300 "STR_SPLIT".to_string(),
13301 vec![str_arg.clone(), delim_arg.clone()],
13302 )));
13303
13304 // [str] - array with single element
13305 let array_with_input =
13306 Expression::Array(Box::new(crate::expressions::Array {
13307 expressions: vec![str_arg],
13308 }));
13309
13310 // CASE
13311 // WHEN delim IS NULL THEN NULL
13312 // WHEN delim = '' THEN [str]
13313 // ELSE STR_SPLIT(str, delim)
13314 // END
13315 Ok(Expression::Case(Box::new(Case {
13316 operand: None,
13317 whens: vec![
13318 (
13319 Expression::Is(Box::new(BinaryOp {
13320 left: delim_arg.clone(),
13321 right: Expression::Null(Null),
13322 left_comments: vec![],
13323 operator_comments: vec![],
13324 trailing_comments: vec![],
13325 inferred_type: None,
13326 })),
13327 Expression::Null(Null),
13328 ),
13329 (
13330 Expression::Eq(Box::new(BinaryOp {
13331 left: delim_arg,
13332 right: Expression::string(""),
13333 left_comments: vec![],
13334 operator_comments: vec![],
13335 trailing_comments: vec![],
13336 inferred_type: None,
13337 })),
13338 array_with_input,
13339 ),
13340 ],
13341 else_: Some(base_func),
13342 comments: vec![],
13343 inferred_type: None,
13344 })))
13345 }
13346 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
13347 "SPLIT"
13348 if f.args.len() == 2
13349 && matches!(
13350 source,
13351 DialectType::Presto
13352 | DialectType::Trino
13353 | DialectType::Athena
13354 | DialectType::StarRocks
13355 | DialectType::Doris
13356 )
13357 && matches!(
13358 target,
13359 DialectType::Spark
13360 | DialectType::Databricks
13361 | DialectType::Hive
13362 ) =>
13363 {
13364 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
13365 let mut args = f.args;
13366 let x = args.remove(0);
13367 let sep = args.remove(0);
13368 let escaped_sep = Expression::Function(Box::new(Function::new(
13369 "CONCAT".to_string(),
13370 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
13371 )));
13372 Ok(Expression::Function(Box::new(Function::new(
13373 "SPLIT".to_string(),
13374 vec![x, escaped_sep],
13375 ))))
13376 }
13377 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
13378 // For ClickHouse target, preserve original name to maintain camelCase
13379 "SUBSTRINGINDEX" => {
13380 let name = if matches!(target, DialectType::ClickHouse) {
13381 f.name.clone()
13382 } else {
13383 "SUBSTRING_INDEX".to_string()
13384 };
13385 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
13386 }
13387 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
13388 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
13389 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
13390 if name == "CARDINALITY"
13391 && matches!(source, DialectType::DuckDB)
13392 && matches!(target, DialectType::DuckDB)
13393 {
13394 return Ok(Expression::Function(f));
13395 }
13396 // Get the array argument (first arg, drop dimension args)
13397 let mut args = f.args;
13398 let arr = if args.is_empty() {
13399 return Ok(Expression::Function(Box::new(Function::new(
13400 name.to_string(),
13401 args,
13402 ))));
13403 } else {
13404 args.remove(0)
13405 };
13406 let name =
13407 match target {
13408 DialectType::Spark
13409 | DialectType::Databricks
13410 | DialectType::Hive => "SIZE",
13411 DialectType::Presto | DialectType::Trino => "CARDINALITY",
13412 DialectType::BigQuery => "ARRAY_LENGTH",
13413 DialectType::DuckDB => {
13414 // DuckDB: use ARRAY_LENGTH with all args
13415 let mut all_args = vec![arr];
13416 all_args.extend(args);
13417 return Ok(Expression::Function(Box::new(
13418 Function::new("ARRAY_LENGTH".to_string(), all_args),
13419 )));
13420 }
13421 DialectType::PostgreSQL | DialectType::Redshift => {
13422 // Keep ARRAY_LENGTH with dimension arg
13423 let mut all_args = vec![arr];
13424 all_args.extend(args);
13425 return Ok(Expression::Function(Box::new(
13426 Function::new("ARRAY_LENGTH".to_string(), all_args),
13427 )));
13428 }
13429 DialectType::ClickHouse => "LENGTH",
13430 _ => "ARRAY_LENGTH",
13431 };
13432 Ok(Expression::Function(Box::new(Function::new(
13433 name.to_string(),
13434 vec![arr],
13435 ))))
13436 }
13437 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
13438 "TO_VARIANT" if f.args.len() == 1 => match target {
13439 DialectType::DuckDB => {
13440 let arg = f.args.into_iter().next().unwrap();
13441 Ok(Expression::Cast(Box::new(Cast {
13442 this: arg,
13443 to: DataType::Custom {
13444 name: "VARIANT".to_string(),
13445 },
13446 double_colon_syntax: false,
13447 trailing_comments: Vec::new(),
13448 format: None,
13449 default: None,
13450 inferred_type: None,
13451 })))
13452 }
13453 _ => Ok(Expression::Function(f)),
13454 },
13455 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
13456 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
13457 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13458 Function::new("JSON_AGG".to_string(), f.args),
13459 ))),
13460 _ => Ok(Expression::Function(f)),
13461 },
13462 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
13463 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
13464 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
13465 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
13466 ))),
13467 _ => Ok(Expression::Function(f)),
13468 },
13469 // UNICODE(x) -> target-specific codepoint function
13470 "UNICODE" if f.args.len() == 1 => {
13471 match target {
13472 DialectType::SQLite | DialectType::DuckDB => {
13473 Ok(Expression::Function(Box::new(Function::new(
13474 "UNICODE".to_string(),
13475 f.args,
13476 ))))
13477 }
13478 DialectType::Oracle => {
13479 // ASCII(UNISTR(x))
13480 let inner = Expression::Function(Box::new(Function::new(
13481 "UNISTR".to_string(),
13482 f.args,
13483 )));
13484 Ok(Expression::Function(Box::new(Function::new(
13485 "ASCII".to_string(),
13486 vec![inner],
13487 ))))
13488 }
13489 DialectType::MySQL => {
13490 // ORD(CONVERT(x USING utf32))
13491 let arg = f.args.into_iter().next().unwrap();
13492 let convert_expr = Expression::ConvertToCharset(Box::new(
13493 crate::expressions::ConvertToCharset {
13494 this: Box::new(arg),
13495 dest: Some(Box::new(Expression::Identifier(
13496 crate::expressions::Identifier::new("utf32"),
13497 ))),
13498 source: None,
13499 },
13500 ));
13501 Ok(Expression::Function(Box::new(Function::new(
13502 "ORD".to_string(),
13503 vec![convert_expr],
13504 ))))
13505 }
13506 _ => Ok(Expression::Function(Box::new(Function::new(
13507 "ASCII".to_string(),
13508 f.args,
13509 )))),
13510 }
13511 }
13512 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
13513 "XOR" if f.args.len() >= 2 => {
13514 match target {
13515 DialectType::ClickHouse => {
13516 // ClickHouse: keep as xor() function with lowercase name
13517 Ok(Expression::Function(Box::new(Function::new(
13518 "xor".to_string(),
13519 f.args,
13520 ))))
13521 }
13522 DialectType::Presto | DialectType::Trino => {
13523 if f.args.len() == 2 {
13524 Ok(Expression::Function(Box::new(Function::new(
13525 "BITWISE_XOR".to_string(),
13526 f.args,
13527 ))))
13528 } else {
13529 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
13530 let mut args = f.args;
13531 let first = args.remove(0);
13532 let second = args.remove(0);
13533 let mut result =
13534 Expression::Function(Box::new(Function::new(
13535 "BITWISE_XOR".to_string(),
13536 vec![first, second],
13537 )));
13538 for arg in args {
13539 result =
13540 Expression::Function(Box::new(Function::new(
13541 "BITWISE_XOR".to_string(),
13542 vec![result, arg],
13543 )));
13544 }
13545 Ok(result)
13546 }
13547 }
13548 DialectType::MySQL
13549 | DialectType::SingleStore
13550 | DialectType::Doris
13551 | DialectType::StarRocks => {
13552 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
13553 let args = f.args;
13554 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
13555 this: None,
13556 expression: None,
13557 expressions: args,
13558 })))
13559 }
13560 DialectType::PostgreSQL | DialectType::Redshift => {
13561 // PostgreSQL: a # b (hash operator for XOR)
13562 let mut args = f.args;
13563 let first = args.remove(0);
13564 let second = args.remove(0);
13565 let mut result = Expression::BitwiseXor(Box::new(
13566 BinaryOp::new(first, second),
13567 ));
13568 for arg in args {
13569 result = Expression::BitwiseXor(Box::new(
13570 BinaryOp::new(result, arg),
13571 ));
13572 }
13573 Ok(result)
13574 }
13575 DialectType::DuckDB => {
13576 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
13577 Ok(Expression::Function(Box::new(Function::new(
13578 "XOR".to_string(),
13579 f.args,
13580 ))))
13581 }
13582 DialectType::BigQuery => {
13583 // BigQuery: a ^ b (caret operator for XOR)
13584 let mut args = f.args;
13585 let first = args.remove(0);
13586 let second = args.remove(0);
13587 let mut result = Expression::BitwiseXor(Box::new(
13588 BinaryOp::new(first, second),
13589 ));
13590 for arg in args {
13591 result = Expression::BitwiseXor(Box::new(
13592 BinaryOp::new(result, arg),
13593 ));
13594 }
13595 Ok(result)
13596 }
13597 _ => Ok(Expression::Function(Box::new(Function::new(
13598 "XOR".to_string(),
13599 f.args,
13600 )))),
13601 }
13602 }
13603 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
13604 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
13605 match target {
13606 DialectType::Spark
13607 | DialectType::Databricks
13608 | DialectType::Hive => {
13609 let mut args = f.args;
13610 args.push(Expression::Identifier(
13611 crate::expressions::Identifier::new("FALSE"),
13612 ));
13613 Ok(Expression::Function(Box::new(Function::new(
13614 "SORT_ARRAY".to_string(),
13615 args,
13616 ))))
13617 }
13618 DialectType::Presto
13619 | DialectType::Trino
13620 | DialectType::Athena => {
13621 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
13622 let arr = f.args.into_iter().next().unwrap();
13623 let lambda = Expression::Lambda(Box::new(
13624 crate::expressions::LambdaExpr {
13625 parameters: vec![
13626 Identifier::new("a"),
13627 Identifier::new("b"),
13628 ],
13629 colon: false,
13630 parameter_types: Vec::new(),
13631 body: Expression::Case(Box::new(Case {
13632 operand: None,
13633 whens: vec![
13634 (
13635 Expression::Lt(Box::new(
13636 BinaryOp::new(
13637 Expression::Identifier(
13638 Identifier::new("a"),
13639 ),
13640 Expression::Identifier(
13641 Identifier::new("b"),
13642 ),
13643 ),
13644 )),
13645 Expression::number(1),
13646 ),
13647 (
13648 Expression::Gt(Box::new(
13649 BinaryOp::new(
13650 Expression::Identifier(
13651 Identifier::new("a"),
13652 ),
13653 Expression::Identifier(
13654 Identifier::new("b"),
13655 ),
13656 ),
13657 )),
13658 Expression::Neg(Box::new(
13659 crate::expressions::UnaryOp {
13660 this: Expression::number(1),
13661 inferred_type: None,
13662 },
13663 )),
13664 ),
13665 ],
13666 else_: Some(Expression::number(0)),
13667 comments: Vec::new(),
13668 inferred_type: None,
13669 })),
13670 },
13671 ));
13672 Ok(Expression::Function(Box::new(Function::new(
13673 "ARRAY_SORT".to_string(),
13674 vec![arr, lambda],
13675 ))))
13676 }
13677 _ => Ok(Expression::Function(Box::new(Function::new(
13678 "ARRAY_REVERSE_SORT".to_string(),
13679 f.args,
13680 )))),
13681 }
13682 }
13683 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
13684 "ENCODE" if f.args.len() == 1 => match target {
13685 DialectType::Spark
13686 | DialectType::Databricks
13687 | DialectType::Hive => {
13688 let mut args = f.args;
13689 args.push(Expression::string("utf-8"));
13690 Ok(Expression::Function(Box::new(Function::new(
13691 "ENCODE".to_string(),
13692 args,
13693 ))))
13694 }
13695 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13696 Ok(Expression::Function(Box::new(Function::new(
13697 "TO_UTF8".to_string(),
13698 f.args,
13699 ))))
13700 }
13701 _ => Ok(Expression::Function(Box::new(Function::new(
13702 "ENCODE".to_string(),
13703 f.args,
13704 )))),
13705 },
13706 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
13707 "DECODE" if f.args.len() == 1 => match target {
13708 DialectType::Spark
13709 | DialectType::Databricks
13710 | DialectType::Hive => {
13711 let mut args = f.args;
13712 args.push(Expression::string("utf-8"));
13713 Ok(Expression::Function(Box::new(Function::new(
13714 "DECODE".to_string(),
13715 args,
13716 ))))
13717 }
13718 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
13719 Ok(Expression::Function(Box::new(Function::new(
13720 "FROM_UTF8".to_string(),
13721 f.args,
13722 ))))
13723 }
13724 _ => Ok(Expression::Function(Box::new(Function::new(
13725 "DECODE".to_string(),
13726 f.args,
13727 )))),
13728 },
13729 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
13730 "QUANTILE" if f.args.len() == 2 => {
13731 let name = match target {
13732 DialectType::Spark
13733 | DialectType::Databricks
13734 | DialectType::Hive => "PERCENTILE",
13735 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
13736 DialectType::BigQuery => "PERCENTILE_CONT",
13737 _ => "QUANTILE",
13738 };
13739 Ok(Expression::Function(Box::new(Function::new(
13740 name.to_string(),
13741 f.args,
13742 ))))
13743 }
13744 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
13745 "QUANTILE_CONT" if f.args.len() == 2 => {
13746 let mut args = f.args;
13747 let column = args.remove(0);
13748 let quantile = args.remove(0);
13749 match target {
13750 DialectType::DuckDB => {
13751 Ok(Expression::Function(Box::new(Function::new(
13752 "QUANTILE_CONT".to_string(),
13753 vec![column, quantile],
13754 ))))
13755 }
13756 DialectType::PostgreSQL
13757 | DialectType::Redshift
13758 | DialectType::Snowflake => {
13759 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
13760 let inner = Expression::PercentileCont(Box::new(
13761 crate::expressions::PercentileFunc {
13762 this: column.clone(),
13763 percentile: quantile,
13764 order_by: None,
13765 filter: None,
13766 },
13767 ));
13768 Ok(Expression::WithinGroup(Box::new(
13769 crate::expressions::WithinGroup {
13770 this: inner,
13771 order_by: vec![crate::expressions::Ordered {
13772 this: column,
13773 desc: false,
13774 nulls_first: None,
13775 explicit_asc: false,
13776 with_fill: None,
13777 }],
13778 },
13779 )))
13780 }
13781 _ => Ok(Expression::Function(Box::new(Function::new(
13782 "QUANTILE_CONT".to_string(),
13783 vec![column, quantile],
13784 )))),
13785 }
13786 }
13787 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
13788 "QUANTILE_DISC" if f.args.len() == 2 => {
13789 let mut args = f.args;
13790 let column = args.remove(0);
13791 let quantile = args.remove(0);
13792 match target {
13793 DialectType::DuckDB => {
13794 Ok(Expression::Function(Box::new(Function::new(
13795 "QUANTILE_DISC".to_string(),
13796 vec![column, quantile],
13797 ))))
13798 }
13799 DialectType::PostgreSQL
13800 | DialectType::Redshift
13801 | DialectType::Snowflake => {
13802 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
13803 let inner = Expression::PercentileDisc(Box::new(
13804 crate::expressions::PercentileFunc {
13805 this: column.clone(),
13806 percentile: quantile,
13807 order_by: None,
13808 filter: None,
13809 },
13810 ));
13811 Ok(Expression::WithinGroup(Box::new(
13812 crate::expressions::WithinGroup {
13813 this: inner,
13814 order_by: vec![crate::expressions::Ordered {
13815 this: column,
13816 desc: false,
13817 nulls_first: None,
13818 explicit_asc: false,
13819 with_fill: None,
13820 }],
13821 },
13822 )))
13823 }
13824 _ => Ok(Expression::Function(Box::new(Function::new(
13825 "QUANTILE_DISC".to_string(),
13826 vec![column, quantile],
13827 )))),
13828 }
13829 }
13830 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
13831 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
13832 let name = match target {
13833 DialectType::Presto
13834 | DialectType::Trino
13835 | DialectType::Athena => "APPROX_PERCENTILE",
13836 DialectType::Spark
13837 | DialectType::Databricks
13838 | DialectType::Hive => "PERCENTILE_APPROX",
13839 DialectType::DuckDB => "APPROX_QUANTILE",
13840 DialectType::PostgreSQL | DialectType::Redshift => {
13841 "PERCENTILE_CONT"
13842 }
13843 _ => &f.name,
13844 };
13845 Ok(Expression::Function(Box::new(Function::new(
13846 name.to_string(),
13847 f.args,
13848 ))))
13849 }
13850 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
13851 "EPOCH" if f.args.len() == 1 => {
13852 let name = match target {
13853 DialectType::Spark
13854 | DialectType::Databricks
13855 | DialectType::Hive => "UNIX_TIMESTAMP",
13856 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
13857 _ => "EPOCH",
13858 };
13859 Ok(Expression::Function(Box::new(Function::new(
13860 name.to_string(),
13861 f.args,
13862 ))))
13863 }
13864 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
13865 "EPOCH_MS" if f.args.len() == 1 => {
13866 match target {
13867 DialectType::Spark | DialectType::Databricks => {
13868 Ok(Expression::Function(Box::new(Function::new(
13869 "TIMESTAMP_MILLIS".to_string(),
13870 f.args,
13871 ))))
13872 }
13873 DialectType::Hive => {
13874 // Hive: FROM_UNIXTIME(x / 1000)
13875 let arg = f.args.into_iter().next().unwrap();
13876 let div_expr = Expression::Div(Box::new(
13877 crate::expressions::BinaryOp::new(
13878 arg,
13879 Expression::number(1000),
13880 ),
13881 ));
13882 Ok(Expression::Function(Box::new(Function::new(
13883 "FROM_UNIXTIME".to_string(),
13884 vec![div_expr],
13885 ))))
13886 }
13887 DialectType::Presto | DialectType::Trino => {
13888 Ok(Expression::Function(Box::new(Function::new(
13889 "FROM_UNIXTIME".to_string(),
13890 vec![Expression::Div(Box::new(
13891 crate::expressions::BinaryOp::new(
13892 f.args.into_iter().next().unwrap(),
13893 Expression::number(1000),
13894 ),
13895 ))],
13896 ))))
13897 }
13898 _ => Ok(Expression::Function(Box::new(Function::new(
13899 "EPOCH_MS".to_string(),
13900 f.args,
13901 )))),
13902 }
13903 }
13904 // HASHBYTES('algorithm', x) -> target-specific hash function
13905 "HASHBYTES" if f.args.len() == 2 => {
13906 // Keep HASHBYTES as-is for TSQL target
13907 if matches!(target, DialectType::TSQL) {
13908 return Ok(Expression::Function(f));
13909 }
13910 let algo_expr = &f.args[0];
13911 let algo = match algo_expr {
13912 Expression::Literal(lit)
13913 if matches!(
13914 lit.as_ref(),
13915 crate::expressions::Literal::String(_)
13916 ) =>
13917 {
13918 let crate::expressions::Literal::String(s) = lit.as_ref()
13919 else {
13920 unreachable!()
13921 };
13922 s.to_ascii_uppercase()
13923 }
13924 _ => return Ok(Expression::Function(f)),
13925 };
13926 let data_arg = f.args.into_iter().nth(1).unwrap();
13927 match algo.as_str() {
13928 "SHA1" => {
13929 let name = match target {
13930 DialectType::Spark | DialectType::Databricks => "SHA",
13931 DialectType::Hive => "SHA1",
13932 _ => "SHA1",
13933 };
13934 Ok(Expression::Function(Box::new(Function::new(
13935 name.to_string(),
13936 vec![data_arg],
13937 ))))
13938 }
13939 "SHA2_256" => {
13940 Ok(Expression::Function(Box::new(Function::new(
13941 "SHA2".to_string(),
13942 vec![data_arg, Expression::number(256)],
13943 ))))
13944 }
13945 "SHA2_512" => {
13946 Ok(Expression::Function(Box::new(Function::new(
13947 "SHA2".to_string(),
13948 vec![data_arg, Expression::number(512)],
13949 ))))
13950 }
13951 "MD5" => Ok(Expression::Function(Box::new(Function::new(
13952 "MD5".to_string(),
13953 vec![data_arg],
13954 )))),
13955 _ => Ok(Expression::Function(Box::new(Function::new(
13956 "HASHBYTES".to_string(),
13957 vec![Expression::string(&algo), data_arg],
13958 )))),
13959 }
13960 }
13961 // JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
13962 "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
13963 let is_text = name == "JSON_EXTRACT_PATH_TEXT";
13964 let mut args = f.args;
13965 let json_expr = args.remove(0);
13966 // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
13967 let mut json_path = "$".to_string();
13968 for a in &args {
13969 match a {
13970 Expression::Literal(lit)
13971 if matches!(
13972 lit.as_ref(),
13973 crate::expressions::Literal::String(_)
13974 ) =>
13975 {
13976 let crate::expressions::Literal::String(s) =
13977 lit.as_ref()
13978 else {
13979 unreachable!()
13980 };
13981 // Numeric string keys become array indices: [0]
13982 if s.chars().all(|c| c.is_ascii_digit()) {
13983 json_path.push('[');
13984 json_path.push_str(s);
13985 json_path.push(']');
13986 } else {
13987 json_path.push('.');
13988 json_path.push_str(s);
13989 }
13990 }
13991 _ => {
13992 json_path.push_str(".?");
13993 }
13994 }
13995 }
13996 match target {
13997 DialectType::Spark
13998 | DialectType::Databricks
13999 | DialectType::Hive => {
14000 Ok(Expression::Function(Box::new(Function::new(
14001 "GET_JSON_OBJECT".to_string(),
14002 vec![json_expr, Expression::string(&json_path)],
14003 ))))
14004 }
14005 DialectType::Presto | DialectType::Trino => {
14006 let func_name = if is_text {
14007 "JSON_EXTRACT_SCALAR"
14008 } else {
14009 "JSON_EXTRACT"
14010 };
14011 Ok(Expression::Function(Box::new(Function::new(
14012 func_name.to_string(),
14013 vec![json_expr, Expression::string(&json_path)],
14014 ))))
14015 }
14016 DialectType::BigQuery | DialectType::MySQL => {
14017 let func_name = if is_text {
14018 "JSON_EXTRACT_SCALAR"
14019 } else {
14020 "JSON_EXTRACT"
14021 };
14022 Ok(Expression::Function(Box::new(Function::new(
14023 func_name.to_string(),
14024 vec![json_expr, Expression::string(&json_path)],
14025 ))))
14026 }
14027 DialectType::PostgreSQL | DialectType::Materialize => {
14028 // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
14029 let func_name = if is_text {
14030 "JSON_EXTRACT_PATH_TEXT"
14031 } else {
14032 "JSON_EXTRACT_PATH"
14033 };
14034 let mut new_args = vec![json_expr];
14035 new_args.extend(args);
14036 Ok(Expression::Function(Box::new(Function::new(
14037 func_name.to_string(),
14038 new_args,
14039 ))))
14040 }
14041 DialectType::DuckDB | DialectType::SQLite => {
14042 // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
14043 if is_text {
14044 Ok(Expression::JsonExtractScalar(Box::new(
14045 crate::expressions::JsonExtractFunc {
14046 this: json_expr,
14047 path: Expression::string(&json_path),
14048 returning: None,
14049 arrow_syntax: true,
14050 hash_arrow_syntax: false,
14051 wrapper_option: None,
14052 quotes_option: None,
14053 on_scalar_string: false,
14054 on_error: None,
14055 },
14056 )))
14057 } else {
14058 Ok(Expression::JsonExtract(Box::new(
14059 crate::expressions::JsonExtractFunc {
14060 this: json_expr,
14061 path: Expression::string(&json_path),
14062 returning: None,
14063 arrow_syntax: true,
14064 hash_arrow_syntax: false,
14065 wrapper_option: None,
14066 quotes_option: None,
14067 on_scalar_string: false,
14068 on_error: None,
14069 },
14070 )))
14071 }
14072 }
14073 DialectType::Redshift => {
14074 // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
14075 let mut new_args = vec![json_expr];
14076 new_args.extend(args);
14077 Ok(Expression::Function(Box::new(Function::new(
14078 "JSON_EXTRACT_PATH_TEXT".to_string(),
14079 new_args,
14080 ))))
14081 }
14082 DialectType::TSQL => {
14083 // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
14084 let jq = Expression::Function(Box::new(Function::new(
14085 "JSON_QUERY".to_string(),
14086 vec![json_expr.clone(), Expression::string(&json_path)],
14087 )));
14088 let jv = Expression::Function(Box::new(Function::new(
14089 "JSON_VALUE".to_string(),
14090 vec![json_expr, Expression::string(&json_path)],
14091 )));
14092 Ok(Expression::Function(Box::new(Function::new(
14093 "ISNULL".to_string(),
14094 vec![jq, jv],
14095 ))))
14096 }
14097 DialectType::ClickHouse => {
14098 let func_name = if is_text {
14099 "JSONExtractString"
14100 } else {
14101 "JSONExtractRaw"
14102 };
14103 let mut new_args = vec![json_expr];
14104 new_args.extend(args);
14105 Ok(Expression::Function(Box::new(Function::new(
14106 func_name.to_string(),
14107 new_args,
14108 ))))
14109 }
14110 _ => {
14111 let func_name = if is_text {
14112 "JSON_EXTRACT_SCALAR"
14113 } else {
14114 "JSON_EXTRACT"
14115 };
14116 Ok(Expression::Function(Box::new(Function::new(
14117 func_name.to_string(),
14118 vec![json_expr, Expression::string(&json_path)],
14119 ))))
14120 }
14121 }
14122 }
14123 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
14124 "APPROX_DISTINCT" if f.args.len() >= 1 => {
14125 let name = match target {
14126 DialectType::Spark
14127 | DialectType::Databricks
14128 | DialectType::Hive
14129 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
14130 _ => "APPROX_DISTINCT",
14131 };
14132 let mut args = f.args;
14133 // Hive doesn't support the accuracy parameter
14134 if name == "APPROX_COUNT_DISTINCT"
14135 && matches!(target, DialectType::Hive)
14136 {
14137 args.truncate(1);
14138 }
14139 Ok(Expression::Function(Box::new(Function::new(
14140 name.to_string(),
14141 args,
14142 ))))
14143 }
14144 // REGEXP_EXTRACT(x, pattern) - normalize default group index
14145 "REGEXP_EXTRACT" if f.args.len() == 2 => {
14146 // Determine source default group index
14147 let source_default = match source {
14148 DialectType::Presto
14149 | DialectType::Trino
14150 | DialectType::DuckDB => 0,
14151 _ => 1, // Hive/Spark/Databricks default = 1
14152 };
14153 // Determine target default group index
14154 let target_default = match target {
14155 DialectType::Presto
14156 | DialectType::Trino
14157 | DialectType::DuckDB
14158 | DialectType::BigQuery => 0,
14159 DialectType::Snowflake => {
14160 // Snowflake uses REGEXP_SUBSTR
14161 return Ok(Expression::Function(Box::new(Function::new(
14162 "REGEXP_SUBSTR".to_string(),
14163 f.args,
14164 ))));
14165 }
14166 _ => 1, // Hive/Spark/Databricks default = 1
14167 };
14168 if source_default != target_default {
14169 let mut args = f.args;
14170 args.push(Expression::number(source_default));
14171 Ok(Expression::Function(Box::new(Function::new(
14172 "REGEXP_EXTRACT".to_string(),
14173 args,
14174 ))))
14175 } else {
14176 Ok(Expression::Function(Box::new(Function::new(
14177 "REGEXP_EXTRACT".to_string(),
14178 f.args,
14179 ))))
14180 }
14181 }
14182 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
14183 "RLIKE" if f.args.len() == 2 => {
14184 let mut args = f.args;
14185 let str_expr = args.remove(0);
14186 let pattern = args.remove(0);
14187 match target {
14188 DialectType::DuckDB => {
14189 // REGEXP_MATCHES(str, pattern)
14190 Ok(Expression::Function(Box::new(Function::new(
14191 "REGEXP_MATCHES".to_string(),
14192 vec![str_expr, pattern],
14193 ))))
14194 }
14195 _ => {
14196 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
14197 Ok(Expression::RegexpLike(Box::new(
14198 crate::expressions::RegexpFunc {
14199 this: str_expr,
14200 pattern,
14201 flags: None,
14202 },
14203 )))
14204 }
14205 }
14206 }
14207 // EOMONTH(date[, month_offset]) -> target-specific
14208 "EOMONTH" if f.args.len() >= 1 => {
14209 let mut args = f.args;
14210 let date_arg = args.remove(0);
14211 let month_offset = if !args.is_empty() {
14212 Some(args.remove(0))
14213 } else {
14214 None
14215 };
14216
14217 // Helper: wrap date in CAST to DATE
14218 let cast_to_date = |e: Expression| -> Expression {
14219 Expression::Cast(Box::new(Cast {
14220 this: e,
14221 to: DataType::Date,
14222 trailing_comments: vec![],
14223 double_colon_syntax: false,
14224 format: None,
14225 default: None,
14226 inferred_type: None,
14227 }))
14228 };
14229
14230 match target {
14231 DialectType::TSQL | DialectType::Fabric => {
14232 // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
14233 let date = cast_to_date(date_arg);
14234 let date = if let Some(offset) = month_offset {
14235 Expression::Function(Box::new(Function::new(
14236 "DATEADD".to_string(),
14237 vec![
14238 Expression::Identifier(Identifier::new(
14239 "MONTH",
14240 )),
14241 offset,
14242 date,
14243 ],
14244 )))
14245 } else {
14246 date
14247 };
14248 Ok(Expression::Function(Box::new(Function::new(
14249 "EOMONTH".to_string(),
14250 vec![date],
14251 ))))
14252 }
14253 DialectType::Presto
14254 | DialectType::Trino
14255 | DialectType::Athena => {
14256 // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
14257 // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
14258 let cast_ts = Expression::Cast(Box::new(Cast {
14259 this: date_arg,
14260 to: DataType::Timestamp {
14261 timezone: false,
14262 precision: None,
14263 },
14264 trailing_comments: vec![],
14265 double_colon_syntax: false,
14266 format: None,
14267 default: None,
14268 inferred_type: None,
14269 }));
14270 let date = cast_to_date(cast_ts);
14271 let date = if let Some(offset) = month_offset {
14272 Expression::Function(Box::new(Function::new(
14273 "DATE_ADD".to_string(),
14274 vec![Expression::string("MONTH"), offset, date],
14275 )))
14276 } else {
14277 date
14278 };
14279 Ok(Expression::Function(Box::new(Function::new(
14280 "LAST_DAY_OF_MONTH".to_string(),
14281 vec![date],
14282 ))))
14283 }
14284 DialectType::PostgreSQL => {
14285 // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
14286 let date = cast_to_date(date_arg);
14287 let date = if let Some(offset) = month_offset {
14288 let interval_str = format!(
14289 "{} MONTH",
14290 Self::expr_to_string_static(&offset)
14291 );
14292 Expression::Add(Box::new(
14293 crate::expressions::BinaryOp::new(
14294 date,
14295 Expression::Interval(Box::new(
14296 crate::expressions::Interval {
14297 this: Some(Expression::string(
14298 &interval_str,
14299 )),
14300 unit: None,
14301 },
14302 )),
14303 ),
14304 ))
14305 } else {
14306 date
14307 };
14308 let truncated =
14309 Expression::Function(Box::new(Function::new(
14310 "DATE_TRUNC".to_string(),
14311 vec![Expression::string("MONTH"), date],
14312 )));
14313 let plus_month = Expression::Add(Box::new(
14314 crate::expressions::BinaryOp::new(
14315 truncated,
14316 Expression::Interval(Box::new(
14317 crate::expressions::Interval {
14318 this: Some(Expression::string("1 MONTH")),
14319 unit: None,
14320 },
14321 )),
14322 ),
14323 ));
14324 let minus_day = Expression::Sub(Box::new(
14325 crate::expressions::BinaryOp::new(
14326 plus_month,
14327 Expression::Interval(Box::new(
14328 crate::expressions::Interval {
14329 this: Some(Expression::string("1 DAY")),
14330 unit: None,
14331 },
14332 )),
14333 ),
14334 ));
14335 Ok(Expression::Cast(Box::new(Cast {
14336 this: minus_day,
14337 to: DataType::Date,
14338 trailing_comments: vec![],
14339 double_colon_syntax: false,
14340 format: None,
14341 default: None,
14342 inferred_type: None,
14343 })))
14344 }
14345 DialectType::DuckDB => {
14346 // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
14347 let date = cast_to_date(date_arg);
14348 let date = if let Some(offset) = month_offset {
14349 // Wrap negative numbers in parentheses for DuckDB INTERVAL
14350 let interval_val =
14351 if matches!(&offset, Expression::Neg(_)) {
14352 Expression::Paren(Box::new(
14353 crate::expressions::Paren {
14354 this: offset,
14355 trailing_comments: Vec::new(),
14356 },
14357 ))
14358 } else {
14359 offset
14360 };
14361 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
14362 date,
14363 Expression::Interval(Box::new(crate::expressions::Interval {
14364 this: Some(interval_val),
14365 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14366 unit: crate::expressions::IntervalUnit::Month,
14367 use_plural: false,
14368 }),
14369 })),
14370 )))
14371 } else {
14372 date
14373 };
14374 Ok(Expression::Function(Box::new(Function::new(
14375 "LAST_DAY".to_string(),
14376 vec![date],
14377 ))))
14378 }
14379 DialectType::Snowflake | DialectType::Redshift => {
14380 // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
14381 // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
14382 let date = if matches!(target, DialectType::Snowflake) {
14383 Expression::Function(Box::new(Function::new(
14384 "TO_DATE".to_string(),
14385 vec![date_arg],
14386 )))
14387 } else {
14388 cast_to_date(date_arg)
14389 };
14390 let date = if let Some(offset) = month_offset {
14391 Expression::Function(Box::new(Function::new(
14392 "DATEADD".to_string(),
14393 vec![
14394 Expression::Identifier(Identifier::new(
14395 "MONTH",
14396 )),
14397 offset,
14398 date,
14399 ],
14400 )))
14401 } else {
14402 date
14403 };
14404 Ok(Expression::Function(Box::new(Function::new(
14405 "LAST_DAY".to_string(),
14406 vec![date],
14407 ))))
14408 }
14409 DialectType::Spark | DialectType::Databricks => {
14410 // Spark: LAST_DAY(TO_DATE(date))
14411 // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
14412 let date = Expression::Function(Box::new(Function::new(
14413 "TO_DATE".to_string(),
14414 vec![date_arg],
14415 )));
14416 let date = if let Some(offset) = month_offset {
14417 Expression::Function(Box::new(Function::new(
14418 "ADD_MONTHS".to_string(),
14419 vec![date, offset],
14420 )))
14421 } else {
14422 date
14423 };
14424 Ok(Expression::Function(Box::new(Function::new(
14425 "LAST_DAY".to_string(),
14426 vec![date],
14427 ))))
14428 }
14429 DialectType::MySQL => {
14430 // MySQL: LAST_DAY(DATE(date)) - no offset
14431 // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
14432 let date = if let Some(offset) = month_offset {
14433 let iu = crate::expressions::IntervalUnit::Month;
14434 Expression::DateAdd(Box::new(
14435 crate::expressions::DateAddFunc {
14436 this: date_arg,
14437 interval: offset,
14438 unit: iu,
14439 },
14440 ))
14441 } else {
14442 Expression::Function(Box::new(Function::new(
14443 "DATE".to_string(),
14444 vec![date_arg],
14445 )))
14446 };
14447 Ok(Expression::Function(Box::new(Function::new(
14448 "LAST_DAY".to_string(),
14449 vec![date],
14450 ))))
14451 }
14452 DialectType::BigQuery => {
14453 // BigQuery: LAST_DAY(CAST(date AS DATE))
14454 // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
14455 let date = cast_to_date(date_arg);
14456 let date = if let Some(offset) = month_offset {
14457 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
14458 this: Some(offset),
14459 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
14460 unit: crate::expressions::IntervalUnit::Month,
14461 use_plural: false,
14462 }),
14463 }));
14464 Expression::Function(Box::new(Function::new(
14465 "DATE_ADD".to_string(),
14466 vec![date, interval],
14467 )))
14468 } else {
14469 date
14470 };
14471 Ok(Expression::Function(Box::new(Function::new(
14472 "LAST_DAY".to_string(),
14473 vec![date],
14474 ))))
14475 }
14476 DialectType::ClickHouse => {
14477 // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
14478 let date = Expression::Cast(Box::new(Cast {
14479 this: date_arg,
14480 to: DataType::Nullable {
14481 inner: Box::new(DataType::Date),
14482 },
14483 trailing_comments: vec![],
14484 double_colon_syntax: false,
14485 format: None,
14486 default: None,
14487 inferred_type: None,
14488 }));
14489 let date = if let Some(offset) = month_offset {
14490 Expression::Function(Box::new(Function::new(
14491 "DATE_ADD".to_string(),
14492 vec![
14493 Expression::Identifier(Identifier::new(
14494 "MONTH",
14495 )),
14496 offset,
14497 date,
14498 ],
14499 )))
14500 } else {
14501 date
14502 };
14503 Ok(Expression::Function(Box::new(Function::new(
14504 "LAST_DAY".to_string(),
14505 vec![date],
14506 ))))
14507 }
14508 DialectType::Hive => {
14509 // Hive: LAST_DAY(date)
14510 let date = if let Some(offset) = month_offset {
14511 Expression::Function(Box::new(Function::new(
14512 "ADD_MONTHS".to_string(),
14513 vec![date_arg, offset],
14514 )))
14515 } else {
14516 date_arg
14517 };
14518 Ok(Expression::Function(Box::new(Function::new(
14519 "LAST_DAY".to_string(),
14520 vec![date],
14521 ))))
14522 }
14523 _ => {
14524 // Default: LAST_DAY(date)
14525 let date = if let Some(offset) = month_offset {
14526 let unit =
14527 Expression::Identifier(Identifier::new("MONTH"));
14528 Expression::Function(Box::new(Function::new(
14529 "DATEADD".to_string(),
14530 vec![unit, offset, date_arg],
14531 )))
14532 } else {
14533 date_arg
14534 };
14535 Ok(Expression::Function(Box::new(Function::new(
14536 "LAST_DAY".to_string(),
14537 vec![date],
14538 ))))
14539 }
14540 }
14541 }
14542 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
14543 "LAST_DAY" | "LAST_DAY_OF_MONTH"
14544 if !matches!(source, DialectType::BigQuery)
14545 && f.args.len() >= 1 =>
14546 {
14547 let first_arg = f.args.into_iter().next().unwrap();
14548 match target {
14549 DialectType::TSQL | DialectType::Fabric => {
14550 Ok(Expression::Function(Box::new(Function::new(
14551 "EOMONTH".to_string(),
14552 vec![first_arg],
14553 ))))
14554 }
14555 DialectType::Presto
14556 | DialectType::Trino
14557 | DialectType::Athena => {
14558 Ok(Expression::Function(Box::new(Function::new(
14559 "LAST_DAY_OF_MONTH".to_string(),
14560 vec![first_arg],
14561 ))))
14562 }
14563 _ => Ok(Expression::Function(Box::new(Function::new(
14564 "LAST_DAY".to_string(),
14565 vec![first_arg],
14566 )))),
14567 }
14568 }
14569 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
14570 "MAP"
14571 if f.args.len() == 2
14572 && matches!(
14573 source,
14574 DialectType::Presto
14575 | DialectType::Trino
14576 | DialectType::Athena
14577 ) =>
14578 {
14579 let keys_arg = f.args[0].clone();
14580 let vals_arg = f.args[1].clone();
14581
14582 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
14583 fn extract_array_elements(
14584 expr: &Expression,
14585 ) -> Option<&Vec<Expression>> {
14586 match expr {
14587 Expression::Array(arr) => Some(&arr.expressions),
14588 Expression::ArrayFunc(arr) => Some(&arr.expressions),
14589 Expression::Function(f)
14590 if f.name.eq_ignore_ascii_case("ARRAY") =>
14591 {
14592 Some(&f.args)
14593 }
14594 _ => None,
14595 }
14596 }
14597
14598 match target {
14599 DialectType::Spark | DialectType::Databricks => {
14600 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
14601 Ok(Expression::Function(Box::new(Function::new(
14602 "MAP_FROM_ARRAYS".to_string(),
14603 f.args,
14604 ))))
14605 }
14606 DialectType::Hive => {
14607 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
14608 if let (Some(keys), Some(vals)) = (
14609 extract_array_elements(&keys_arg),
14610 extract_array_elements(&vals_arg),
14611 ) {
14612 if keys.len() == vals.len() {
14613 let mut interleaved = Vec::new();
14614 for (k, v) in keys.iter().zip(vals.iter()) {
14615 interleaved.push(k.clone());
14616 interleaved.push(v.clone());
14617 }
14618 Ok(Expression::Function(Box::new(Function::new(
14619 "MAP".to_string(),
14620 interleaved,
14621 ))))
14622 } else {
14623 Ok(Expression::Function(Box::new(Function::new(
14624 "MAP".to_string(),
14625 f.args,
14626 ))))
14627 }
14628 } else {
14629 Ok(Expression::Function(Box::new(Function::new(
14630 "MAP".to_string(),
14631 f.args,
14632 ))))
14633 }
14634 }
14635 DialectType::Snowflake => {
14636 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
14637 if let (Some(keys), Some(vals)) = (
14638 extract_array_elements(&keys_arg),
14639 extract_array_elements(&vals_arg),
14640 ) {
14641 if keys.len() == vals.len() {
14642 let mut interleaved = Vec::new();
14643 for (k, v) in keys.iter().zip(vals.iter()) {
14644 interleaved.push(k.clone());
14645 interleaved.push(v.clone());
14646 }
14647 Ok(Expression::Function(Box::new(Function::new(
14648 "OBJECT_CONSTRUCT".to_string(),
14649 interleaved,
14650 ))))
14651 } else {
14652 Ok(Expression::Function(Box::new(Function::new(
14653 "MAP".to_string(),
14654 f.args,
14655 ))))
14656 }
14657 } else {
14658 Ok(Expression::Function(Box::new(Function::new(
14659 "MAP".to_string(),
14660 f.args,
14661 ))))
14662 }
14663 }
14664 _ => Ok(Expression::Function(f)),
14665 }
14666 }
14667 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
14668 "MAP"
14669 if f.args.is_empty()
14670 && matches!(
14671 source,
14672 DialectType::Hive
14673 | DialectType::Spark
14674 | DialectType::Databricks
14675 )
14676 && matches!(
14677 target,
14678 DialectType::Presto
14679 | DialectType::Trino
14680 | DialectType::Athena
14681 ) =>
14682 {
14683 let empty_keys =
14684 Expression::Array(Box::new(crate::expressions::Array {
14685 expressions: vec![],
14686 }));
14687 let empty_vals =
14688 Expression::Array(Box::new(crate::expressions::Array {
14689 expressions: vec![],
14690 }));
14691 Ok(Expression::Function(Box::new(Function::new(
14692 "MAP".to_string(),
14693 vec![empty_keys, empty_vals],
14694 ))))
14695 }
14696 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
14697 "MAP"
14698 if f.args.len() >= 2
14699 && f.args.len() % 2 == 0
14700 && matches!(
14701 source,
14702 DialectType::Hive
14703 | DialectType::Spark
14704 | DialectType::Databricks
14705 | DialectType::ClickHouse
14706 ) =>
14707 {
14708 let args = f.args;
14709 match target {
14710 DialectType::DuckDB => {
14711 // MAP([k1, k2], [v1, v2])
14712 let mut keys = Vec::new();
14713 let mut vals = Vec::new();
14714 for (i, arg) in args.into_iter().enumerate() {
14715 if i % 2 == 0 {
14716 keys.push(arg);
14717 } else {
14718 vals.push(arg);
14719 }
14720 }
14721 let keys_arr = Expression::Array(Box::new(
14722 crate::expressions::Array { expressions: keys },
14723 ));
14724 let vals_arr = Expression::Array(Box::new(
14725 crate::expressions::Array { expressions: vals },
14726 ));
14727 Ok(Expression::Function(Box::new(Function::new(
14728 "MAP".to_string(),
14729 vec![keys_arr, vals_arr],
14730 ))))
14731 }
14732 DialectType::Presto | DialectType::Trino => {
14733 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
14734 let mut keys = Vec::new();
14735 let mut vals = Vec::new();
14736 for (i, arg) in args.into_iter().enumerate() {
14737 if i % 2 == 0 {
14738 keys.push(arg);
14739 } else {
14740 vals.push(arg);
14741 }
14742 }
14743 let keys_arr = Expression::Array(Box::new(
14744 crate::expressions::Array { expressions: keys },
14745 ));
14746 let vals_arr = Expression::Array(Box::new(
14747 crate::expressions::Array { expressions: vals },
14748 ));
14749 Ok(Expression::Function(Box::new(Function::new(
14750 "MAP".to_string(),
14751 vec![keys_arr, vals_arr],
14752 ))))
14753 }
14754 DialectType::Snowflake => Ok(Expression::Function(Box::new(
14755 Function::new("OBJECT_CONSTRUCT".to_string(), args),
14756 ))),
14757 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
14758 Function::new("map".to_string(), args),
14759 ))),
14760 _ => Ok(Expression::Function(Box::new(Function::new(
14761 "MAP".to_string(),
14762 args,
14763 )))),
14764 }
14765 }
14766 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
14767 "COLLECT_LIST" if f.args.len() >= 1 => {
14768 let name = match target {
14769 DialectType::Spark
14770 | DialectType::Databricks
14771 | DialectType::Hive => "COLLECT_LIST",
14772 DialectType::DuckDB
14773 | DialectType::PostgreSQL
14774 | DialectType::Redshift
14775 | DialectType::Snowflake
14776 | DialectType::BigQuery => "ARRAY_AGG",
14777 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
14778 _ => "ARRAY_AGG",
14779 };
14780 Ok(Expression::Function(Box::new(Function::new(
14781 name.to_string(),
14782 f.args,
14783 ))))
14784 }
14785 // COLLECT_SET(x) -> target-specific distinct array aggregation
14786 "COLLECT_SET" if f.args.len() >= 1 => {
14787 let name = match target {
14788 DialectType::Spark
14789 | DialectType::Databricks
14790 | DialectType::Hive => "COLLECT_SET",
14791 DialectType::Presto
14792 | DialectType::Trino
14793 | DialectType::Athena => "SET_AGG",
14794 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
14795 _ => "ARRAY_AGG",
14796 };
14797 Ok(Expression::Function(Box::new(Function::new(
14798 name.to_string(),
14799 f.args,
14800 ))))
14801 }
14802 // ISNAN(x) / IS_NAN(x) - normalize
14803 "ISNAN" | "IS_NAN" => {
14804 let name = match target {
14805 DialectType::Spark
14806 | DialectType::Databricks
14807 | DialectType::Hive => "ISNAN",
14808 DialectType::Presto
14809 | DialectType::Trino
14810 | DialectType::Athena => "IS_NAN",
14811 DialectType::BigQuery
14812 | DialectType::PostgreSQL
14813 | DialectType::Redshift => "IS_NAN",
14814 DialectType::ClickHouse => "IS_NAN",
14815 _ => "ISNAN",
14816 };
14817 Ok(Expression::Function(Box::new(Function::new(
14818 name.to_string(),
14819 f.args,
14820 ))))
14821 }
14822 // SPLIT_PART(str, delim, index) -> target-specific
14823 "SPLIT_PART" if f.args.len() == 3 => {
14824 match target {
14825 DialectType::Spark | DialectType::Databricks => {
14826 // Keep as SPLIT_PART (Spark 3.4+)
14827 Ok(Expression::Function(Box::new(Function::new(
14828 "SPLIT_PART".to_string(),
14829 f.args,
14830 ))))
14831 }
14832 DialectType::DuckDB
14833 if matches!(source, DialectType::Snowflake) =>
14834 {
14835 // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
14836 // - part_index 0 treated as 1
14837 // - empty delimiter: return whole string if index 1 or -1, else ''
14838 let mut args = f.args;
14839 let str_arg = args.remove(0);
14840 let delim_arg = args.remove(0);
14841 let idx_arg = args.remove(0);
14842
14843 // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
14844 let adjusted_idx = Expression::Paren(Box::new(Paren {
14845 this: Expression::Case(Box::new(Case {
14846 operand: None,
14847 whens: vec![(
14848 Expression::Eq(Box::new(BinaryOp {
14849 left: idx_arg.clone(),
14850 right: Expression::number(0),
14851 left_comments: vec![],
14852 operator_comments: vec![],
14853 trailing_comments: vec![],
14854 inferred_type: None,
14855 })),
14856 Expression::number(1),
14857 )],
14858 else_: Some(idx_arg.clone()),
14859 comments: vec![],
14860 inferred_type: None,
14861 })),
14862 trailing_comments: vec![],
14863 }));
14864
14865 // SPLIT_PART(str, delim, adjusted_idx)
14866 let base_func =
14867 Expression::Function(Box::new(Function::new(
14868 "SPLIT_PART".to_string(),
14869 vec![
14870 str_arg.clone(),
14871 delim_arg.clone(),
14872 adjusted_idx.clone(),
14873 ],
14874 )));
14875
14876 // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
14877 let empty_delim_case = Expression::Paren(Box::new(Paren {
14878 this: Expression::Case(Box::new(Case {
14879 operand: None,
14880 whens: vec![(
14881 Expression::Or(Box::new(BinaryOp {
14882 left: Expression::Eq(Box::new(BinaryOp {
14883 left: adjusted_idx.clone(),
14884 right: Expression::number(1),
14885 left_comments: vec![],
14886 operator_comments: vec![],
14887 trailing_comments: vec![],
14888 inferred_type: None,
14889 })),
14890 right: Expression::Eq(Box::new(BinaryOp {
14891 left: adjusted_idx,
14892 right: Expression::number(-1),
14893 left_comments: vec![],
14894 operator_comments: vec![],
14895 trailing_comments: vec![],
14896 inferred_type: None,
14897 })),
14898 left_comments: vec![],
14899 operator_comments: vec![],
14900 trailing_comments: vec![],
14901 inferred_type: None,
14902 })),
14903 str_arg,
14904 )],
14905 else_: Some(Expression::string("")),
14906 comments: vec![],
14907 inferred_type: None,
14908 })),
14909 trailing_comments: vec![],
14910 }));
14911
14912 // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
14913 Ok(Expression::Case(Box::new(Case {
14914 operand: None,
14915 whens: vec![(
14916 Expression::Eq(Box::new(BinaryOp {
14917 left: delim_arg,
14918 right: Expression::string(""),
14919 left_comments: vec![],
14920 operator_comments: vec![],
14921 trailing_comments: vec![],
14922 inferred_type: None,
14923 })),
14924 empty_delim_case,
14925 )],
14926 else_: Some(base_func),
14927 comments: vec![],
14928 inferred_type: None,
14929 })))
14930 }
14931 DialectType::DuckDB
14932 | DialectType::PostgreSQL
14933 | DialectType::Snowflake
14934 | DialectType::Redshift
14935 | DialectType::Trino
14936 | DialectType::Presto => Ok(Expression::Function(Box::new(
14937 Function::new("SPLIT_PART".to_string(), f.args),
14938 ))),
14939 DialectType::Hive => {
14940 // SPLIT(str, delim)[index]
14941 // Complex conversion, just keep as-is for now
14942 Ok(Expression::Function(Box::new(Function::new(
14943 "SPLIT_PART".to_string(),
14944 f.args,
14945 ))))
14946 }
14947 _ => Ok(Expression::Function(Box::new(Function::new(
14948 "SPLIT_PART".to_string(),
14949 f.args,
14950 )))),
14951 }
14952 }
14953 // JSON_EXTRACT(json, path) -> target-specific JSON extraction
14954 "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
14955 let is_scalar = name == "JSON_EXTRACT_SCALAR";
14956 match target {
14957 DialectType::Spark
14958 | DialectType::Databricks
14959 | DialectType::Hive => {
14960 let mut args = f.args;
14961 // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
14962 // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
14963 if let Some(Expression::Function(inner)) = args.first() {
14964 if inner.name.eq_ignore_ascii_case("TRY")
14965 && inner.args.len() == 1
14966 {
14967 let mut inner_args = inner.args.clone();
14968 args[0] = inner_args.remove(0);
14969 }
14970 }
14971 Ok(Expression::Function(Box::new(Function::new(
14972 "GET_JSON_OBJECT".to_string(),
14973 args,
14974 ))))
14975 }
14976 DialectType::DuckDB | DialectType::SQLite => {
14977 // json -> path syntax
14978 let mut args = f.args;
14979 let json_expr = args.remove(0);
14980 let path = args.remove(0);
14981 Ok(Expression::JsonExtract(Box::new(
14982 crate::expressions::JsonExtractFunc {
14983 this: json_expr,
14984 path,
14985 returning: None,
14986 arrow_syntax: true,
14987 hash_arrow_syntax: false,
14988 wrapper_option: None,
14989 quotes_option: None,
14990 on_scalar_string: false,
14991 on_error: None,
14992 },
14993 )))
14994 }
14995 DialectType::TSQL => {
14996 let func_name = if is_scalar {
14997 "JSON_VALUE"
14998 } else {
14999 "JSON_QUERY"
15000 };
15001 Ok(Expression::Function(Box::new(Function::new(
15002 func_name.to_string(),
15003 f.args,
15004 ))))
15005 }
15006 DialectType::PostgreSQL | DialectType::Redshift => {
15007 let func_name = if is_scalar {
15008 "JSON_EXTRACT_PATH_TEXT"
15009 } else {
15010 "JSON_EXTRACT_PATH"
15011 };
15012 Ok(Expression::Function(Box::new(Function::new(
15013 func_name.to_string(),
15014 f.args,
15015 ))))
15016 }
15017 _ => Ok(Expression::Function(Box::new(Function::new(
15018 name.to_string(),
15019 f.args,
15020 )))),
15021 }
15022 }
15023 // MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
15024 "JSON_SEARCH"
15025 if matches!(target, DialectType::DuckDB)
15026 && (3..=5).contains(&f.args.len()) =>
15027 {
15028 let args = &f.args;
15029
15030 // Only rewrite deterministic modes and NULL/no escape-char variant.
15031 let mode = match &args[1] {
15032 Expression::Literal(lit)
15033 if matches!(
15034 lit.as_ref(),
15035 crate::expressions::Literal::String(_)
15036 ) =>
15037 {
15038 let crate::expressions::Literal::String(s) = lit.as_ref()
15039 else {
15040 unreachable!()
15041 };
15042 s.to_ascii_lowercase()
15043 }
15044 _ => return Ok(Expression::Function(f)),
15045 };
15046 if mode != "one" && mode != "all" {
15047 return Ok(Expression::Function(f));
15048 }
15049 if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
15050 return Ok(Expression::Function(f));
15051 }
15052
15053 let json_doc_sql = match Generator::sql(&args[0]) {
15054 Ok(sql) => sql,
15055 Err(_) => return Ok(Expression::Function(f)),
15056 };
15057 let search_sql = match Generator::sql(&args[2]) {
15058 Ok(sql) => sql,
15059 Err(_) => return Ok(Expression::Function(f)),
15060 };
15061 let path_sql = if args.len() == 5 {
15062 match Generator::sql(&args[4]) {
15063 Ok(sql) => sql,
15064 Err(_) => return Ok(Expression::Function(f)),
15065 }
15066 } else {
15067 "'$'".to_string()
15068 };
15069
15070 let rewrite_sql = if mode == "all" {
15071 format!(
15072 "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
15073 json_doc_sql, path_sql, search_sql
15074 )
15075 } else {
15076 format!(
15077 "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
15078 json_doc_sql, path_sql, search_sql
15079 )
15080 };
15081
15082 Ok(Expression::Raw(crate::expressions::Raw {
15083 sql: rewrite_sql,
15084 }))
15085 }
15086 // SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
15087 // BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
15088 "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
15089 if f.args.len() >= 2
15090 && matches!(source, DialectType::SingleStore) =>
15091 {
15092 let is_bson = name == "BSON_EXTRACT_BSON";
15093 let mut args = f.args;
15094 let json_expr = args.remove(0);
15095
15096 // Build JSONPath from remaining arguments
15097 let mut path = String::from("$");
15098 for arg in &args {
15099 if let Expression::Literal(lit) = arg {
15100 if let crate::expressions::Literal::String(s) = lit.as_ref()
15101 {
15102 // Check if it's a numeric string (array index)
15103 if s.parse::<i64>().is_ok() {
15104 path.push('[');
15105 path.push_str(s);
15106 path.push(']');
15107 } else {
15108 path.push('.');
15109 path.push_str(s);
15110 }
15111 }
15112 }
15113 }
15114
15115 let target_func = if is_bson {
15116 "JSONB_EXTRACT"
15117 } else {
15118 "JSON_EXTRACT"
15119 };
15120 Ok(Expression::Function(Box::new(Function::new(
15121 target_func.to_string(),
15122 vec![json_expr, Expression::string(&path)],
15123 ))))
15124 }
15125 // ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
15126 "ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
15127 Ok(Expression::Function(Box::new(Function {
15128 name: "arraySum".to_string(),
15129 args: f.args,
15130 distinct: f.distinct,
15131 trailing_comments: f.trailing_comments,
15132 use_bracket_syntax: f.use_bracket_syntax,
15133 no_parens: f.no_parens,
15134 quoted: f.quoted,
15135 span: None,
15136 inferred_type: None,
15137 })))
15138 }
15139 // TSQL JSON_QUERY/JSON_VALUE -> target-specific
15140 // Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
15141 // and is handled by JsonQueryValueConvert action. This handles the case where
15142 // TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
15143 "JSON_QUERY" | "JSON_VALUE"
15144 if f.args.len() == 2
15145 && matches!(
15146 source,
15147 DialectType::TSQL | DialectType::Fabric
15148 ) =>
15149 {
15150 match target {
15151 DialectType::Spark
15152 | DialectType::Databricks
15153 | DialectType::Hive => Ok(Expression::Function(Box::new(
15154 Function::new("GET_JSON_OBJECT".to_string(), f.args),
15155 ))),
15156 _ => Ok(Expression::Function(Box::new(Function::new(
15157 name.to_string(),
15158 f.args,
15159 )))),
15160 }
15161 }
15162 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
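15162 // UNIX_TIMESTAMP(x) -> TO_UNIXTIME(...) for Presto/Trino, EPOCH(STRPTIME(...)) for DuckDB from Hive/Spark/Databricks sources; unchanged otherwise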
15164 let arg = f.args.into_iter().next().unwrap();
15165 let is_hive_source = matches!(
15166 source,
15167 DialectType::Hive
15168 | DialectType::Spark
15169 | DialectType::Databricks
15170 );
15171 match target {
15172 DialectType::DuckDB if is_hive_source => {
15173 // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
15174 let strptime =
15175 Expression::Function(Box::new(Function::new(
15176 "STRPTIME".to_string(),
15177 vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
15178 )));
15179 Ok(Expression::Function(Box::new(Function::new(
15180 "EPOCH".to_string(),
15181 vec![strptime],
15182 ))))
15183 }
15184 DialectType::Presto | DialectType::Trino if is_hive_source => {
15185 // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
15186 let cast_varchar =
15187 Expression::Cast(Box::new(crate::expressions::Cast {
15188 this: arg.clone(),
15189 to: DataType::VarChar {
15190 length: None,
15191 parenthesized_length: false,
15192 },
15193 trailing_comments: vec![],
15194 double_colon_syntax: false,
15195 format: None,
15196 default: None,
15197 inferred_type: None,
15198 }));
15199 let date_parse =
15200 Expression::Function(Box::new(Function::new(
15201 "DATE_PARSE".to_string(),
15202 vec![
15203 cast_varchar,
15204 Expression::string("%Y-%m-%d %T"),
15205 ],
15206 )));
15207 let try_expr = Expression::Function(Box::new(
15208 Function::new("TRY".to_string(), vec![date_parse]),
15209 ));
15210 let date_format =
15211 Expression::Function(Box::new(Function::new(
15212 "DATE_FORMAT".to_string(),
15213 vec![arg, Expression::string("%Y-%m-%d %T")],
15214 )));
15215 let parse_datetime =
15216 Expression::Function(Box::new(Function::new(
15217 "PARSE_DATETIME".to_string(),
15218 vec![
15219 date_format,
15220 Expression::string("yyyy-MM-dd HH:mm:ss"),
15221 ],
15222 )));
15223 let coalesce =
15224 Expression::Function(Box::new(Function::new(
15225 "COALESCE".to_string(),
15226 vec![try_expr, parse_datetime],
15227 )));
15228 Ok(Expression::Function(Box::new(Function::new(
15229 "TO_UNIXTIME".to_string(),
15230 vec![coalesce],
15231 ))))
15232 }
15233 DialectType::Presto | DialectType::Trino => {
15234 Ok(Expression::Function(Box::new(Function::new(
15235 "TO_UNIXTIME".to_string(),
15236 vec![arg],
15237 ))))
15238 }
15239 _ => Ok(Expression::Function(Box::new(Function::new(
15240 "UNIX_TIMESTAMP".to_string(),
15241 vec![arg],
15242 )))),
15243 }
15244 }
15245 // TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
15246 "TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
15247 DialectType::Spark
15248 | DialectType::Databricks
15249 | DialectType::Hive => Ok(Expression::Function(Box::new(
15250 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
15251 ))),
15252 _ => Ok(Expression::Function(Box::new(Function::new(
15253 "TO_UNIX_TIMESTAMP".to_string(),
15254 f.args,
15255 )))),
15256 },
15257 // CURDATE() -> CURRENT_DATE
15258 "CURDATE" => {
15259 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
15260 }
15261 // CURTIME() -> CURRENT_TIME
15262 "CURTIME" => {
15263 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
15264 precision: None,
15265 }))
15266 }
15266 // ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive; for DuckDB: LIST_SORT/ARRAY_REVERSE_SORT from Snowflake sources, otherwise ARRAY_SORT(x) with the comparator dropped
15268 "ARRAY_SORT" if f.args.len() >= 1 => {
15269 match target {
15270 DialectType::Hive => {
15271 let mut args = f.args;
15272 args.truncate(1); // Drop lambda comparator
15273 Ok(Expression::Function(Box::new(Function::new(
15274 "SORT_ARRAY".to_string(),
15275 args,
15276 ))))
15277 }
15278 DialectType::DuckDB
15279 if matches!(source, DialectType::Snowflake) =>
15280 {
15281 // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
15282 let mut args_iter = f.args.into_iter();
15283 let arr = args_iter.next().unwrap();
15284 let asc_arg = args_iter.next();
15285 let nulls_first_arg = args_iter.next();
15286
15287 let is_asc_bool = asc_arg
15288 .as_ref()
15289 .map(|a| matches!(a, Expression::Boolean(_)))
15290 .unwrap_or(false);
15291 let is_nf_bool = nulls_first_arg
15292 .as_ref()
15293 .map(|a| matches!(a, Expression::Boolean(_)))
15294 .unwrap_or(false);
15295
15296 // No boolean args: pass through as-is
15297 if !is_asc_bool && !is_nf_bool {
15298 let mut result_args = vec![arr];
15299 if let Some(asc) = asc_arg {
15300 result_args.push(asc);
15301 if let Some(nf) = nulls_first_arg {
15302 result_args.push(nf);
15303 }
15304 }
15305 Ok(Expression::Function(Box::new(Function::new(
15306 "LIST_SORT".to_string(),
15307 result_args,
15308 ))))
15309 } else {
15310 // Has boolean args: convert to DuckDB LIST_SORT format
15311 let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);
15312
15313 // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
15314 let nulls_are_first = match &nulls_first_arg {
15315 Some(Expression::Boolean(b)) => b.value,
15316 None if is_asc_bool => descending, // Snowflake default
15317 _ => false,
15318 };
15319 let nulls_first_sql = if nulls_are_first {
15320 Some(Expression::string("NULLS FIRST"))
15321 } else {
15322 None
15323 };
15324
15325 if !is_asc_bool {
15326 // asc is non-boolean expression, nulls_first is boolean
15327 let mut result_args = vec![arr];
15328 if let Some(asc) = asc_arg {
15329 result_args.push(asc);
15330 }
15331 if let Some(nf) = nulls_first_sql {
15332 result_args.push(nf);
15333 }
15334 Ok(Expression::Function(Box::new(Function::new(
15335 "LIST_SORT".to_string(),
15336 result_args,
15337 ))))
15338 } else {
15339 if !descending && !nulls_are_first {
15340 // ASC, NULLS LAST (default) -> LIST_SORT(arr)
15341 Ok(Expression::Function(Box::new(
15342 Function::new(
15343 "LIST_SORT".to_string(),
15344 vec![arr],
15345 ),
15346 )))
15347 } else if descending && !nulls_are_first {
15348 // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
15349 Ok(Expression::Function(Box::new(
15350 Function::new(
15351 "ARRAY_REVERSE_SORT".to_string(),
15352 vec![arr],
15353 ),
15354 )))
15355 } else {
15356 // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
15357 let order_str =
15358 if descending { "DESC" } else { "ASC" };
15359 Ok(Expression::Function(Box::new(
15360 Function::new(
15361 "LIST_SORT".to_string(),
15362 vec![
15363 arr,
15364 Expression::string(order_str),
15365 Expression::string("NULLS FIRST"),
15366 ],
15367 ),
15368 )))
15369 }
15370 }
15371 }
15372 }
15373 DialectType::DuckDB => {
15374 // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
15375 let mut args = f.args;
15376 args.truncate(1); // Drop lambda comparator for DuckDB
15377 Ok(Expression::Function(Box::new(Function::new(
15378 "ARRAY_SORT".to_string(),
15379 args,
15380 ))))
15381 }
15382 _ => Ok(Expression::Function(f)),
15383 }
15384 }
15385 // SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
15386 "SORT_ARRAY" if f.args.len() == 1 => match target {
15387 DialectType::Hive
15388 | DialectType::Spark
15389 | DialectType::Databricks => Ok(Expression::Function(f)),
15390 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15391 Function::new("LIST_SORT".to_string(), f.args),
15392 ))),
15393 _ => Ok(Expression::Function(Box::new(Function::new(
15394 "ARRAY_SORT".to_string(),
15395 f.args,
15396 )))),
15397 },
15398 // SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
15399 "SORT_ARRAY" if f.args.len() == 2 => {
15400 let is_desc =
15401 matches!(&f.args[1], Expression::Boolean(b) if !b.value);
15402 if is_desc {
15403 match target {
15404 DialectType::DuckDB => {
15405 Ok(Expression::Function(Box::new(Function::new(
15406 "ARRAY_REVERSE_SORT".to_string(),
15407 vec![f.args.into_iter().next().unwrap()],
15408 ))))
15409 }
15410 DialectType::Presto | DialectType::Trino => {
15411 let arr_arg = f.args.into_iter().next().unwrap();
15412 let a = Expression::Column(Box::new(
15413 crate::expressions::Column {
15414 name: crate::expressions::Identifier::new("a"),
15415 table: None,
15416 join_mark: false,
15417 trailing_comments: Vec::new(),
15418 span: None,
15419 inferred_type: None,
15420 },
15421 ));
15422 let b = Expression::Column(Box::new(
15423 crate::expressions::Column {
15424 name: crate::expressions::Identifier::new("b"),
15425 table: None,
15426 join_mark: false,
15427 trailing_comments: Vec::new(),
15428 span: None,
15429 inferred_type: None,
15430 },
15431 ));
15432 let case_expr = Expression::Case(Box::new(
15433 crate::expressions::Case {
15434 operand: None,
15435 whens: vec![
15436 (
15437 Expression::Lt(Box::new(
15438 BinaryOp::new(a.clone(), b.clone()),
15439 )),
15440 Expression::Literal(Box::new(
15441 Literal::Number("1".to_string()),
15442 )),
15443 ),
15444 (
15445 Expression::Gt(Box::new(
15446 BinaryOp::new(a.clone(), b.clone()),
15447 )),
15448 Expression::Literal(Box::new(
15449 Literal::Number("-1".to_string()),
15450 )),
15451 ),
15452 ],
15453 else_: Some(Expression::Literal(Box::new(
15454 Literal::Number("0".to_string()),
15455 ))),
15456 comments: Vec::new(),
15457 inferred_type: None,
15458 },
15459 ));
15460 let lambda = Expression::Lambda(Box::new(
15461 crate::expressions::LambdaExpr {
15462 parameters: vec![
15463 crate::expressions::Identifier::new("a"),
15464 crate::expressions::Identifier::new("b"),
15465 ],
15466 body: case_expr,
15467 colon: false,
15468 parameter_types: Vec::new(),
15469 },
15470 ));
15471 Ok(Expression::Function(Box::new(Function::new(
15472 "ARRAY_SORT".to_string(),
15473 vec![arr_arg, lambda],
15474 ))))
15475 }
15476 _ => Ok(Expression::Function(f)),
15477 }
15478 } else {
15478 // Second arg is not a literal FALSE (treated as ascending): keep as-is for Hive, LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for all other targets
15480 match target {
15481 DialectType::Hive => Ok(Expression::Function(f)),
15482 DialectType::DuckDB => {
15483 Ok(Expression::Function(Box::new(Function::new(
15484 "LIST_SORT".to_string(),
15485 vec![f.args.into_iter().next().unwrap()],
15486 ))))
15487 }
15488 _ => Ok(Expression::Function(Box::new(Function::new(
15489 "ARRAY_SORT".to_string(),
15490 vec![f.args.into_iter().next().unwrap()],
15491 )))),
15492 }
15493 }
15494 }
15495 // LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
15496 "LEFT" if f.args.len() == 2 => {
15497 match target {
15498 DialectType::Hive
15499 | DialectType::Presto
15500 | DialectType::Trino
15501 | DialectType::Athena => {
15502 let x = f.args[0].clone();
15503 let n = f.args[1].clone();
15504 Ok(Expression::Function(Box::new(Function::new(
15505 "SUBSTRING".to_string(),
15506 vec![x, Expression::number(1), n],
15507 ))))
15508 }
15509 DialectType::Spark | DialectType::Databricks
15510 if matches!(
15511 source,
15512 DialectType::TSQL | DialectType::Fabric
15513 ) =>
15514 {
15515 // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
15516 let x = f.args[0].clone();
15517 let n = f.args[1].clone();
15518 let cast_x = Expression::Cast(Box::new(Cast {
15519 this: x,
15520 to: DataType::VarChar {
15521 length: None,
15522 parenthesized_length: false,
15523 },
15524 double_colon_syntax: false,
15525 trailing_comments: Vec::new(),
15526 format: None,
15527 default: None,
15528 inferred_type: None,
15529 }));
15530 Ok(Expression::Function(Box::new(Function::new(
15531 "LEFT".to_string(),
15532 vec![cast_x, n],
15533 ))))
15534 }
15535 _ => Ok(Expression::Function(f)),
15536 }
15537 }
15538 "RIGHT" if f.args.len() == 2 => {
15539 match target {
15540 DialectType::Hive
15541 | DialectType::Presto
15542 | DialectType::Trino
15543 | DialectType::Athena => {
15544 let x = f.args[0].clone();
15545 let n = f.args[1].clone();
15546 // SUBSTRING(x, LENGTH(x) - (n - 1))
15547 let len_x = Expression::Function(Box::new(Function::new(
15548 "LENGTH".to_string(),
15549 vec![x.clone()],
15550 )));
15551 let n_minus_1 = Expression::Sub(Box::new(
15552 crate::expressions::BinaryOp::new(
15553 n,
15554 Expression::number(1),
15555 ),
15556 ));
15557 let n_minus_1_paren = Expression::Paren(Box::new(
15558 crate::expressions::Paren {
15559 this: n_minus_1,
15560 trailing_comments: Vec::new(),
15561 },
15562 ));
15563 let offset = Expression::Sub(Box::new(
15564 crate::expressions::BinaryOp::new(
15565 len_x,
15566 n_minus_1_paren,
15567 ),
15568 ));
15569 Ok(Expression::Function(Box::new(Function::new(
15570 "SUBSTRING".to_string(),
15571 vec![x, offset],
15572 ))))
15573 }
15574 DialectType::Spark | DialectType::Databricks
15575 if matches!(
15576 source,
15577 DialectType::TSQL | DialectType::Fabric
15578 ) =>
15579 {
15580 // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
15581 let x = f.args[0].clone();
15582 let n = f.args[1].clone();
15583 let cast_x = Expression::Cast(Box::new(Cast {
15584 this: x,
15585 to: DataType::VarChar {
15586 length: None,
15587 parenthesized_length: false,
15588 },
15589 double_colon_syntax: false,
15590 trailing_comments: Vec::new(),
15591 format: None,
15592 default: None,
15593 inferred_type: None,
15594 }));
15595 Ok(Expression::Function(Box::new(Function::new(
15596 "RIGHT".to_string(),
15597 vec![cast_x, n],
15598 ))))
15599 }
15600 _ => Ok(Expression::Function(f)),
15601 }
15602 }
15603 // MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
15604 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
15605 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15606 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
15607 ))),
15608 DialectType::Spark | DialectType::Databricks => {
15609 Ok(Expression::Function(Box::new(Function::new(
15610 "MAP_FROM_ARRAYS".to_string(),
15611 f.args,
15612 ))))
15613 }
15614 _ => Ok(Expression::Function(Box::new(Function::new(
15615 "MAP".to_string(),
15616 f.args,
15617 )))),
15618 },
15619 // LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
15620 // SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
15621 "LIKE" if f.args.len() >= 2 => {
15622 let (this, pattern) = if matches!(source, DialectType::SQLite) {
15623 // SQLite: LIKE(pattern, string) -> string LIKE pattern
15624 (f.args[1].clone(), f.args[0].clone())
15625 } else {
15626 // Standard: LIKE(string, pattern) -> string LIKE pattern
15627 (f.args[0].clone(), f.args[1].clone())
15628 };
15629 let escape = if f.args.len() >= 3 {
15630 Some(f.args[2].clone())
15631 } else {
15632 None
15633 };
15634 Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
15635 left: this,
15636 right: pattern,
15637 escape,
15638 quantifier: None,
15639 inferred_type: None,
15640 })))
15641 }
15642 // ILIKE(foo, 'pat') -> foo ILIKE 'pat'
15643 "ILIKE" if f.args.len() >= 2 => {
15644 let this = f.args[0].clone();
15645 let pattern = f.args[1].clone();
15646 let escape = if f.args.len() >= 3 {
15647 Some(f.args[2].clone())
15648 } else {
15649 None
15650 };
15651 Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
15652 left: this,
15653 right: pattern,
15654 escape,
15655 quantifier: None,
15656 inferred_type: None,
15657 })))
15658 }
15659 // CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
15660 "CHAR" if f.args.len() == 1 => match target {
15661 DialectType::MySQL
15662 | DialectType::SingleStore
15663 | DialectType::TSQL => Ok(Expression::Function(f)),
15664 _ => Ok(Expression::Function(Box::new(Function::new(
15665 "CHR".to_string(),
15666 f.args,
15667 )))),
15668 },
15669 // CONCAT(a, b) -> a || b for PostgreSQL
15670 "CONCAT"
15671 if f.args.len() == 2
15672 && matches!(target, DialectType::PostgreSQL)
15673 && matches!(
15674 source,
15675 DialectType::ClickHouse | DialectType::MySQL
15676 ) =>
15677 {
15678 let mut args = f.args;
15679 let right = args.pop().unwrap();
15680 let left = args.pop().unwrap();
15681 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
15682 this: Box::new(left),
15683 expression: Box::new(right),
15684 safe: None,
15685 })))
15686 }
15687 // ARRAY_TO_STRING(arr, delim) -> target-specific
15688 "ARRAY_TO_STRING"
15689 if f.args.len() == 2
15690 && matches!(target, DialectType::DuckDB)
15691 && matches!(source, DialectType::Snowflake) =>
15692 {
15693 let mut args = f.args;
15694 let arr = args.remove(0);
15695 let sep = args.remove(0);
15696 // sep IS NULL
15697 let sep_is_null = Expression::IsNull(Box::new(IsNull {
15698 this: sep.clone(),
15699 not: false,
15700 postfix_form: false,
15701 }));
15702 // COALESCE(CAST(x AS TEXT), '')
15703 let cast_x = Expression::Cast(Box::new(Cast {
15704 this: Expression::Identifier(Identifier::new("x")),
15705 to: DataType::Text,
15706 trailing_comments: Vec::new(),
15707 double_colon_syntax: false,
15708 format: None,
15709 default: None,
15710 inferred_type: None,
15711 }));
15712 let coalesce = Expression::Coalesce(Box::new(
15713 crate::expressions::VarArgFunc {
15714 original_name: None,
15715 expressions: vec![
15716 cast_x,
15717 Expression::Literal(Box::new(Literal::String(
15718 String::new(),
15719 ))),
15720 ],
15721 inferred_type: None,
15722 },
15723 ));
15724 let lambda =
15725 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
15726 parameters: vec![Identifier::new("x")],
15727 body: coalesce,
15728 colon: false,
15729 parameter_types: Vec::new(),
15730 }));
15731 let list_transform = Expression::Function(Box::new(Function::new(
15732 "LIST_TRANSFORM".to_string(),
15733 vec![arr, lambda],
15734 )));
15735 let array_to_string =
15736 Expression::Function(Box::new(Function::new(
15737 "ARRAY_TO_STRING".to_string(),
15738 vec![list_transform, sep],
15739 )));
15740 Ok(Expression::Case(Box::new(Case {
15741 operand: None,
15742 whens: vec![(sep_is_null, Expression::Null(Null))],
15743 else_: Some(array_to_string),
15744 comments: Vec::new(),
15745 inferred_type: None,
15746 })))
15747 }
15748 "ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
15749 DialectType::Presto | DialectType::Trino => {
15750 Ok(Expression::Function(Box::new(Function::new(
15751 "ARRAY_JOIN".to_string(),
15752 f.args,
15753 ))))
15754 }
15755 DialectType::TSQL => Ok(Expression::Function(Box::new(
15756 Function::new("STRING_AGG".to_string(), f.args),
15757 ))),
15758 _ => Ok(Expression::Function(f)),
15759 },
15760 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
15761 "ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
15762 DialectType::Spark
15763 | DialectType::Databricks
15764 | DialectType::Hive => Ok(Expression::Function(Box::new(
15765 Function::new("CONCAT".to_string(), f.args),
15766 ))),
15767 DialectType::Snowflake => Ok(Expression::Function(Box::new(
15768 Function::new("ARRAY_CAT".to_string(), f.args),
15769 ))),
15770 DialectType::Redshift => Ok(Expression::Function(Box::new(
15771 Function::new("ARRAY_CONCAT".to_string(), f.args),
15772 ))),
15773 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
15774 Function::new("ARRAY_CAT".to_string(), f.args),
15775 ))),
15776 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15777 Function::new("LIST_CONCAT".to_string(), f.args),
15778 ))),
15779 DialectType::Presto | DialectType::Trino => {
15780 Ok(Expression::Function(Box::new(Function::new(
15781 "CONCAT".to_string(),
15782 f.args,
15783 ))))
15784 }
15785 DialectType::BigQuery => Ok(Expression::Function(Box::new(
15786 Function::new("ARRAY_CONCAT".to_string(), f.args),
15787 ))),
15788 _ => Ok(Expression::Function(f)),
15789 },
15790 // ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
15791 "HAS" if f.args.len() == 2 => match target {
15792 DialectType::Spark
15793 | DialectType::Databricks
15794 | DialectType::Hive => Ok(Expression::Function(Box::new(
15795 Function::new("ARRAY_CONTAINS".to_string(), f.args),
15796 ))),
15797 DialectType::Presto | DialectType::Trino => {
15798 Ok(Expression::Function(Box::new(Function::new(
15799 "CONTAINS".to_string(),
15800 f.args,
15801 ))))
15802 }
15803 _ => Ok(Expression::Function(f)),
15804 },
15805 // NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
15806 "NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
15807 Function::new("COALESCE".to_string(), f.args),
15808 ))),
15809 // ISNULL(x) in MySQL -> (x IS NULL)
15810 "ISNULL"
15811 if f.args.len() == 1
15812 && matches!(source, DialectType::MySQL)
15813 && matches!(target, DialectType::MySQL) =>
15814 {
15815 let arg = f.args.into_iter().next().unwrap();
15816 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
15817 this: Expression::IsNull(Box::new(
15818 crate::expressions::IsNull {
15819 this: arg,
15820 not: false,
15821 postfix_form: false,
15822 },
15823 )),
15824 trailing_comments: Vec::new(),
15825 })))
15826 }
15826 // MONTHNAME(x) -> DATE_FORMAT(x, '%M') when the target is MySQL (the guard does not check the source dialect)
15828 "MONTHNAME"
15829 if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
15830 {
15831 let arg = f.args.into_iter().next().unwrap();
15832 Ok(Expression::Function(Box::new(Function::new(
15833 "DATE_FORMAT".to_string(),
15834 vec![arg, Expression::string("%M")],
15835 ))))
15836 }
15837 // ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
15838 "SPLITBYSTRING" if f.args.len() == 2 => {
15839 let sep = f.args[0].clone();
15840 let str_arg = f.args[1].clone();
15841 match target {
15842 DialectType::DuckDB => Ok(Expression::Function(Box::new(
15843 Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
15844 ))),
15845 DialectType::Doris => {
15846 Ok(Expression::Function(Box::new(Function::new(
15847 "SPLIT_BY_STRING".to_string(),
15848 vec![str_arg, sep],
15849 ))))
15850 }
15851 DialectType::Hive
15852 | DialectType::Spark
15853 | DialectType::Databricks => {
15854 // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
15855 let escaped =
15856 Expression::Function(Box::new(Function::new(
15857 "CONCAT".to_string(),
15858 vec![
15859 Expression::string("\\Q"),
15860 sep,
15861 Expression::string("\\E"),
15862 ],
15863 )));
15864 Ok(Expression::Function(Box::new(Function::new(
15865 "SPLIT".to_string(),
15866 vec![str_arg, escaped],
15867 ))))
15868 }
15869 _ => Ok(Expression::Function(f)),
15870 }
15871 }
15872 // ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
15873 "SPLITBYREGEXP" if f.args.len() == 2 => {
15874 let sep = f.args[0].clone();
15875 let str_arg = f.args[1].clone();
15876 match target {
15877 DialectType::DuckDB => {
15878 Ok(Expression::Function(Box::new(Function::new(
15879 "STR_SPLIT_REGEX".to_string(),
15880 vec![str_arg, sep],
15881 ))))
15882 }
15883 DialectType::Hive
15884 | DialectType::Spark
15885 | DialectType::Databricks => {
15886 Ok(Expression::Function(Box::new(Function::new(
15887 "SPLIT".to_string(),
15888 vec![str_arg, sep],
15889 ))))
15890 }
15891 _ => Ok(Expression::Function(f)),
15892 }
15893 }
15894 // ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
15895 "TOMONDAY" => {
15896 if f.args.len() == 1 {
15897 let arg = f.args.into_iter().next().unwrap();
15898 match target {
15899 DialectType::Doris => {
15900 Ok(Expression::Function(Box::new(Function::new(
15901 "DATE_TRUNC".to_string(),
15902 vec![arg, Expression::string("WEEK")],
15903 ))))
15904 }
15905 _ => Ok(Expression::Function(Box::new(Function::new(
15906 "DATE_TRUNC".to_string(),
15907 vec![Expression::string("WEEK"), arg],
15908 )))),
15909 }
15910 } else {
15911 Ok(Expression::Function(f))
15912 }
15913 }
15913 // COLLECT_LIST(x) -> ARRAY_AGG(x) for non-Spark/Databricks/Hive targets; this arm only renames the function and does not itself add a FILTER(WHERE x IS NOT NULL) clause
15915 "COLLECT_LIST" if f.args.len() == 1 => match target {
15916 DialectType::Spark
15917 | DialectType::Databricks
15918 | DialectType::Hive => Ok(Expression::Function(f)),
15919 _ => Ok(Expression::Function(Box::new(Function::new(
15920 "ARRAY_AGG".to_string(),
15921 f.args,
15922 )))),
15923 },
15924 // TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
15925 "TO_CHAR"
15926 if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
15927 {
15928 let arg = f.args.into_iter().next().unwrap();
15929 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15930 this: arg,
15931 to: DataType::Custom {
15932 name: "STRING".to_string(),
15933 },
15934 double_colon_syntax: false,
15935 trailing_comments: Vec::new(),
15936 format: None,
15937 default: None,
15938 inferred_type: None,
15939 })))
15940 }
15941 // DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL
15942 "DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
15943 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
15944 Function::new("RANDOM".to_string(), vec![]),
15945 ))),
15946 _ => Ok(Expression::Function(f)),
15947 },
15948 // ClickHouse formatDateTime -> target-specific
15949 "FORMATDATETIME" if f.args.len() >= 2 => match target {
15950 DialectType::MySQL => Ok(Expression::Function(Box::new(
15951 Function::new("DATE_FORMAT".to_string(), f.args),
15952 ))),
15953 _ => Ok(Expression::Function(f)),
15954 },
15955 // REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
15956 "REPLICATE" if f.args.len() == 2 => match target {
15957 DialectType::TSQL => Ok(Expression::Function(f)),
15958 _ => Ok(Expression::Function(Box::new(Function::new(
15959 "REPEAT".to_string(),
15960 f.args,
15961 )))),
15962 },
15963 // LEN(x) -> LENGTH(x) for non-TSQL targets
15964 // No CAST needed when arg is already a string literal
15965 "LEN" if f.args.len() == 1 => {
15966 match target {
15967 DialectType::TSQL => Ok(Expression::Function(f)),
15968 DialectType::Spark | DialectType::Databricks => {
15969 let arg = f.args.into_iter().next().unwrap();
15970 // Don't wrap string literals with CAST - they're already strings
15971 let is_string = matches!(
15972 &arg,
15973 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
15974 );
15975 let final_arg = if is_string {
15976 arg
15977 } else {
15978 Expression::Cast(Box::new(Cast {
15979 this: arg,
15980 to: DataType::VarChar {
15981 length: None,
15982 parenthesized_length: false,
15983 },
15984 double_colon_syntax: false,
15985 trailing_comments: Vec::new(),
15986 format: None,
15987 default: None,
15988 inferred_type: None,
15989 }))
15990 };
15991 Ok(Expression::Function(Box::new(Function::new(
15992 "LENGTH".to_string(),
15993 vec![final_arg],
15994 ))))
15995 }
15996 _ => {
15997 let arg = f.args.into_iter().next().unwrap();
15998 Ok(Expression::Function(Box::new(Function::new(
15999 "LENGTH".to_string(),
16000 vec![arg],
16001 ))))
16002 }
16003 }
16004 }
16005 // COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
16006 "COUNT_BIG" if f.args.len() == 1 => match target {
16007 DialectType::TSQL => Ok(Expression::Function(f)),
16008 _ => Ok(Expression::Function(Box::new(Function::new(
16009 "COUNT".to_string(),
16010 f.args,
16011 )))),
16012 },
16013 // DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
16014 "DATEFROMPARTS" if f.args.len() == 3 => match target {
16015 DialectType::TSQL => Ok(Expression::Function(f)),
16016 _ => Ok(Expression::Function(Box::new(Function::new(
16017 "MAKE_DATE".to_string(),
16018 f.args,
16019 )))),
16020 },
16021 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
16022 "REGEXP_LIKE" if f.args.len() >= 2 => {
16023 let str_expr = f.args[0].clone();
16024 let pattern = f.args[1].clone();
16025 let flags = if f.args.len() >= 3 {
16026 Some(f.args[2].clone())
16027 } else {
16028 None
16029 };
16030 match target {
16031 DialectType::DuckDB => {
16032 let mut new_args = vec![str_expr, pattern];
16033 if let Some(fl) = flags {
16034 new_args.push(fl);
16035 }
16036 Ok(Expression::Function(Box::new(Function::new(
16037 "REGEXP_MATCHES".to_string(),
16038 new_args,
16039 ))))
16040 }
16041 _ => Ok(Expression::RegexpLike(Box::new(
16042 crate::expressions::RegexpFunc {
16043 this: str_expr,
16044 pattern,
16045 flags,
16046 },
16047 ))),
16048 }
16049 }
16050 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
16051 "ARRAYJOIN" if f.args.len() == 1 => match target {
16052 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
16053 Function::new("UNNEST".to_string(), f.args),
16054 ))),
16055 _ => Ok(Expression::Function(f)),
16056 },
16057 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
16058 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
16059 match target {
16060 DialectType::TSQL => Ok(Expression::Function(f)),
16061 DialectType::DuckDB => {
16062 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
16063 let mut args = f.args;
16064 let ms = args.pop().unwrap();
16065 let s = args.pop().unwrap();
16066 // s + (ms / 1000.0)
16067 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
16068 ms,
16069 Expression::Literal(Box::new(
16070 crate::expressions::Literal::Number(
16071 "1000.0".to_string(),
16072 ),
16073 )),
16074 )));
16075 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
16076 s,
16077 Expression::Paren(Box::new(Paren {
16078 this: ms_frac,
16079 trailing_comments: vec![],
16080 })),
16081 )));
16082 args.push(s_with_ms);
16083 Ok(Expression::Function(Box::new(Function::new(
16084 "MAKE_TIMESTAMP".to_string(),
16085 args,
16086 ))))
16087 }
16088 DialectType::Snowflake => {
16089 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
16090 let mut args = f.args;
16091 let ms = args.pop().unwrap();
16092 // ms * 1000000
16093 let ns = Expression::Mul(Box::new(BinaryOp::new(
16094 ms,
16095 Expression::number(1000000),
16096 )));
16097 args.push(ns);
16098 Ok(Expression::Function(Box::new(Function::new(
16099 "TIMESTAMP_FROM_PARTS".to_string(),
16100 args,
16101 ))))
16102 }
16103 _ => {
16104 // Default: keep function name for other targets
16105 Ok(Expression::Function(Box::new(Function::new(
16106 "DATETIMEFROMPARTS".to_string(),
16107 f.args,
16108 ))))
16109 }
16110 }
16111 }
16112 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
16113 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
16114 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
16115 let is_try = name == "TRY_CONVERT";
16116 let type_expr = f.args[0].clone();
16117 let value_expr = f.args[1].clone();
16118 let style = if f.args.len() >= 3 {
16119 Some(&f.args[2])
16120 } else {
16121 None
16122 };
16123
16124 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
16125 if matches!(target, DialectType::TSQL) {
16126 let normalized_type = match &type_expr {
16127 Expression::DataType(dt) => {
16128 let new_dt = match dt {
16129 DataType::Int { .. } => DataType::Custom {
16130 name: "INTEGER".to_string(),
16131 },
16132 _ => dt.clone(),
16133 };
16134 Expression::DataType(new_dt)
16135 }
16136 Expression::Identifier(id) => {
16137 if id.name.eq_ignore_ascii_case("INT") {
16138 Expression::Identifier(
16139 crate::expressions::Identifier::new("INTEGER"),
16140 )
16141 } else {
16142 let upper = id.name.to_ascii_uppercase();
16143 Expression::Identifier(
16144 crate::expressions::Identifier::new(upper),
16145 )
16146 }
16147 }
16148 Expression::Column(col) => {
16149 if col.name.name.eq_ignore_ascii_case("INT") {
16150 Expression::Identifier(
16151 crate::expressions::Identifier::new("INTEGER"),
16152 )
16153 } else {
16154 let upper = col.name.name.to_ascii_uppercase();
16155 Expression::Identifier(
16156 crate::expressions::Identifier::new(upper),
16157 )
16158 }
16159 }
16160 _ => type_expr.clone(),
16161 };
16162 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
16163 let mut new_args = vec![normalized_type, value_expr];
16164 if let Some(s) = style {
16165 new_args.push(s.clone());
16166 }
16167 return Ok(Expression::Function(Box::new(Function::new(
16168 func_name.to_string(),
16169 new_args,
16170 ))));
16171 }
16172
16173 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
16174 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
16175 match e {
16176 Expression::DataType(dt) => {
16177 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
16178 match dt {
16179 DataType::Custom { name }
16180 if name.starts_with("NVARCHAR(")
16181 || name.starts_with("NCHAR(") =>
16182 {
16183 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
16184 let inner = &name[name.find('(').unwrap() + 1
16185 ..name.len() - 1];
16186 if inner.eq_ignore_ascii_case("MAX") {
16187 Some(DataType::Text)
16188 } else if let Ok(len) = inner.parse::<u32>() {
16189 if name.starts_with("NCHAR") {
16190 Some(DataType::Char {
16191 length: Some(len),
16192 })
16193 } else {
16194 Some(DataType::VarChar {
16195 length: Some(len),
16196 parenthesized_length: false,
16197 })
16198 }
16199 } else {
16200 Some(dt.clone())
16201 }
16202 }
16203 DataType::Custom { name } if name == "NVARCHAR" => {
16204 Some(DataType::VarChar {
16205 length: None,
16206 parenthesized_length: false,
16207 })
16208 }
16209 DataType::Custom { name } if name == "NCHAR" => {
16210 Some(DataType::Char { length: None })
16211 }
16212 DataType::Custom { name }
16213 if name == "NVARCHAR(MAX)"
16214 || name == "VARCHAR(MAX)" =>
16215 {
16216 Some(DataType::Text)
16217 }
16218 _ => Some(dt.clone()),
16219 }
16220 }
16221 Expression::Identifier(id) => {
16222 let name = id.name.to_ascii_uppercase();
16223 match name.as_str() {
16224 "INT" | "INTEGER" => Some(DataType::Int {
16225 length: None,
16226 integer_spelling: false,
16227 }),
16228 "BIGINT" => Some(DataType::BigInt { length: None }),
16229 "SMALLINT" => {
16230 Some(DataType::SmallInt { length: None })
16231 }
16232 "TINYINT" => {
16233 Some(DataType::TinyInt { length: None })
16234 }
16235 "FLOAT" => Some(DataType::Float {
16236 precision: None,
16237 scale: None,
16238 real_spelling: false,
16239 }),
16240 "REAL" => Some(DataType::Float {
16241 precision: None,
16242 scale: None,
16243 real_spelling: true,
16244 }),
16245 "DATETIME" | "DATETIME2" => {
16246 Some(DataType::Timestamp {
16247 timezone: false,
16248 precision: None,
16249 })
16250 }
16251 "DATE" => Some(DataType::Date),
16252 "BIT" => Some(DataType::Boolean),
16253 "TEXT" => Some(DataType::Text),
16254 "NUMERIC" => Some(DataType::Decimal {
16255 precision: None,
16256 scale: None,
16257 }),
16258 "MONEY" => Some(DataType::Decimal {
16259 precision: Some(15),
16260 scale: Some(4),
16261 }),
16262 "SMALLMONEY" => Some(DataType::Decimal {
16263 precision: Some(6),
16264 scale: Some(4),
16265 }),
16266 "VARCHAR" => Some(DataType::VarChar {
16267 length: None,
16268 parenthesized_length: false,
16269 }),
16270 "NVARCHAR" => Some(DataType::VarChar {
16271 length: None,
16272 parenthesized_length: false,
16273 }),
16274 "CHAR" => Some(DataType::Char { length: None }),
16275 "NCHAR" => Some(DataType::Char { length: None }),
16276 _ => Some(DataType::Custom { name }),
16277 }
16278 }
16279 Expression::Column(col) => {
16280 let name = col.name.name.to_ascii_uppercase();
16281 match name.as_str() {
16282 "INT" | "INTEGER" => Some(DataType::Int {
16283 length: None,
16284 integer_spelling: false,
16285 }),
16286 "BIGINT" => Some(DataType::BigInt { length: None }),
16287 "FLOAT" => Some(DataType::Float {
16288 precision: None,
16289 scale: None,
16290 real_spelling: false,
16291 }),
16292 "DATETIME" | "DATETIME2" => {
16293 Some(DataType::Timestamp {
16294 timezone: false,
16295 precision: None,
16296 })
16297 }
16298 "DATE" => Some(DataType::Date),
16299 "NUMERIC" => Some(DataType::Decimal {
16300 precision: None,
16301 scale: None,
16302 }),
16303 "VARCHAR" => Some(DataType::VarChar {
16304 length: None,
16305 parenthesized_length: false,
16306 }),
16307 "NVARCHAR" => Some(DataType::VarChar {
16308 length: None,
16309 parenthesized_length: false,
16310 }),
16311 "CHAR" => Some(DataType::Char { length: None }),
16312 "NCHAR" => Some(DataType::Char { length: None }),
16313 _ => Some(DataType::Custom { name }),
16314 }
16315 }
16316 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
16317 Expression::Function(f) => {
16318 let fname = f.name.to_ascii_uppercase();
16319 match fname.as_str() {
16320 "VARCHAR" | "NVARCHAR" => {
16321 let len = f.args.first().and_then(|a| {
16322 if let Expression::Literal(lit) = a
16323 {
16324 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16325 n.parse::<u32>().ok()
16326 } else { None }
16327 } else if let Expression::Identifier(id) = a
16328 {
16329 if id.name.eq_ignore_ascii_case("MAX") {
16330 None
16331 } else {
16332 None
16333 }
16334 } else {
16335 None
16336 }
16337 });
16338 // Check for VARCHAR(MAX) -> TEXT
16339 let is_max = f.args.first().map_or(false, |a| {
16340 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
16341 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
16342 });
16343 if is_max {
16344 Some(DataType::Text)
16345 } else {
16346 Some(DataType::VarChar {
16347 length: len,
16348 parenthesized_length: false,
16349 })
16350 }
16351 }
16352 "NCHAR" | "CHAR" => {
16353 let len = f.args.first().and_then(|a| {
16354 if let Expression::Literal(lit) = a
16355 {
16356 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16357 n.parse::<u32>().ok()
16358 } else { None }
16359 } else {
16360 None
16361 }
16362 });
16363 Some(DataType::Char { length: len })
16364 }
16365 "NUMERIC" | "DECIMAL" => {
16366 let precision = f.args.first().and_then(|a| {
16367 if let Expression::Literal(lit) = a
16368 {
16369 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16370 n.parse::<u32>().ok()
16371 } else { None }
16372 } else {
16373 None
16374 }
16375 });
16376 let scale = f.args.get(1).and_then(|a| {
16377 if let Expression::Literal(lit) = a
16378 {
16379 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
16380 n.parse::<u32>().ok()
16381 } else { None }
16382 } else {
16383 None
16384 }
16385 });
16386 Some(DataType::Decimal { precision, scale })
16387 }
16388 _ => None,
16389 }
16390 }
16391 _ => None,
16392 }
16393 }
16394
16395 if let Some(mut dt) = expr_to_datatype(&type_expr) {
16396 // For TSQL source: VARCHAR/CHAR without length defaults to 30
16397 let is_tsql_source =
16398 matches!(source, DialectType::TSQL | DialectType::Fabric);
16399 if is_tsql_source {
16400 match &dt {
16401 DataType::VarChar { length: None, .. } => {
16402 dt = DataType::VarChar {
16403 length: Some(30),
16404 parenthesized_length: false,
16405 };
16406 }
16407 DataType::Char { length: None } => {
16408 dt = DataType::Char { length: Some(30) };
16409 }
16410 _ => {}
16411 }
16412 }
16413
16414 // Determine if this is a string type
16415 let is_string_type = matches!(
16416 dt,
16417 DataType::VarChar { .. }
16418 | DataType::Char { .. }
16419 | DataType::Text
16420 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
16421 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
16422 || name.starts_with("VARCHAR(") || name == "VARCHAR"
16423 || name == "STRING");
16424
16425 // Determine if this is a date/time type
16426 let is_datetime_type = matches!(
16427 dt,
16428 DataType::Timestamp { .. } | DataType::Date
16429 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
16430 || name == "DATETIME2" || name == "SMALLDATETIME");
16431
16432 // Check for date conversion with style
16433 if style.is_some() {
16434 let style_num = style.and_then(|s| {
16435 if let Expression::Literal(lit) = s {
16436 if let crate::expressions::Literal::Number(n) =
16437 lit.as_ref()
16438 {
16439 n.parse::<u32>().ok()
16440 } else {
16441 None
16442 }
16443 } else {
16444 None
16445 }
16446 });
16447
16448 // TSQL CONVERT date styles (Java format)
16449 let format_str = style_num.and_then(|n| match n {
16450 101 => Some("MM/dd/yyyy"),
16451 102 => Some("yyyy.MM.dd"),
16452 103 => Some("dd/MM/yyyy"),
16453 104 => Some("dd.MM.yyyy"),
16454 105 => Some("dd-MM-yyyy"),
16455 108 => Some("HH:mm:ss"),
16456 110 => Some("MM-dd-yyyy"),
16457 112 => Some("yyyyMMdd"),
16458 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
16459 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
16460 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
16461 _ => None,
16462 });
16463
16464 // Non-string, non-datetime types with style: just CAST, ignore the style
16465 if !is_string_type && !is_datetime_type {
16466 let cast_expr = if is_try {
16467 Expression::TryCast(Box::new(
16468 crate::expressions::Cast {
16469 this: value_expr,
16470 to: dt,
16471 trailing_comments: Vec::new(),
16472 double_colon_syntax: false,
16473 format: None,
16474 default: None,
16475 inferred_type: None,
16476 },
16477 ))
16478 } else {
16479 Expression::Cast(Box::new(
16480 crate::expressions::Cast {
16481 this: value_expr,
16482 to: dt,
16483 trailing_comments: Vec::new(),
16484 double_colon_syntax: false,
16485 format: None,
16486 default: None,
16487 inferred_type: None,
16488 },
16489 ))
16490 };
16491 return Ok(cast_expr);
16492 }
16493
16494 if let Some(java_fmt) = format_str {
16495 let c_fmt = java_fmt
16496 .replace("yyyy", "%Y")
16497 .replace("MM", "%m")
16498 .replace("dd", "%d")
16499 .replace("HH", "%H")
16500 .replace("mm", "%M")
16501 .replace("ss", "%S")
16502 .replace("SSSSSS", "%f")
16503 .replace("SSS", "%f")
16504 .replace("'T'", "T");
16505
16506 // For datetime target types: style is the INPUT format for parsing strings -> dates
16507 if is_datetime_type {
16508 match target {
16509 DialectType::DuckDB => {
16510 return Ok(Expression::Function(Box::new(
16511 Function::new(
16512 "STRPTIME".to_string(),
16513 vec![
16514 value_expr,
16515 Expression::string(&c_fmt),
16516 ],
16517 ),
16518 )));
16519 }
16520 DialectType::Spark
16521 | DialectType::Databricks => {
16522 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
16523 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
16524 let func_name =
16525 if matches!(dt, DataType::Date) {
16526 "TO_DATE"
16527 } else {
16528 "TO_TIMESTAMP"
16529 };
16530 return Ok(Expression::Function(Box::new(
16531 Function::new(
16532 func_name.to_string(),
16533 vec![
16534 value_expr,
16535 Expression::string(java_fmt),
16536 ],
16537 ),
16538 )));
16539 }
16540 DialectType::Hive => {
16541 return Ok(Expression::Function(Box::new(
16542 Function::new(
16543 "TO_TIMESTAMP".to_string(),
16544 vec![
16545 value_expr,
16546 Expression::string(java_fmt),
16547 ],
16548 ),
16549 )));
16550 }
16551 _ => {
16552 return Ok(Expression::Cast(Box::new(
16553 crate::expressions::Cast {
16554 this: value_expr,
16555 to: dt,
16556 trailing_comments: Vec::new(),
16557 double_colon_syntax: false,
16558 format: None,
16559 default: None,
16560 inferred_type: None,
16561 },
16562 )));
16563 }
16564 }
16565 }
16566
16567 // For string target types: style is the OUTPUT format for dates -> strings
16568 match target {
16569 DialectType::DuckDB => Ok(Expression::Function(
16570 Box::new(Function::new(
16571 "STRPTIME".to_string(),
16572 vec![
16573 value_expr,
16574 Expression::string(&c_fmt),
16575 ],
16576 )),
16577 )),
16578 DialectType::Spark | DialectType::Databricks => {
16579 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
16580 // Determine the target string type
16581 let string_dt = match &dt {
16582 DataType::VarChar {
16583 length: Some(l),
16584 ..
16585 } => DataType::VarChar {
16586 length: Some(*l),
16587 parenthesized_length: false,
16588 },
16589 DataType::Text => DataType::Custom {
16590 name: "STRING".to_string(),
16591 },
16592 _ => DataType::Custom {
16593 name: "STRING".to_string(),
16594 },
16595 };
16596 let date_format_expr = Expression::Function(
16597 Box::new(Function::new(
16598 "DATE_FORMAT".to_string(),
16599 vec![
16600 value_expr,
16601 Expression::string(java_fmt),
16602 ],
16603 )),
16604 );
16605 let cast_expr = if is_try {
16606 Expression::TryCast(Box::new(
16607 crate::expressions::Cast {
16608 this: date_format_expr,
16609 to: string_dt,
16610 trailing_comments: Vec::new(),
16611 double_colon_syntax: false,
16612 format: None,
16613 default: None,
16614 inferred_type: None,
16615 },
16616 ))
16617 } else {
16618 Expression::Cast(Box::new(
16619 crate::expressions::Cast {
16620 this: date_format_expr,
16621 to: string_dt,
16622 trailing_comments: Vec::new(),
16623 double_colon_syntax: false,
16624 format: None,
16625 default: None,
16626 inferred_type: None,
16627 },
16628 ))
16629 };
16630 Ok(cast_expr)
16631 }
16632 DialectType::MySQL | DialectType::SingleStore => {
16633 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
16634 let mysql_fmt = java_fmt
16635 .replace("yyyy", "%Y")
16636 .replace("MM", "%m")
16637 .replace("dd", "%d")
16638 .replace("HH:mm:ss.SSSSSS", "%T")
16639 .replace("HH:mm:ss", "%T")
16640 .replace("HH", "%H")
16641 .replace("mm", "%i")
16642 .replace("ss", "%S");
16643 let date_format_expr = Expression::Function(
16644 Box::new(Function::new(
16645 "DATE_FORMAT".to_string(),
16646 vec![
16647 value_expr,
16648 Expression::string(&mysql_fmt),
16649 ],
16650 )),
16651 );
16652 // MySQL uses CHAR for string casts
16653 let mysql_dt = match &dt {
16654 DataType::VarChar { length, .. } => {
16655 DataType::Char { length: *length }
16656 }
16657 _ => dt,
16658 };
16659 Ok(Expression::Cast(Box::new(
16660 crate::expressions::Cast {
16661 this: date_format_expr,
16662 to: mysql_dt,
16663 trailing_comments: Vec::new(),
16664 double_colon_syntax: false,
16665 format: None,
16666 default: None,
16667 inferred_type: None,
16668 },
16669 )))
16670 }
16671 DialectType::Hive => {
16672 let func_name = "TO_TIMESTAMP";
16673 Ok(Expression::Function(Box::new(
16674 Function::new(
16675 func_name.to_string(),
16676 vec![
16677 value_expr,
16678 Expression::string(java_fmt),
16679 ],
16680 ),
16681 )))
16682 }
16683 _ => Ok(Expression::Cast(Box::new(
16684 crate::expressions::Cast {
16685 this: value_expr,
16686 to: dt,
16687 trailing_comments: Vec::new(),
16688 double_colon_syntax: false,
16689 format: None,
16690 default: None,
16691 inferred_type: None,
16692 },
16693 ))),
16694 }
16695 } else {
16696 // Unknown style, just CAST
16697 let cast_expr = if is_try {
16698 Expression::TryCast(Box::new(
16699 crate::expressions::Cast {
16700 this: value_expr,
16701 to: dt,
16702 trailing_comments: Vec::new(),
16703 double_colon_syntax: false,
16704 format: None,
16705 default: None,
16706 inferred_type: None,
16707 },
16708 ))
16709 } else {
16710 Expression::Cast(Box::new(
16711 crate::expressions::Cast {
16712 this: value_expr,
16713 to: dt,
16714 trailing_comments: Vec::new(),
16715 double_colon_syntax: false,
16716 format: None,
16717 default: None,
16718 inferred_type: None,
16719 },
16720 ))
16721 };
16722 Ok(cast_expr)
16723 }
16724 } else {
16725 // No style - simple CAST
16726 let final_dt = if matches!(
16727 target,
16728 DialectType::MySQL | DialectType::SingleStore
16729 ) {
16730 match &dt {
16731 DataType::Int { .. }
16732 | DataType::BigInt { .. }
16733 | DataType::SmallInt { .. }
16734 | DataType::TinyInt { .. } => DataType::Custom {
16735 name: "SIGNED".to_string(),
16736 },
16737 DataType::VarChar { length, .. } => {
16738 DataType::Char { length: *length }
16739 }
16740 _ => dt,
16741 }
16742 } else {
16743 dt
16744 };
16745 let cast_expr = if is_try {
16746 Expression::TryCast(Box::new(
16747 crate::expressions::Cast {
16748 this: value_expr,
16749 to: final_dt,
16750 trailing_comments: Vec::new(),
16751 double_colon_syntax: false,
16752 format: None,
16753 default: None,
16754 inferred_type: None,
16755 },
16756 ))
16757 } else {
16758 Expression::Cast(Box::new(crate::expressions::Cast {
16759 this: value_expr,
16760 to: final_dt,
16761 trailing_comments: Vec::new(),
16762 double_colon_syntax: false,
16763 format: None,
16764 default: None,
16765 inferred_type: None,
16766 }))
16767 };
16768 Ok(cast_expr)
16769 }
16770 } else {
16771 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
16772 Ok(Expression::Function(f))
16773 }
16774 }
16775 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
16776 "STRFTIME" if f.args.len() == 2 => {
16777 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
16778 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
16779 // SQLite: args[0] = format, args[1] = value
16780 (f.args[1].clone(), &f.args[0])
16781 } else {
16782 // DuckDB and others: args[0] = value, args[1] = format
16783 (f.args[0].clone(), &f.args[1])
16784 };
16785
16786 // Helper to convert C-style format to Java-style
16787 fn c_to_java_format(fmt: &str) -> String {
16788 fmt.replace("%Y", "yyyy")
16789 .replace("%m", "MM")
16790 .replace("%d", "dd")
16791 .replace("%H", "HH")
16792 .replace("%M", "mm")
16793 .replace("%S", "ss")
16794 .replace("%f", "SSSSSS")
16795 .replace("%y", "yy")
16796 .replace("%-m", "M")
16797 .replace("%-d", "d")
16798 .replace("%-H", "H")
16799 .replace("%-I", "h")
16800 .replace("%I", "hh")
16801 .replace("%p", "a")
16802 .replace("%j", "DDD")
16803 .replace("%a", "EEE")
16804 .replace("%b", "MMM")
16805 .replace("%F", "yyyy-MM-dd")
16806 .replace("%T", "HH:mm:ss")
16807 }
16808
16809 // Helper: recursively convert format strings within expressions (handles CONCAT)
16810 fn convert_fmt_expr(
16811 expr: &Expression,
16812 converter: &dyn Fn(&str) -> String,
16813 ) -> Expression {
16814 match expr {
16815 Expression::Literal(lit)
16816 if matches!(
16817 lit.as_ref(),
16818 crate::expressions::Literal::String(_)
16819 ) =>
16820 {
16821 let crate::expressions::Literal::String(s) =
16822 lit.as_ref()
16823 else {
16824 unreachable!()
16825 };
16826 Expression::string(&converter(s))
16827 }
16828 Expression::Function(func)
16829 if func.name.eq_ignore_ascii_case("CONCAT") =>
16830 {
16831 let new_args: Vec<Expression> = func
16832 .args
16833 .iter()
16834 .map(|a| convert_fmt_expr(a, converter))
16835 .collect();
16836 Expression::Function(Box::new(Function::new(
16837 "CONCAT".to_string(),
16838 new_args,
16839 )))
16840 }
16841 other => other.clone(),
16842 }
16843 }
16844
16845 match target {
16846 DialectType::DuckDB => {
16847 if matches!(source, DialectType::SQLite) {
16848 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
16849 let cast_val = Expression::Cast(Box::new(Cast {
16850 this: val,
16851 to: crate::expressions::DataType::Timestamp {
16852 precision: None,
16853 timezone: false,
16854 },
16855 trailing_comments: Vec::new(),
16856 double_colon_syntax: false,
16857 format: None,
16858 default: None,
16859 inferred_type: None,
16860 }));
16861 Ok(Expression::Function(Box::new(Function::new(
16862 "STRFTIME".to_string(),
16863 vec![cast_val, fmt_expr.clone()],
16864 ))))
16865 } else {
16866 Ok(Expression::Function(f))
16867 }
16868 }
16869 DialectType::Spark
16870 | DialectType::Databricks
16871 | DialectType::Hive => {
16872 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
16873 let converted_fmt =
16874 convert_fmt_expr(fmt_expr, &c_to_java_format);
16875 Ok(Expression::Function(Box::new(Function::new(
16876 "DATE_FORMAT".to_string(),
16877 vec![val, converted_fmt],
16878 ))))
16879 }
16880 DialectType::TSQL | DialectType::Fabric => {
16881 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
16882 let converted_fmt =
16883 convert_fmt_expr(fmt_expr, &c_to_java_format);
16884 Ok(Expression::Function(Box::new(Function::new(
16885 "FORMAT".to_string(),
16886 vec![val, converted_fmt],
16887 ))))
16888 }
16889 DialectType::Presto
16890 | DialectType::Trino
16891 | DialectType::Athena => {
16892 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
16893 if let Expression::Literal(lit) = fmt_expr {
16894 if let crate::expressions::Literal::String(s) =
16895 lit.as_ref()
16896 {
16897 let presto_fmt = duckdb_to_presto_format(s);
16898 Ok(Expression::Function(Box::new(Function::new(
16899 "DATE_FORMAT".to_string(),
16900 vec![val, Expression::string(&presto_fmt)],
16901 ))))
16902 } else {
16903 Ok(Expression::Function(Box::new(Function::new(
16904 "DATE_FORMAT".to_string(),
16905 vec![val, fmt_expr.clone()],
16906 ))))
16907 }
16908 } else {
16909 Ok(Expression::Function(Box::new(Function::new(
16910 "DATE_FORMAT".to_string(),
16911 vec![val, fmt_expr.clone()],
16912 ))))
16913 }
16914 }
16915 DialectType::BigQuery => {
16916 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
16917 if let Expression::Literal(lit) = fmt_expr {
16918 if let crate::expressions::Literal::String(s) =
16919 lit.as_ref()
16920 {
16921 let bq_fmt = duckdb_to_bigquery_format(s);
16922 Ok(Expression::Function(Box::new(Function::new(
16923 "FORMAT_DATE".to_string(),
16924 vec![Expression::string(&bq_fmt), val],
16925 ))))
16926 } else {
16927 Ok(Expression::Function(Box::new(Function::new(
16928 "FORMAT_DATE".to_string(),
16929 vec![fmt_expr.clone(), val],
16930 ))))
16931 }
16932 } else {
16933 Ok(Expression::Function(Box::new(Function::new(
16934 "FORMAT_DATE".to_string(),
16935 vec![fmt_expr.clone(), val],
16936 ))))
16937 }
16938 }
16939 DialectType::PostgreSQL | DialectType::Redshift => {
16940 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
16941 if let Expression::Literal(lit) = fmt_expr {
16942 if let crate::expressions::Literal::String(s) =
16943 lit.as_ref()
16944 {
16945 let pg_fmt = s
16946 .replace("%Y", "YYYY")
16947 .replace("%m", "MM")
16948 .replace("%d", "DD")
16949 .replace("%H", "HH24")
16950 .replace("%M", "MI")
16951 .replace("%S", "SS")
16952 .replace("%y", "YY")
16953 .replace("%-m", "FMMM")
16954 .replace("%-d", "FMDD")
16955 .replace("%-H", "FMHH24")
16956 .replace("%-I", "FMHH12")
16957 .replace("%p", "AM")
16958 .replace("%F", "YYYY-MM-DD")
16959 .replace("%T", "HH24:MI:SS");
16960 Ok(Expression::Function(Box::new(Function::new(
16961 "TO_CHAR".to_string(),
16962 vec![val, Expression::string(&pg_fmt)],
16963 ))))
16964 } else {
16965 Ok(Expression::Function(Box::new(Function::new(
16966 "TO_CHAR".to_string(),
16967 vec![val, fmt_expr.clone()],
16968 ))))
16969 }
16970 } else {
16971 Ok(Expression::Function(Box::new(Function::new(
16972 "TO_CHAR".to_string(),
16973 vec![val, fmt_expr.clone()],
16974 ))))
16975 }
16976 }
16977 _ => Ok(Expression::Function(f)),
16978 }
16979 }
16980 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
16981 "STRPTIME" if f.args.len() == 2 => {
16982 let val = f.args[0].clone();
16983 let fmt_expr = &f.args[1];
16984
16985 fn c_to_java_format_parse(fmt: &str) -> String {
16986 fmt.replace("%Y", "yyyy")
16987 .replace("%m", "MM")
16988 .replace("%d", "dd")
16989 .replace("%H", "HH")
16990 .replace("%M", "mm")
16991 .replace("%S", "ss")
16992 .replace("%f", "SSSSSS")
16993 .replace("%y", "yy")
16994 .replace("%-m", "M")
16995 .replace("%-d", "d")
16996 .replace("%-H", "H")
16997 .replace("%-I", "h")
16998 .replace("%I", "hh")
16999 .replace("%p", "a")
17000 .replace("%F", "yyyy-MM-dd")
17001 .replace("%T", "HH:mm:ss")
17002 }
17003
17004 match target {
17005 DialectType::DuckDB => Ok(Expression::Function(f)),
17006 DialectType::Spark | DialectType::Databricks => {
17007 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
17008 if let Expression::Literal(lit) = fmt_expr {
17009 if let crate::expressions::Literal::String(s) =
17010 lit.as_ref()
17011 {
17012 let java_fmt = c_to_java_format_parse(s);
17013 Ok(Expression::Function(Box::new(Function::new(
17014 "TO_TIMESTAMP".to_string(),
17015 vec![val, Expression::string(&java_fmt)],
17016 ))))
17017 } else {
17018 Ok(Expression::Function(Box::new(Function::new(
17019 "TO_TIMESTAMP".to_string(),
17020 vec![val, fmt_expr.clone()],
17021 ))))
17022 }
17023 } else {
17024 Ok(Expression::Function(Box::new(Function::new(
17025 "TO_TIMESTAMP".to_string(),
17026 vec![val, fmt_expr.clone()],
17027 ))))
17028 }
17029 }
17030 DialectType::Hive => {
17031 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
17032 if let Expression::Literal(lit) = fmt_expr {
17033 if let crate::expressions::Literal::String(s) =
17034 lit.as_ref()
17035 {
17036 let java_fmt = c_to_java_format_parse(s);
17037 let unix_ts =
17038 Expression::Function(Box::new(Function::new(
17039 "UNIX_TIMESTAMP".to_string(),
17040 vec![val, Expression::string(&java_fmt)],
17041 )));
17042 let from_unix =
17043 Expression::Function(Box::new(Function::new(
17044 "FROM_UNIXTIME".to_string(),
17045 vec![unix_ts],
17046 )));
17047 Ok(Expression::Cast(Box::new(
17048 crate::expressions::Cast {
17049 this: from_unix,
17050 to: DataType::Timestamp {
17051 timezone: false,
17052 precision: None,
17053 },
17054 trailing_comments: Vec::new(),
17055 double_colon_syntax: false,
17056 format: None,
17057 default: None,
17058 inferred_type: None,
17059 },
17060 )))
17061 } else {
17062 Ok(Expression::Function(f))
17063 }
17064 } else {
17065 Ok(Expression::Function(f))
17066 }
17067 }
17068 DialectType::Presto
17069 | DialectType::Trino
17070 | DialectType::Athena => {
17071 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
17072 if let Expression::Literal(lit) = fmt_expr {
17073 if let crate::expressions::Literal::String(s) =
17074 lit.as_ref()
17075 {
17076 let presto_fmt = duckdb_to_presto_format(s);
17077 Ok(Expression::Function(Box::new(Function::new(
17078 "DATE_PARSE".to_string(),
17079 vec![val, Expression::string(&presto_fmt)],
17080 ))))
17081 } else {
17082 Ok(Expression::Function(Box::new(Function::new(
17083 "DATE_PARSE".to_string(),
17084 vec![val, fmt_expr.clone()],
17085 ))))
17086 }
17087 } else {
17088 Ok(Expression::Function(Box::new(Function::new(
17089 "DATE_PARSE".to_string(),
17090 vec![val, fmt_expr.clone()],
17091 ))))
17092 }
17093 }
17094 DialectType::BigQuery => {
17095 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
17096 if let Expression::Literal(lit) = fmt_expr {
17097 if let crate::expressions::Literal::String(s) =
17098 lit.as_ref()
17099 {
17100 let bq_fmt = duckdb_to_bigquery_format(s);
17101 Ok(Expression::Function(Box::new(Function::new(
17102 "PARSE_TIMESTAMP".to_string(),
17103 vec![Expression::string(&bq_fmt), val],
17104 ))))
17105 } else {
17106 Ok(Expression::Function(Box::new(Function::new(
17107 "PARSE_TIMESTAMP".to_string(),
17108 vec![fmt_expr.clone(), val],
17109 ))))
17110 }
17111 } else {
17112 Ok(Expression::Function(Box::new(Function::new(
17113 "PARSE_TIMESTAMP".to_string(),
17114 vec![fmt_expr.clone(), val],
17115 ))))
17116 }
17117 }
17118 _ => Ok(Expression::Function(f)),
17119 }
17120 }
17121 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
17122 "DATE_FORMAT"
17123 if f.args.len() >= 2
17124 && matches!(
17125 source,
17126 DialectType::Presto
17127 | DialectType::Trino
17128 | DialectType::Athena
17129 ) =>
17130 {
17131 let val = f.args[0].clone();
17132 let fmt_expr = &f.args[1];
17133
17134 match target {
17135 DialectType::Presto
17136 | DialectType::Trino
17137 | DialectType::Athena => {
17138 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
17139 if let Expression::Literal(lit) = fmt_expr {
17140 if let crate::expressions::Literal::String(s) =
17141 lit.as_ref()
17142 {
17143 let normalized = normalize_presto_format(s);
17144 Ok(Expression::Function(Box::new(Function::new(
17145 "DATE_FORMAT".to_string(),
17146 vec![val, Expression::string(&normalized)],
17147 ))))
17148 } else {
17149 Ok(Expression::Function(f))
17150 }
17151 } else {
17152 Ok(Expression::Function(f))
17153 }
17154 }
17155 DialectType::Hive
17156 | DialectType::Spark
17157 | DialectType::Databricks => {
17158 // Convert Presto C-style to Java-style format
17159 if let Expression::Literal(lit) = fmt_expr {
17160 if let crate::expressions::Literal::String(s) =
17161 lit.as_ref()
17162 {
17163 let java_fmt = presto_to_java_format(s);
17164 Ok(Expression::Function(Box::new(Function::new(
17165 "DATE_FORMAT".to_string(),
17166 vec![val, Expression::string(&java_fmt)],
17167 ))))
17168 } else {
17169 Ok(Expression::Function(f))
17170 }
17171 } else {
17172 Ok(Expression::Function(f))
17173 }
17174 }
17175 DialectType::DuckDB => {
17176 // Convert to STRFTIME(val, duckdb_fmt)
17177 if let Expression::Literal(lit) = fmt_expr {
17178 if let crate::expressions::Literal::String(s) =
17179 lit.as_ref()
17180 {
17181 let duckdb_fmt = presto_to_duckdb_format(s);
17182 Ok(Expression::Function(Box::new(Function::new(
17183 "STRFTIME".to_string(),
17184 vec![val, Expression::string(&duckdb_fmt)],
17185 ))))
17186 } else {
17187 Ok(Expression::Function(Box::new(Function::new(
17188 "STRFTIME".to_string(),
17189 vec![val, fmt_expr.clone()],
17190 ))))
17191 }
17192 } else {
17193 Ok(Expression::Function(Box::new(Function::new(
17194 "STRFTIME".to_string(),
17195 vec![val, fmt_expr.clone()],
17196 ))))
17197 }
17198 }
17199 DialectType::BigQuery => {
17200 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
17201 if let Expression::Literal(lit) = fmt_expr {
17202 if let crate::expressions::Literal::String(s) =
17203 lit.as_ref()
17204 {
17205 let bq_fmt = presto_to_bigquery_format(s);
17206 Ok(Expression::Function(Box::new(Function::new(
17207 "FORMAT_DATE".to_string(),
17208 vec![Expression::string(&bq_fmt), val],
17209 ))))
17210 } else {
17211 Ok(Expression::Function(Box::new(Function::new(
17212 "FORMAT_DATE".to_string(),
17213 vec![fmt_expr.clone(), val],
17214 ))))
17215 }
17216 } else {
17217 Ok(Expression::Function(Box::new(Function::new(
17218 "FORMAT_DATE".to_string(),
17219 vec![fmt_expr.clone(), val],
17220 ))))
17221 }
17222 }
17223 _ => Ok(Expression::Function(f)),
17224 }
17225 }
17226 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
17227 "DATE_PARSE"
17228 if f.args.len() >= 2
17229 && matches!(
17230 source,
17231 DialectType::Presto
17232 | DialectType::Trino
17233 | DialectType::Athena
17234 ) =>
17235 {
17236 let val = f.args[0].clone();
17237 let fmt_expr = &f.args[1];
17238
17239 match target {
17240 DialectType::Presto
17241 | DialectType::Trino
17242 | DialectType::Athena => {
17243 // Presto -> Presto: normalize format
17244 if let Expression::Literal(lit) = fmt_expr {
17245 if let crate::expressions::Literal::String(s) =
17246 lit.as_ref()
17247 {
17248 let normalized = normalize_presto_format(s);
17249 Ok(Expression::Function(Box::new(Function::new(
17250 "DATE_PARSE".to_string(),
17251 vec![val, Expression::string(&normalized)],
17252 ))))
17253 } else {
17254 Ok(Expression::Function(f))
17255 }
17256 } else {
17257 Ok(Expression::Function(f))
17258 }
17259 }
17260 DialectType::Hive => {
17261 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
17262 if let Expression::Literal(lit) = fmt_expr {
17263 if let crate::expressions::Literal::String(s) =
17264 lit.as_ref()
17265 {
17266 if is_default_presto_timestamp_format(s)
17267 || is_default_presto_date_format(s)
17268 {
17269 Ok(Expression::Cast(Box::new(
17270 crate::expressions::Cast {
17271 this: val,
17272 to: DataType::Timestamp {
17273 timezone: false,
17274 precision: None,
17275 },
17276 trailing_comments: Vec::new(),
17277 double_colon_syntax: false,
17278 format: None,
17279 default: None,
17280 inferred_type: None,
17281 },
17282 )))
17283 } else {
17284 let java_fmt = presto_to_java_format(s);
17285 Ok(Expression::Function(Box::new(
17286 Function::new(
17287 "TO_TIMESTAMP".to_string(),
17288 vec![
17289 val,
17290 Expression::string(&java_fmt),
17291 ],
17292 ),
17293 )))
17294 }
17295 } else {
17296 Ok(Expression::Function(f))
17297 }
17298 } else {
17299 Ok(Expression::Function(f))
17300 }
17301 }
17302 DialectType::Spark | DialectType::Databricks => {
17303 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
17304 if let Expression::Literal(lit) = fmt_expr {
17305 if let crate::expressions::Literal::String(s) =
17306 lit.as_ref()
17307 {
17308 let java_fmt = presto_to_java_format(s);
17309 Ok(Expression::Function(Box::new(Function::new(
17310 "TO_TIMESTAMP".to_string(),
17311 vec![val, Expression::string(&java_fmt)],
17312 ))))
17313 } else {
17314 Ok(Expression::Function(f))
17315 }
17316 } else {
17317 Ok(Expression::Function(f))
17318 }
17319 }
17320 DialectType::DuckDB => {
17321 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
17322 if let Expression::Literal(lit) = fmt_expr {
17323 if let crate::expressions::Literal::String(s) =
17324 lit.as_ref()
17325 {
17326 let duckdb_fmt = presto_to_duckdb_format(s);
17327 Ok(Expression::Function(Box::new(Function::new(
17328 "STRPTIME".to_string(),
17329 vec![val, Expression::string(&duckdb_fmt)],
17330 ))))
17331 } else {
17332 Ok(Expression::Function(Box::new(Function::new(
17333 "STRPTIME".to_string(),
17334 vec![val, fmt_expr.clone()],
17335 ))))
17336 }
17337 } else {
17338 Ok(Expression::Function(Box::new(Function::new(
17339 "STRPTIME".to_string(),
17340 vec![val, fmt_expr.clone()],
17341 ))))
17342 }
17343 }
17344 _ => Ok(Expression::Function(f)),
17345 }
17346 }
17347 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
17348 "FROM_BASE64"
17349 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
17350 {
17351 Ok(Expression::Function(Box::new(Function::new(
17352 "UNBASE64".to_string(),
17353 f.args,
17354 ))))
17355 }
17356 "TO_BASE64"
17357 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
17358 {
17359 Ok(Expression::Function(Box::new(Function::new(
17360 "BASE64".to_string(),
17361 f.args,
17362 ))))
17363 }
17364 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
17365 "FROM_UNIXTIME"
17366 if f.args.len() == 1
17367 && matches!(
17368 source,
17369 DialectType::Presto
17370 | DialectType::Trino
17371 | DialectType::Athena
17372 )
17373 && matches!(
17374 target,
17375 DialectType::Spark | DialectType::Databricks
17376 ) =>
17377 {
17378 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
17379 let from_unix = Expression::Function(Box::new(Function::new(
17380 "FROM_UNIXTIME".to_string(),
17381 f.args,
17382 )));
17383 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
17384 this: from_unix,
17385 to: DataType::Timestamp {
17386 timezone: false,
17387 precision: None,
17388 },
17389 trailing_comments: Vec::new(),
17390 double_colon_syntax: false,
17391 format: None,
17392 default: None,
17393 inferred_type: None,
17394 })))
17395 }
17396 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
17397 "DATE_FORMAT"
17398 if f.args.len() >= 2
17399 && !matches!(
17400 target,
17401 DialectType::Hive
17402 | DialectType::Spark
17403 | DialectType::Databricks
17404 | DialectType::MySQL
17405 | DialectType::SingleStore
17406 ) =>
17407 {
17408 let val = f.args[0].clone();
17409 let fmt_expr = &f.args[1];
17410 let is_hive_source = matches!(
17411 source,
17412 DialectType::Hive
17413 | DialectType::Spark
17414 | DialectType::Databricks
17415 );
17416
/// Translates a Java-style date pattern into C `strftime`-style specifiers.
///
/// Multi-character tokens are substituted first, in a fixed order, so that
/// `yyyy` is consumed before `yy`. A second scan then maps the
/// single-letter timezone tokens (`z` -> `%Z`, `Z` -> `%z`) while leaving
/// every existing `%x` specifier untouched.
fn java_to_c_format(fmt: &str) -> String {
    // Substitution order matters: each pair is applied to the result of the previous one.
    const MULTI_CHAR_TOKENS: [(&str, &str); 9] = [
        ("yyyy", "%Y"),
        ("SSSSSS", "%f"),
        ("EEEE", "%W"),
        ("MM", "%m"),
        ("dd", "%d"),
        ("HH", "%H"),
        ("mm", "%M"),
        ("ss", "%S"),
        ("yy", "%y"),
    ];
    let substituted = MULTI_CHAR_TOKENS
        .iter()
        .fold(fmt.to_string(), |acc, (java, c)| acc.replace(java, c));

    let mut converted = String::with_capacity(substituted.len());
    let mut remaining = substituted.chars();
    while let Some(ch) = remaining.next() {
        match ch {
            '%' => {
                // Copy the specifier letter verbatim so that the 'z'/'Z'
                // rewrite below never touches an existing "%z"/"%Z".
                // A trailing lone '%' is emitted unchanged.
                converted.push('%');
                if let Some(spec) = remaining.next() {
                    converted.push(spec);
                }
            }
            // Timezone name.
            'z' => converted.push_str("%Z"),
            // Timezone offset.
            'Z' => converted.push_str("%z"),
            other => converted.push(other),
        }
    }
    converted
}
17456
17457 fn java_to_presto_format(fmt: &str) -> String {
17458 // Presto uses %T for HH:MM:SS
17459 let c_fmt = java_to_c_format(fmt);
17460 c_fmt.replace("%H:%M:%S", "%T")
17461 }
17462
17463 fn java_to_bq_format(fmt: &str) -> String {
17464 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
17465 let c_fmt = java_to_c_format(fmt);
17466 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
17467 }
17468
17469 // For Hive source, CAST string literals to appropriate type
17470 let cast_val = if is_hive_source {
17471 match &val {
17472 Expression::Literal(lit)
17473 if matches!(
17474 lit.as_ref(),
17475 crate::expressions::Literal::String(_)
17476 ) =>
17477 {
17478 match target {
17479 DialectType::DuckDB
17480 | DialectType::Presto
17481 | DialectType::Trino
17482 | DialectType::Athena => {
17483 Self::ensure_cast_timestamp(val.clone())
17484 }
17485 DialectType::BigQuery => {
17486 // BigQuery: CAST(val AS DATETIME)
17487 Expression::Cast(Box::new(
17488 crate::expressions::Cast {
17489 this: val.clone(),
17490 to: DataType::Custom {
17491 name: "DATETIME".to_string(),
17492 },
17493 trailing_comments: vec![],
17494 double_colon_syntax: false,
17495 format: None,
17496 default: None,
17497 inferred_type: None,
17498 },
17499 ))
17500 }
17501 _ => val.clone(),
17502 }
17503 }
17504 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
17505 Expression::Cast(c)
17506 if matches!(c.to, DataType::Date)
17507 && matches!(
17508 target,
17509 DialectType::Presto
17510 | DialectType::Trino
17511 | DialectType::Athena
17512 ) =>
17513 {
17514 Expression::Cast(Box::new(crate::expressions::Cast {
17515 this: val.clone(),
17516 to: DataType::Timestamp {
17517 timezone: false,
17518 precision: None,
17519 },
17520 trailing_comments: vec![],
17521 double_colon_syntax: false,
17522 format: None,
17523 default: None,
17524 inferred_type: None,
17525 }))
17526 }
17527 Expression::Literal(lit)
17528 if matches!(
17529 lit.as_ref(),
17530 crate::expressions::Literal::Date(_)
17531 ) && matches!(
17532 target,
17533 DialectType::Presto
17534 | DialectType::Trino
17535 | DialectType::Athena
17536 ) =>
17537 {
17538 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
17539 let cast_date = Self::date_literal_to_cast(val.clone());
17540 Expression::Cast(Box::new(crate::expressions::Cast {
17541 this: cast_date,
17542 to: DataType::Timestamp {
17543 timezone: false,
17544 precision: None,
17545 },
17546 trailing_comments: vec![],
17547 double_colon_syntax: false,
17548 format: None,
17549 default: None,
17550 inferred_type: None,
17551 }))
17552 }
17553 _ => val.clone(),
17554 }
17555 } else {
17556 val.clone()
17557 };
17558
17559 match target {
17560 DialectType::DuckDB => {
17561 if let Expression::Literal(lit) = fmt_expr {
17562 if let crate::expressions::Literal::String(s) =
17563 lit.as_ref()
17564 {
17565 let c_fmt = if is_hive_source {
17566 java_to_c_format(s)
17567 } else {
17568 s.clone()
17569 };
17570 Ok(Expression::Function(Box::new(Function::new(
17571 "STRFTIME".to_string(),
17572 vec![cast_val, Expression::string(&c_fmt)],
17573 ))))
17574 } else {
17575 Ok(Expression::Function(Box::new(Function::new(
17576 "STRFTIME".to_string(),
17577 vec![cast_val, fmt_expr.clone()],
17578 ))))
17579 }
17580 } else {
17581 Ok(Expression::Function(Box::new(Function::new(
17582 "STRFTIME".to_string(),
17583 vec![cast_val, fmt_expr.clone()],
17584 ))))
17585 }
17586 }
17587 DialectType::Presto
17588 | DialectType::Trino
17589 | DialectType::Athena => {
17590 if is_hive_source {
17591 if let Expression::Literal(lit) = fmt_expr {
17592 if let crate::expressions::Literal::String(s) =
17593 lit.as_ref()
17594 {
17595 let p_fmt = java_to_presto_format(s);
17596 Ok(Expression::Function(Box::new(
17597 Function::new(
17598 "DATE_FORMAT".to_string(),
17599 vec![
17600 cast_val,
17601 Expression::string(&p_fmt),
17602 ],
17603 ),
17604 )))
17605 } else {
17606 Ok(Expression::Function(Box::new(
17607 Function::new(
17608 "DATE_FORMAT".to_string(),
17609 vec![cast_val, fmt_expr.clone()],
17610 ),
17611 )))
17612 }
17613 } else {
17614 Ok(Expression::Function(Box::new(Function::new(
17615 "DATE_FORMAT".to_string(),
17616 vec![cast_val, fmt_expr.clone()],
17617 ))))
17618 }
17619 } else {
17620 Ok(Expression::Function(Box::new(Function::new(
17621 "DATE_FORMAT".to_string(),
17622 f.args,
17623 ))))
17624 }
17625 }
17626 DialectType::BigQuery => {
17627 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
17628 if let Expression::Literal(lit) = fmt_expr {
17629 if let crate::expressions::Literal::String(s) =
17630 lit.as_ref()
17631 {
17632 let bq_fmt = if is_hive_source {
17633 java_to_bq_format(s)
17634 } else {
17635 java_to_c_format(s)
17636 };
17637 Ok(Expression::Function(Box::new(Function::new(
17638 "FORMAT_DATE".to_string(),
17639 vec![Expression::string(&bq_fmt), cast_val],
17640 ))))
17641 } else {
17642 Ok(Expression::Function(Box::new(Function::new(
17643 "FORMAT_DATE".to_string(),
17644 vec![fmt_expr.clone(), cast_val],
17645 ))))
17646 }
17647 } else {
17648 Ok(Expression::Function(Box::new(Function::new(
17649 "FORMAT_DATE".to_string(),
17650 vec![fmt_expr.clone(), cast_val],
17651 ))))
17652 }
17653 }
17654 DialectType::PostgreSQL | DialectType::Redshift => {
17655 if let Expression::Literal(lit) = fmt_expr {
17656 if let crate::expressions::Literal::String(s) =
17657 lit.as_ref()
17658 {
17659 let pg_fmt = s
17660 .replace("yyyy", "YYYY")
17661 .replace("MM", "MM")
17662 .replace("dd", "DD")
17663 .replace("HH", "HH24")
17664 .replace("mm", "MI")
17665 .replace("ss", "SS")
17666 .replace("yy", "YY");
17667 Ok(Expression::Function(Box::new(Function::new(
17668 "TO_CHAR".to_string(),
17669 vec![val, Expression::string(&pg_fmt)],
17670 ))))
17671 } else {
17672 Ok(Expression::Function(Box::new(Function::new(
17673 "TO_CHAR".to_string(),
17674 vec![val, fmt_expr.clone()],
17675 ))))
17676 }
17677 } else {
17678 Ok(Expression::Function(Box::new(Function::new(
17679 "TO_CHAR".to_string(),
17680 vec![val, fmt_expr.clone()],
17681 ))))
17682 }
17683 }
17684 _ => Ok(Expression::Function(f)),
17685 }
17686 }
17687 // DATEDIFF(unit, start, end) - 3-arg form
17688 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
17689 "DATEDIFF" if f.args.len() == 3 => {
17690 let mut args = f.args;
17691 // SQLite source: args = (date1, date2, unit_string)
17692 // Standard source: args = (unit, start, end)
17693 let (_arg0, arg1, arg2, unit_str) =
17694 if matches!(source, DialectType::SQLite) {
17695 let date1 = args.remove(0);
17696 let date2 = args.remove(0);
17697 let unit_expr = args.remove(0);
17698 let unit_s = Self::get_unit_str_static(&unit_expr);
17699
17700 // For SQLite target, generate JULIANDAY arithmetic directly
17701 if matches!(target, DialectType::SQLite) {
17702 let jd_first = Expression::Function(Box::new(
17703 Function::new("JULIANDAY".to_string(), vec![date1]),
17704 ));
17705 let jd_second = Expression::Function(Box::new(
17706 Function::new("JULIANDAY".to_string(), vec![date2]),
17707 ));
17708 let diff = Expression::Sub(Box::new(
17709 crate::expressions::BinaryOp::new(
17710 jd_first, jd_second,
17711 ),
17712 ));
17713 let paren_diff = Expression::Paren(Box::new(
17714 crate::expressions::Paren {
17715 this: diff,
17716 trailing_comments: Vec::new(),
17717 },
17718 ));
17719 let adjusted = match unit_s.as_str() {
17720 "HOUR" => Expression::Mul(Box::new(
17721 crate::expressions::BinaryOp::new(
17722 paren_diff,
17723 Expression::Literal(Box::new(
17724 Literal::Number("24.0".to_string()),
17725 )),
17726 ),
17727 )),
17728 "MINUTE" => Expression::Mul(Box::new(
17729 crate::expressions::BinaryOp::new(
17730 paren_diff,
17731 Expression::Literal(Box::new(
17732 Literal::Number("1440.0".to_string()),
17733 )),
17734 ),
17735 )),
17736 "SECOND" => Expression::Mul(Box::new(
17737 crate::expressions::BinaryOp::new(
17738 paren_diff,
17739 Expression::Literal(Box::new(
17740 Literal::Number("86400.0".to_string()),
17741 )),
17742 ),
17743 )),
17744 "MONTH" => Expression::Div(Box::new(
17745 crate::expressions::BinaryOp::new(
17746 paren_diff,
17747 Expression::Literal(Box::new(
17748 Literal::Number("30.0".to_string()),
17749 )),
17750 ),
17751 )),
17752 "YEAR" => Expression::Div(Box::new(
17753 crate::expressions::BinaryOp::new(
17754 paren_diff,
17755 Expression::Literal(Box::new(
17756 Literal::Number("365.0".to_string()),
17757 )),
17758 ),
17759 )),
17760 _ => paren_diff,
17761 };
17762 return Ok(Expression::Cast(Box::new(Cast {
17763 this: adjusted,
17764 to: DataType::Int {
17765 length: None,
17766 integer_spelling: true,
17767 },
17768 trailing_comments: vec![],
17769 double_colon_syntax: false,
17770 format: None,
17771 default: None,
17772 inferred_type: None,
17773 })));
17774 }
17775
17776 // For other targets, remap to standard (unit, start, end) form
17777 let unit_ident =
17778 Expression::Identifier(Identifier::new(&unit_s));
17779 (unit_ident, date1, date2, unit_s)
17780 } else {
17781 let arg0 = args.remove(0);
17782 let arg1 = args.remove(0);
17783 let arg2 = args.remove(0);
17784 let unit_s = Self::get_unit_str_static(&arg0);
17785 (arg0, arg1, arg2, unit_s)
17786 };
17787
17788 // For Hive/Spark source, string literal dates need to be cast
17789 // Note: Databricks is excluded - it handles string args like standard SQL
17790 let is_hive_spark =
17791 matches!(source, DialectType::Hive | DialectType::Spark);
17792
17793 match target {
17794 DialectType::Snowflake => {
17795 let unit =
17796 Expression::Identifier(Identifier::new(&unit_str));
17797 // Use ensure_to_date_preserved to add TO_DATE with a marker
17798 // that prevents the Snowflake TO_DATE handler from converting it to CAST
17799 let d1 = if is_hive_spark {
17800 Self::ensure_to_date_preserved(arg1)
17801 } else {
17802 arg1
17803 };
17804 let d2 = if is_hive_spark {
17805 Self::ensure_to_date_preserved(arg2)
17806 } else {
17807 arg2
17808 };
17809 Ok(Expression::Function(Box::new(Function::new(
17810 "DATEDIFF".to_string(),
17811 vec![unit, d1, d2],
17812 ))))
17813 }
17814 DialectType::Redshift => {
17815 let unit =
17816 Expression::Identifier(Identifier::new(&unit_str));
17817 let d1 = if is_hive_spark {
17818 Self::ensure_cast_date(arg1)
17819 } else {
17820 arg1
17821 };
17822 let d2 = if is_hive_spark {
17823 Self::ensure_cast_date(arg2)
17824 } else {
17825 arg2
17826 };
17827 Ok(Expression::Function(Box::new(Function::new(
17828 "DATEDIFF".to_string(),
17829 vec![unit, d1, d2],
17830 ))))
17831 }
17832 DialectType::TSQL => {
17833 let unit =
17834 Expression::Identifier(Identifier::new(&unit_str));
17835 Ok(Expression::Function(Box::new(Function::new(
17836 "DATEDIFF".to_string(),
17837 vec![unit, arg1, arg2],
17838 ))))
17839 }
17840 DialectType::DuckDB => {
17841 let is_redshift_tsql = matches!(
17842 source,
17843 DialectType::Redshift | DialectType::TSQL
17844 );
17845 if is_hive_spark {
17846 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
17847 let d1 = Self::ensure_cast_date(arg1);
17848 let d2 = Self::ensure_cast_date(arg2);
17849 Ok(Expression::Function(Box::new(Function::new(
17850 "DATE_DIFF".to_string(),
17851 vec![Expression::string(&unit_str), d1, d2],
17852 ))))
17853 } else if matches!(source, DialectType::Snowflake) {
17854 // For Snowflake source: special handling per unit
17855 match unit_str.as_str() {
17856 "NANOSECOND" => {
17857 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
17858 fn cast_to_timestamp_ns(
17859 expr: Expression,
17860 ) -> Expression
17861 {
17862 Expression::Cast(Box::new(Cast {
17863 this: expr,
17864 to: DataType::Custom {
17865 name: "TIMESTAMP_NS".to_string(),
17866 },
17867 trailing_comments: vec![],
17868 double_colon_syntax: false,
17869 format: None,
17870 default: None,
17871 inferred_type: None,
17872 }))
17873 }
17874 let epoch_end = Expression::Function(Box::new(
17875 Function::new(
17876 "EPOCH_NS".to_string(),
17877 vec![cast_to_timestamp_ns(arg2)],
17878 ),
17879 ));
17880 let epoch_start = Expression::Function(
17881 Box::new(Function::new(
17882 "EPOCH_NS".to_string(),
17883 vec![cast_to_timestamp_ns(arg1)],
17884 )),
17885 );
17886 Ok(Expression::Sub(Box::new(BinaryOp::new(
17887 epoch_end,
17888 epoch_start,
17889 ))))
17890 }
17891 "WEEK" => {
17892 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
17893 let d1 = Self::force_cast_date(arg1);
17894 let d2 = Self::force_cast_date(arg2);
17895 let dt1 = Expression::Function(Box::new(
17896 Function::new(
17897 "DATE_TRUNC".to_string(),
17898 vec![Expression::string("WEEK"), d1],
17899 ),
17900 ));
17901 let dt2 = Expression::Function(Box::new(
17902 Function::new(
17903 "DATE_TRUNC".to_string(),
17904 vec![Expression::string("WEEK"), d2],
17905 ),
17906 ));
17907 Ok(Expression::Function(Box::new(
17908 Function::new(
17909 "DATE_DIFF".to_string(),
17910 vec![
17911 Expression::string(&unit_str),
17912 dt1,
17913 dt2,
17914 ],
17915 ),
17916 )))
17917 }
17918 _ => {
17919 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
17920 let d1 = Self::force_cast_date(arg1);
17921 let d2 = Self::force_cast_date(arg2);
17922 Ok(Expression::Function(Box::new(
17923 Function::new(
17924 "DATE_DIFF".to_string(),
17925 vec![
17926 Expression::string(&unit_str),
17927 d1,
17928 d2,
17929 ],
17930 ),
17931 )))
17932 }
17933 }
17934 } else if is_redshift_tsql {
17935 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
17936 let d1 = Self::force_cast_timestamp(arg1);
17937 let d2 = Self::force_cast_timestamp(arg2);
17938 Ok(Expression::Function(Box::new(Function::new(
17939 "DATE_DIFF".to_string(),
17940 vec![Expression::string(&unit_str), d1, d2],
17941 ))))
17942 } else {
17943 // Keep as DATEDIFF so DuckDB's transform_datediff handles
17944 // DATE_TRUNC for WEEK, CAST for string literals, etc.
17945 let unit =
17946 Expression::Identifier(Identifier::new(&unit_str));
17947 Ok(Expression::Function(Box::new(Function::new(
17948 "DATEDIFF".to_string(),
17949 vec![unit, arg1, arg2],
17950 ))))
17951 }
17952 }
17953 DialectType::BigQuery => {
17954 let is_redshift_tsql = matches!(
17955 source,
17956 DialectType::Redshift
17957 | DialectType::TSQL
17958 | DialectType::Snowflake
17959 );
17960 let cast_d1 = if is_hive_spark {
17961 Self::ensure_cast_date(arg1)
17962 } else if is_redshift_tsql {
17963 Self::force_cast_datetime(arg1)
17964 } else {
17965 Self::ensure_cast_datetime(arg1)
17966 };
17967 let cast_d2 = if is_hive_spark {
17968 Self::ensure_cast_date(arg2)
17969 } else if is_redshift_tsql {
17970 Self::force_cast_datetime(arg2)
17971 } else {
17972 Self::ensure_cast_datetime(arg2)
17973 };
17974 let unit =
17975 Expression::Identifier(Identifier::new(&unit_str));
17976 Ok(Expression::Function(Box::new(Function::new(
17977 "DATE_DIFF".to_string(),
17978 vec![cast_d2, cast_d1, unit],
17979 ))))
17980 }
17981 DialectType::Presto
17982 | DialectType::Trino
17983 | DialectType::Athena => {
// For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
// For Redshift/TSQL/Snowflake source, args need CAST to TIMESTAMP (always)
17986 let is_redshift_tsql = matches!(
17987 source,
17988 DialectType::Redshift
17989 | DialectType::TSQL
17990 | DialectType::Snowflake
17991 );
17992 let d1 = if is_hive_spark {
17993 Self::double_cast_timestamp_date(arg1)
17994 } else if is_redshift_tsql {
17995 Self::force_cast_timestamp(arg1)
17996 } else {
17997 arg1
17998 };
17999 let d2 = if is_hive_spark {
18000 Self::double_cast_timestamp_date(arg2)
18001 } else if is_redshift_tsql {
18002 Self::force_cast_timestamp(arg2)
18003 } else {
18004 arg2
18005 };
18006 Ok(Expression::Function(Box::new(Function::new(
18007 "DATE_DIFF".to_string(),
18008 vec![Expression::string(&unit_str), d1, d2],
18009 ))))
18010 }
18011 DialectType::Hive => match unit_str.as_str() {
18012 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
18013 this: Expression::Function(Box::new(Function::new(
18014 "MONTHS_BETWEEN".to_string(),
18015 vec![arg2, arg1],
18016 ))),
18017 to: DataType::Int {
18018 length: None,
18019 integer_spelling: false,
18020 },
18021 trailing_comments: vec![],
18022 double_colon_syntax: false,
18023 format: None,
18024 default: None,
18025 inferred_type: None,
18026 }))),
18027 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
18028 this: Expression::Div(Box::new(
18029 crate::expressions::BinaryOp::new(
18030 Expression::Function(Box::new(Function::new(
18031 "DATEDIFF".to_string(),
18032 vec![arg2, arg1],
18033 ))),
18034 Expression::number(7),
18035 ),
18036 )),
18037 to: DataType::Int {
18038 length: None,
18039 integer_spelling: false,
18040 },
18041 trailing_comments: vec![],
18042 double_colon_syntax: false,
18043 format: None,
18044 default: None,
18045 inferred_type: None,
18046 }))),
18047 _ => Ok(Expression::Function(Box::new(Function::new(
18048 "DATEDIFF".to_string(),
18049 vec![arg2, arg1],
18050 )))),
18051 },
18052 DialectType::Spark | DialectType::Databricks => {
18053 let unit =
18054 Expression::Identifier(Identifier::new(&unit_str));
18055 Ok(Expression::Function(Box::new(Function::new(
18056 "DATEDIFF".to_string(),
18057 vec![unit, arg1, arg2],
18058 ))))
18059 }
18060 _ => {
18061 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
18062 let d1 = if is_hive_spark {
18063 Self::ensure_cast_date(arg1)
18064 } else {
18065 arg1
18066 };
18067 let d2 = if is_hive_spark {
18068 Self::ensure_cast_date(arg2)
18069 } else {
18070 arg2
18071 };
18072 let unit =
18073 Expression::Identifier(Identifier::new(&unit_str));
18074 Ok(Expression::Function(Box::new(Function::new(
18075 "DATEDIFF".to_string(),
18076 vec![unit, d1, d2],
18077 ))))
18078 }
18079 }
18080 }
18081 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
18082 "DATEDIFF" if f.args.len() == 2 => {
18083 let mut args = f.args;
18084 let arg0 = args.remove(0);
18085 let arg1 = args.remove(0);
18086
18087 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
18088 // Also recognizes TryCast/Cast to DATE that may have been produced by
18089 // cross-dialect TO_DATE -> TRY_CAST conversion
18090 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
18091 if let Expression::Function(ref f) = e {
18092 if f.name.eq_ignore_ascii_case("TO_DATE")
18093 && f.args.len() == 1
18094 {
18095 return (f.args[0].clone(), true);
18096 }
18097 }
18098 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
18099 if let Expression::TryCast(ref c) = e {
18100 if matches!(c.to, DataType::Date) {
18101 return (e, true); // Already properly cast, return as-is
18102 }
18103 }
18104 (e, false)
18105 };
18106
18107 match target {
18108 DialectType::DuckDB => {
18109 // For Hive source, always CAST to DATE
18110 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
18111 let cast_d0 = if matches!(
18112 source,
18113 DialectType::Hive
18114 | DialectType::Spark
18115 | DialectType::Databricks
18116 ) {
18117 let (inner, was_to_date) = unwrap_to_date(arg1);
18118 if was_to_date {
18119 // Already a date expression, use directly
18120 if matches!(&inner, Expression::TryCast(_)) {
18121 inner // Already TRY_CAST(x AS DATE)
18122 } else {
18123 Self::try_cast_date(inner)
18124 }
18125 } else {
18126 Self::force_cast_date(inner)
18127 }
18128 } else {
18129 Self::ensure_cast_date(arg1)
18130 };
18131 let cast_d1 = if matches!(
18132 source,
18133 DialectType::Hive
18134 | DialectType::Spark
18135 | DialectType::Databricks
18136 ) {
18137 let (inner, was_to_date) = unwrap_to_date(arg0);
18138 if was_to_date {
18139 if matches!(&inner, Expression::TryCast(_)) {
18140 inner
18141 } else {
18142 Self::try_cast_date(inner)
18143 }
18144 } else {
18145 Self::force_cast_date(inner)
18146 }
18147 } else {
18148 Self::ensure_cast_date(arg0)
18149 };
18150 Ok(Expression::Function(Box::new(Function::new(
18151 "DATE_DIFF".to_string(),
18152 vec![Expression::string("DAY"), cast_d0, cast_d1],
18153 ))))
18154 }
18155 DialectType::Presto
18156 | DialectType::Trino
18157 | DialectType::Athena => {
18158 // For Hive/Spark source, apply double_cast_timestamp_date
18159 // For other sources (MySQL etc.), just swap args without casting
18160 if matches!(
18161 source,
18162 DialectType::Hive
18163 | DialectType::Spark
18164 | DialectType::Databricks
18165 ) {
18166 let cast_fn = |e: Expression| -> Expression {
18167 let (inner, was_to_date) = unwrap_to_date(e);
18168 if was_to_date {
18169 let first_cast =
18170 Self::double_cast_timestamp_date(inner);
18171 Self::double_cast_timestamp_date(first_cast)
18172 } else {
18173 Self::double_cast_timestamp_date(inner)
18174 }
18175 };
18176 Ok(Expression::Function(Box::new(Function::new(
18177 "DATE_DIFF".to_string(),
18178 vec![
18179 Expression::string("DAY"),
18180 cast_fn(arg1),
18181 cast_fn(arg0),
18182 ],
18183 ))))
18184 } else {
18185 Ok(Expression::Function(Box::new(Function::new(
18186 "DATE_DIFF".to_string(),
18187 vec![Expression::string("DAY"), arg1, arg0],
18188 ))))
18189 }
18190 }
18191 DialectType::Redshift => {
18192 let unit = Expression::Identifier(Identifier::new("DAY"));
18193 Ok(Expression::Function(Box::new(Function::new(
18194 "DATEDIFF".to_string(),
18195 vec![unit, arg1, arg0],
18196 ))))
18197 }
18198 _ => Ok(Expression::Function(Box::new(Function::new(
18199 "DATEDIFF".to_string(),
18200 vec![arg0, arg1],
18201 )))),
18202 }
18203 }
18204 // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style)
18205 "DATE_DIFF" if f.args.len() == 3 => {
18206 let mut args = f.args;
18207 let arg0 = args.remove(0);
18208 let arg1 = args.remove(0);
18209 let arg2 = args.remove(0);
18210 let unit_str = Self::get_unit_str_static(&arg0);
18211
18212 match target {
18213 DialectType::DuckDB => {
18214 // DuckDB: DATE_DIFF('UNIT', start, end)
18215 Ok(Expression::Function(Box::new(Function::new(
18216 "DATE_DIFF".to_string(),
18217 vec![Expression::string(&unit_str), arg1, arg2],
18218 ))))
18219 }
18220 DialectType::Presto
18221 | DialectType::Trino
18222 | DialectType::Athena => {
18223 Ok(Expression::Function(Box::new(Function::new(
18224 "DATE_DIFF".to_string(),
18225 vec![Expression::string(&unit_str), arg1, arg2],
18226 ))))
18227 }
18228 DialectType::ClickHouse => {
18229 // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
18230 let unit =
18231 Expression::Identifier(Identifier::new(&unit_str));
18232 Ok(Expression::Function(Box::new(Function::new(
18233 "DATE_DIFF".to_string(),
18234 vec![unit, arg1, arg2],
18235 ))))
18236 }
18237 DialectType::Snowflake | DialectType::Redshift => {
18238 let unit =
18239 Expression::Identifier(Identifier::new(&unit_str));
18240 Ok(Expression::Function(Box::new(Function::new(
18241 "DATEDIFF".to_string(),
18242 vec![unit, arg1, arg2],
18243 ))))
18244 }
18245 _ => {
18246 let unit =
18247 Expression::Identifier(Identifier::new(&unit_str));
18248 Ok(Expression::Function(Box::new(Function::new(
18249 "DATEDIFF".to_string(),
18250 vec![unit, arg1, arg2],
18251 ))))
18252 }
18253 }
18254 }
18255 // DATEADD(unit, val, date) - 3-arg form
18256 "DATEADD" if f.args.len() == 3 => {
18257 let mut args = f.args;
18258 let arg0 = args.remove(0);
18259 let arg1 = args.remove(0);
18260 let arg2 = args.remove(0);
18261 let unit_str = Self::get_unit_str_static(&arg0);
18262
18263 // Normalize TSQL unit abbreviations to standard names
18264 let unit_str = match unit_str.as_str() {
18265 "YY" | "YYYY" => "YEAR".to_string(),
18266 "QQ" | "Q" => "QUARTER".to_string(),
18267 "MM" | "M" => "MONTH".to_string(),
18268 "WK" | "WW" => "WEEK".to_string(),
18269 "DD" | "D" | "DY" => "DAY".to_string(),
18270 "HH" => "HOUR".to_string(),
18271 "MI" | "N" => "MINUTE".to_string(),
18272 "SS" | "S" => "SECOND".to_string(),
18273 "MS" => "MILLISECOND".to_string(),
18274 "MCS" | "US" => "MICROSECOND".to_string(),
18275 _ => unit_str,
18276 };
18277 match target {
18278 DialectType::Snowflake => {
18279 let unit =
18280 Expression::Identifier(Identifier::new(&unit_str));
18281 // Cast string literal to TIMESTAMP, but not for Snowflake source
18282 // (Snowflake natively accepts string literals in DATEADD)
18283 let arg2 = if matches!(
18284 &arg2,
18285 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18286 ) && !matches!(source, DialectType::Snowflake)
18287 {
18288 Expression::Cast(Box::new(Cast {
18289 this: arg2,
18290 to: DataType::Timestamp {
18291 precision: None,
18292 timezone: false,
18293 },
18294 trailing_comments: Vec::new(),
18295 double_colon_syntax: false,
18296 format: None,
18297 default: None,
18298 inferred_type: None,
18299 }))
18300 } else {
18301 arg2
18302 };
18303 Ok(Expression::Function(Box::new(Function::new(
18304 "DATEADD".to_string(),
18305 vec![unit, arg1, arg2],
18306 ))))
18307 }
18308 DialectType::TSQL => {
18309 let unit =
18310 Expression::Identifier(Identifier::new(&unit_str));
18311 // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
18312 let arg2 = if matches!(
18313 &arg2,
18314 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18315 ) && !matches!(
18316 source,
18317 DialectType::Spark
18318 | DialectType::Databricks
18319 | DialectType::Hive
18320 ) {
18321 Expression::Cast(Box::new(Cast {
18322 this: arg2,
18323 to: DataType::Custom {
18324 name: "DATETIME2".to_string(),
18325 },
18326 trailing_comments: Vec::new(),
18327 double_colon_syntax: false,
18328 format: None,
18329 default: None,
18330 inferred_type: None,
18331 }))
18332 } else {
18333 arg2
18334 };
18335 Ok(Expression::Function(Box::new(Function::new(
18336 "DATEADD".to_string(),
18337 vec![unit, arg1, arg2],
18338 ))))
18339 }
18340 DialectType::Redshift => {
18341 let unit =
18342 Expression::Identifier(Identifier::new(&unit_str));
18343 Ok(Expression::Function(Box::new(Function::new(
18344 "DATEADD".to_string(),
18345 vec![unit, arg1, arg2],
18346 ))))
18347 }
18348 DialectType::Databricks => {
18349 let unit =
18350 Expression::Identifier(Identifier::new(&unit_str));
// Sources with native DATEADD (TSQL, Fabric, Databricks, Snowflake) -> DATEADD
// Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
18353 let func_name = if matches!(
18354 source,
18355 DialectType::TSQL
18356 | DialectType::Fabric
18357 | DialectType::Databricks
18358 | DialectType::Snowflake
18359 ) {
18360 "DATEADD"
18361 } else {
18362 "DATE_ADD"
18363 };
18364 Ok(Expression::Function(Box::new(Function::new(
18365 func_name.to_string(),
18366 vec![unit, arg1, arg2],
18367 ))))
18368 }
18369 DialectType::DuckDB => {
18370 // Special handling for NANOSECOND from Snowflake
18371 if unit_str == "NANOSECOND"
18372 && matches!(source, DialectType::Snowflake)
18373 {
18374 // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
18375 let cast_ts = Expression::Cast(Box::new(Cast {
18376 this: arg2,
18377 to: DataType::Custom {
18378 name: "TIMESTAMP_NS".to_string(),
18379 },
18380 trailing_comments: vec![],
18381 double_colon_syntax: false,
18382 format: None,
18383 default: None,
18384 inferred_type: None,
18385 }));
18386 let epoch_ns =
18387 Expression::Function(Box::new(Function::new(
18388 "EPOCH_NS".to_string(),
18389 vec![cast_ts],
18390 )));
18391 let sum = Expression::Add(Box::new(BinaryOp::new(
18392 epoch_ns, arg1,
18393 )));
18394 Ok(Expression::Function(Box::new(Function::new(
18395 "MAKE_TIMESTAMP_NS".to_string(),
18396 vec![sum],
18397 ))))
18398 } else {
18399 // DuckDB: convert to date + INTERVAL syntax with CAST
18400 let iu = Self::parse_interval_unit_static(&unit_str);
18401 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
18402 this: Some(arg1),
18403 unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
18404 }));
18405 // Cast string literal to TIMESTAMP
18406 let arg2 = if matches!(
18407 &arg2,
18408 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18409 ) {
18410 Expression::Cast(Box::new(Cast {
18411 this: arg2,
18412 to: DataType::Timestamp {
18413 precision: None,
18414 timezone: false,
18415 },
18416 trailing_comments: Vec::new(),
18417 double_colon_syntax: false,
18418 format: None,
18419 default: None,
18420 inferred_type: None,
18421 }))
18422 } else {
18423 arg2
18424 };
18425 Ok(Expression::Add(Box::new(
18426 crate::expressions::BinaryOp::new(arg2, interval),
18427 )))
18428 }
18429 }
18430 DialectType::Spark => {
18431 // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
18432 // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
18433 if matches!(source, DialectType::TSQL | DialectType::Fabric)
18434 {
18435 fn multiply_expr_spark(
18436 expr: Expression,
18437 factor: i64,
18438 ) -> Expression
18439 {
18440 if let Expression::Literal(lit) = &expr {
18441 if let crate::expressions::Literal::Number(n) =
18442 lit.as_ref()
18443 {
18444 if let Ok(val) = n.parse::<i64>() {
18445 return Expression::Literal(Box::new(
18446 crate::expressions::Literal::Number(
18447 (val * factor).to_string(),
18448 ),
18449 ));
18450 }
18451 }
18452 }
18453 Expression::Mul(Box::new(
18454 crate::expressions::BinaryOp::new(
18455 expr,
18456 Expression::Literal(Box::new(
18457 crate::expressions::Literal::Number(
18458 factor.to_string(),
18459 ),
18460 )),
18461 ),
18462 ))
18463 }
18464 let normalized_unit = match unit_str.as_str() {
18465 "YEAR" | "YY" | "YYYY" => "YEAR",
18466 "QUARTER" | "QQ" | "Q" => "QUARTER",
18467 "MONTH" | "MM" | "M" => "MONTH",
18468 "WEEK" | "WK" | "WW" => "WEEK",
18469 "DAY" | "DD" | "D" | "DY" => "DAY",
18470 _ => &unit_str,
18471 };
18472 match normalized_unit {
18473 "YEAR" => {
18474 let months = multiply_expr_spark(arg1, 12);
18475 Ok(Expression::Function(Box::new(
18476 Function::new(
18477 "ADD_MONTHS".to_string(),
18478 vec![arg2, months],
18479 ),
18480 )))
18481 }
18482 "QUARTER" => {
18483 let months = multiply_expr_spark(arg1, 3);
18484 Ok(Expression::Function(Box::new(
18485 Function::new(
18486 "ADD_MONTHS".to_string(),
18487 vec![arg2, months],
18488 ),
18489 )))
18490 }
18491 "MONTH" => Ok(Expression::Function(Box::new(
18492 Function::new(
18493 "ADD_MONTHS".to_string(),
18494 vec![arg2, arg1],
18495 ),
18496 ))),
18497 "WEEK" => {
18498 let days = multiply_expr_spark(arg1, 7);
18499 Ok(Expression::Function(Box::new(
18500 Function::new(
18501 "DATE_ADD".to_string(),
18502 vec![arg2, days],
18503 ),
18504 )))
18505 }
18506 "DAY" => Ok(Expression::Function(Box::new(
18507 Function::new(
18508 "DATE_ADD".to_string(),
18509 vec![arg2, arg1],
18510 ),
18511 ))),
18512 _ => {
18513 let unit = Expression::Identifier(
18514 Identifier::new(&unit_str),
18515 );
18516 Ok(Expression::Function(Box::new(
18517 Function::new(
18518 "DATE_ADD".to_string(),
18519 vec![unit, arg1, arg2],
18520 ),
18521 )))
18522 }
18523 }
18524 } else {
18525 // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
18526 let unit =
18527 Expression::Identifier(Identifier::new(&unit_str));
18528 Ok(Expression::Function(Box::new(Function::new(
18529 "DATE_ADD".to_string(),
18530 vec![unit, arg1, arg2],
18531 ))))
18532 }
18533 }
18534 DialectType::Hive => match unit_str.as_str() {
18535 "MONTH" => {
18536 Ok(Expression::Function(Box::new(Function::new(
18537 "ADD_MONTHS".to_string(),
18538 vec![arg2, arg1],
18539 ))))
18540 }
18541 _ => Ok(Expression::Function(Box::new(Function::new(
18542 "DATE_ADD".to_string(),
18543 vec![arg2, arg1],
18544 )))),
18545 },
18546 DialectType::Presto
18547 | DialectType::Trino
18548 | DialectType::Athena => {
18549 // Cast string literal date to TIMESTAMP
18550 let arg2 = if matches!(
18551 &arg2,
18552 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18553 ) {
18554 Expression::Cast(Box::new(Cast {
18555 this: arg2,
18556 to: DataType::Timestamp {
18557 precision: None,
18558 timezone: false,
18559 },
18560 trailing_comments: Vec::new(),
18561 double_colon_syntax: false,
18562 format: None,
18563 default: None,
18564 inferred_type: None,
18565 }))
18566 } else {
18567 arg2
18568 };
18569 Ok(Expression::Function(Box::new(Function::new(
18570 "DATE_ADD".to_string(),
18571 vec![Expression::string(&unit_str), arg1, arg2],
18572 ))))
18573 }
18574 DialectType::MySQL => {
18575 let iu = Self::parse_interval_unit_static(&unit_str);
18576 Ok(Expression::DateAdd(Box::new(
18577 crate::expressions::DateAddFunc {
18578 this: arg2,
18579 interval: arg1,
18580 unit: iu,
18581 },
18582 )))
18583 }
18584 DialectType::PostgreSQL => {
18585 // Cast string literal date to TIMESTAMP
18586 let arg2 = if matches!(
18587 &arg2,
18588 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18589 ) {
18590 Expression::Cast(Box::new(Cast {
18591 this: arg2,
18592 to: DataType::Timestamp {
18593 precision: None,
18594 timezone: false,
18595 },
18596 trailing_comments: Vec::new(),
18597 double_colon_syntax: false,
18598 format: None,
18599 default: None,
18600 inferred_type: None,
18601 }))
18602 } else {
18603 arg2
18604 };
18605 let interval = Expression::Interval(Box::new(
18606 crate::expressions::Interval {
18607 this: Some(Expression::string(&format!(
18608 "{} {}",
18609 Self::expr_to_string_static(&arg1),
18610 unit_str
18611 ))),
18612 unit: None,
18613 },
18614 ));
18615 Ok(Expression::Add(Box::new(
18616 crate::expressions::BinaryOp::new(arg2, interval),
18617 )))
18618 }
18619 DialectType::BigQuery => {
18620 let iu = Self::parse_interval_unit_static(&unit_str);
18621 let interval = Expression::Interval(Box::new(
18622 crate::expressions::Interval {
18623 this: Some(arg1),
18624 unit: Some(
18625 crate::expressions::IntervalUnitSpec::Simple {
18626 unit: iu,
18627 use_plural: false,
18628 },
18629 ),
18630 },
18631 ));
18632 // Non-TSQL sources: CAST string literal to DATETIME
18633 let arg2 = if !matches!(
18634 source,
18635 DialectType::TSQL | DialectType::Fabric
18636 ) && matches!(
18637 &arg2,
18638 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
18639 ) {
18640 Expression::Cast(Box::new(Cast {
18641 this: arg2,
18642 to: DataType::Custom {
18643 name: "DATETIME".to_string(),
18644 },
18645 trailing_comments: Vec::new(),
18646 double_colon_syntax: false,
18647 format: None,
18648 default: None,
18649 inferred_type: None,
18650 }))
18651 } else {
18652 arg2
18653 };
18654 Ok(Expression::Function(Box::new(Function::new(
18655 "DATE_ADD".to_string(),
18656 vec![arg2, interval],
18657 ))))
18658 }
18659 _ => {
18660 let unit =
18661 Expression::Identifier(Identifier::new(&unit_str));
18662 Ok(Expression::Function(Box::new(Function::new(
18663 "DATEADD".to_string(),
18664 vec![unit, arg1, arg2],
18665 ))))
18666 }
18667 }
18668 }
18669 // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
18670 // or (date, val, 'UNIT') from Generic canonical form
18671 "DATE_ADD" if f.args.len() == 3 => {
18672 let mut args = f.args;
18673 let arg0 = args.remove(0);
18674 let arg1 = args.remove(0);
18675 let arg2 = args.remove(0);
18676 // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
18677 // where arg2 is a string literal matching a unit name
18678 let arg2_unit = match &arg2 {
18679 Expression::Literal(lit)
18680 if matches!(lit.as_ref(), Literal::String(_)) =>
18681 {
18682 let Literal::String(s) = lit.as_ref() else {
18683 unreachable!()
18684 };
18685 let u = s.to_ascii_uppercase();
18686 if matches!(
18687 u.as_str(),
18688 "DAY"
18689 | "MONTH"
18690 | "YEAR"
18691 | "HOUR"
18692 | "MINUTE"
18693 | "SECOND"
18694 | "WEEK"
18695 | "QUARTER"
18696 | "MILLISECOND"
18697 | "MICROSECOND"
18698 ) {
18699 Some(u)
18700 } else {
18701 None
18702 }
18703 }
18704 _ => None,
18705 };
18706 // Reorder: if arg2 is the unit, swap to (unit, val, date) form
18707 let (unit_str, val, date) = if let Some(u) = arg2_unit {
18708 (u, arg1, arg0)
18709 } else {
18710 (Self::get_unit_str_static(&arg0), arg1, arg2)
18711 };
18712 // Alias for backward compat with the rest of the match
18713 let arg1 = val;
18714 let arg2 = date;
18715
18716 match target {
18717 DialectType::Presto
18718 | DialectType::Trino
18719 | DialectType::Athena => {
18720 Ok(Expression::Function(Box::new(Function::new(
18721 "DATE_ADD".to_string(),
18722 vec![Expression::string(&unit_str), arg1, arg2],
18723 ))))
18724 }
18725 DialectType::DuckDB => {
18726 let iu = Self::parse_interval_unit_static(&unit_str);
18727 let interval = Expression::Interval(Box::new(
18728 crate::expressions::Interval {
18729 this: Some(arg1),
18730 unit: Some(
18731 crate::expressions::IntervalUnitSpec::Simple {
18732 unit: iu,
18733 use_plural: false,
18734 },
18735 ),
18736 },
18737 ));
18738 Ok(Expression::Add(Box::new(
18739 crate::expressions::BinaryOp::new(arg2, interval),
18740 )))
18741 }
18742 DialectType::PostgreSQL
18743 | DialectType::Materialize
18744 | DialectType::RisingWave => {
18745 // PostgreSQL: x + INTERVAL '1 DAY'
18746 let amount_str = Self::expr_to_string_static(&arg1);
18747 let interval = Expression::Interval(Box::new(
18748 crate::expressions::Interval {
18749 this: Some(Expression::string(&format!(
18750 "{} {}",
18751 amount_str, unit_str
18752 ))),
18753 unit: None,
18754 },
18755 ));
18756 Ok(Expression::Add(Box::new(
18757 crate::expressions::BinaryOp::new(arg2, interval),
18758 )))
18759 }
18760 DialectType::Snowflake
18761 | DialectType::TSQL
18762 | DialectType::Redshift => {
18763 let unit =
18764 Expression::Identifier(Identifier::new(&unit_str));
18765 Ok(Expression::Function(Box::new(Function::new(
18766 "DATEADD".to_string(),
18767 vec![unit, arg1, arg2],
18768 ))))
18769 }
18770 DialectType::BigQuery
18771 | DialectType::MySQL
18772 | DialectType::Doris
18773 | DialectType::StarRocks
18774 | DialectType::Drill => {
18775 // DATE_ADD(date, INTERVAL amount UNIT)
18776 let iu = Self::parse_interval_unit_static(&unit_str);
18777 let interval = Expression::Interval(Box::new(
18778 crate::expressions::Interval {
18779 this: Some(arg1),
18780 unit: Some(
18781 crate::expressions::IntervalUnitSpec::Simple {
18782 unit: iu,
18783 use_plural: false,
18784 },
18785 ),
18786 },
18787 ));
18788 Ok(Expression::Function(Box::new(Function::new(
18789 "DATE_ADD".to_string(),
18790 vec![arg2, interval],
18791 ))))
18792 }
18793 DialectType::SQLite => {
18794 // SQLite: DATE(x, '1 DAY')
18795 // Build the string '1 DAY' from amount and unit
18796 let amount_str = match &arg1 {
18797 Expression::Literal(lit)
18798 if matches!(lit.as_ref(), Literal::Number(_)) =>
18799 {
18800 let Literal::Number(n) = lit.as_ref() else {
18801 unreachable!()
18802 };
18803 n.clone()
18804 }
18805 _ => "1".to_string(),
18806 };
18807 Ok(Expression::Function(Box::new(Function::new(
18808 "DATE".to_string(),
18809 vec![
18810 arg2,
18811 Expression::string(format!(
18812 "{} {}",
18813 amount_str, unit_str
18814 )),
18815 ],
18816 ))))
18817 }
18818 DialectType::Dremio => {
18819 // Dremio: DATE_ADD(date, amount) - drops unit
18820 Ok(Expression::Function(Box::new(Function::new(
18821 "DATE_ADD".to_string(),
18822 vec![arg2, arg1],
18823 ))))
18824 }
18825 DialectType::Spark => {
18826 // Spark: DATE_ADD(date, val) for DAY, or DATEADD(UNIT, val, date)
18827 if unit_str == "DAY" {
18828 Ok(Expression::Function(Box::new(Function::new(
18829 "DATE_ADD".to_string(),
18830 vec![arg2, arg1],
18831 ))))
18832 } else {
18833 let unit =
18834 Expression::Identifier(Identifier::new(&unit_str));
18835 Ok(Expression::Function(Box::new(Function::new(
18836 "DATE_ADD".to_string(),
18837 vec![unit, arg1, arg2],
18838 ))))
18839 }
18840 }
18841 DialectType::Databricks => {
18842 let unit =
18843 Expression::Identifier(Identifier::new(&unit_str));
18844 Ok(Expression::Function(Box::new(Function::new(
18845 "DATE_ADD".to_string(),
18846 vec![unit, arg1, arg2],
18847 ))))
18848 }
18849 DialectType::Hive => {
18850 // Hive: DATE_ADD(date, val) for DAY
18851 Ok(Expression::Function(Box::new(Function::new(
18852 "DATE_ADD".to_string(),
18853 vec![arg2, arg1],
18854 ))))
18855 }
18856 _ => {
18857 let unit =
18858 Expression::Identifier(Identifier::new(&unit_str));
18859 Ok(Expression::Function(Box::new(Function::new(
18860 "DATE_ADD".to_string(),
18861 vec![unit, arg1, arg2],
18862 ))))
18863 }
18864 }
18865 }
18866 // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days)
18867 "DATE_ADD"
18868 if f.args.len() == 2
18869 && matches!(
18870 source,
18871 DialectType::Hive
18872 | DialectType::Spark
18873 | DialectType::Databricks
18874 | DialectType::Generic
18875 ) =>
18876 {
18877 let mut args = f.args;
18878 let date = args.remove(0);
18879 let days = args.remove(0);
18880 match target {
18881 DialectType::Hive | DialectType::Spark => {
18882 // Keep as DATE_ADD(date, days) for Hive/Spark
18883 Ok(Expression::Function(Box::new(Function::new(
18884 "DATE_ADD".to_string(),
18885 vec![date, days],
18886 ))))
18887 }
18888 DialectType::Databricks => {
18889 // Databricks: DATEADD(DAY, days, date)
18890 Ok(Expression::Function(Box::new(Function::new(
18891 "DATEADD".to_string(),
18892 vec![
18893 Expression::Identifier(Identifier::new("DAY")),
18894 days,
18895 date,
18896 ],
18897 ))))
18898 }
18899 DialectType::DuckDB => {
18900 // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
18901 let cast_date = Self::ensure_cast_date(date);
18902 // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
18903 let interval_val = if matches!(
18904 days,
18905 Expression::Mul(_)
18906 | Expression::Sub(_)
18907 | Expression::Add(_)
18908 ) {
18909 Expression::Paren(Box::new(crate::expressions::Paren {
18910 this: days,
18911 trailing_comments: vec![],
18912 }))
18913 } else {
18914 days
18915 };
18916 let interval = Expression::Interval(Box::new(
18917 crate::expressions::Interval {
18918 this: Some(interval_val),
18919 unit: Some(
18920 crate::expressions::IntervalUnitSpec::Simple {
18921 unit: crate::expressions::IntervalUnit::Day,
18922 use_plural: false,
18923 },
18924 ),
18925 },
18926 ));
18927 Ok(Expression::Add(Box::new(
18928 crate::expressions::BinaryOp::new(cast_date, interval),
18929 )))
18930 }
18931 DialectType::Snowflake => {
18932 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
18933 let cast_date = if matches!(
18934 source,
18935 DialectType::Hive
18936 | DialectType::Spark
18937 | DialectType::Databricks
18938 ) {
18939 if matches!(
18940 date,
18941 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
18942 ) {
18943 Self::double_cast_timestamp_date(date)
18944 } else {
18945 date
18946 }
18947 } else {
18948 date
18949 };
18950 Ok(Expression::Function(Box::new(Function::new(
18951 "DATEADD".to_string(),
18952 vec![
18953 Expression::Identifier(Identifier::new("DAY")),
18954 days,
18955 cast_date,
18956 ],
18957 ))))
18958 }
18959 DialectType::Redshift => {
18960 Ok(Expression::Function(Box::new(Function::new(
18961 "DATEADD".to_string(),
18962 vec![
18963 Expression::Identifier(Identifier::new("DAY")),
18964 days,
18965 date,
18966 ],
18967 ))))
18968 }
18969 DialectType::TSQL | DialectType::Fabric => {
18970 // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
18971 // But Databricks DATE_ADD doesn't need this wrapping for TSQL
18972 let cast_date = if matches!(
18973 source,
18974 DialectType::Hive | DialectType::Spark
18975 ) {
18976 if matches!(
18977 date,
18978 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
18979 ) {
18980 Self::double_cast_datetime2_date(date)
18981 } else {
18982 date
18983 }
18984 } else {
18985 date
18986 };
18987 Ok(Expression::Function(Box::new(Function::new(
18988 "DATEADD".to_string(),
18989 vec![
18990 Expression::Identifier(Identifier::new("DAY")),
18991 days,
18992 cast_date,
18993 ],
18994 ))))
18995 }
18996 DialectType::Presto
18997 | DialectType::Trino
18998 | DialectType::Athena => {
18999 // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
19000 let cast_date = if matches!(
19001 source,
19002 DialectType::Hive
19003 | DialectType::Spark
19004 | DialectType::Databricks
19005 ) {
19006 if matches!(
19007 date,
19008 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
19009 ) {
19010 Self::double_cast_timestamp_date(date)
19011 } else {
19012 date
19013 }
19014 } else {
19015 date
19016 };
19017 Ok(Expression::Function(Box::new(Function::new(
19018 "DATE_ADD".to_string(),
19019 vec![Expression::string("DAY"), days, cast_date],
19020 ))))
19021 }
19022 DialectType::BigQuery => {
19023 // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
19024 let cast_date = if matches!(
19025 source,
19026 DialectType::Hive
19027 | DialectType::Spark
19028 | DialectType::Databricks
19029 ) {
19030 Self::double_cast_datetime_date(date)
19031 } else {
19032 date
19033 };
19034 // Wrap complex expressions in Paren for interval
19035 let interval_val = if matches!(
19036 days,
19037 Expression::Mul(_)
19038 | Expression::Sub(_)
19039 | Expression::Add(_)
19040 ) {
19041 Expression::Paren(Box::new(crate::expressions::Paren {
19042 this: days,
19043 trailing_comments: vec![],
19044 }))
19045 } else {
19046 days
19047 };
19048 let interval = Expression::Interval(Box::new(
19049 crate::expressions::Interval {
19050 this: Some(interval_val),
19051 unit: Some(
19052 crate::expressions::IntervalUnitSpec::Simple {
19053 unit: crate::expressions::IntervalUnit::Day,
19054 use_plural: false,
19055 },
19056 ),
19057 },
19058 ));
19059 Ok(Expression::Function(Box::new(Function::new(
19060 "DATE_ADD".to_string(),
19061 vec![cast_date, interval],
19062 ))))
19063 }
19064 DialectType::MySQL => {
19065 let iu = crate::expressions::IntervalUnit::Day;
19066 Ok(Expression::DateAdd(Box::new(
19067 crate::expressions::DateAddFunc {
19068 this: date,
19069 interval: days,
19070 unit: iu,
19071 },
19072 )))
19073 }
19074 DialectType::PostgreSQL => {
19075 let interval = Expression::Interval(Box::new(
19076 crate::expressions::Interval {
19077 this: Some(Expression::string(&format!(
19078 "{} DAY",
19079 Self::expr_to_string_static(&days)
19080 ))),
19081 unit: None,
19082 },
19083 ));
19084 Ok(Expression::Add(Box::new(
19085 crate::expressions::BinaryOp::new(date, interval),
19086 )))
19087 }
19088 DialectType::Doris
19089 | DialectType::StarRocks
19090 | DialectType::Drill => {
19091 // DATE_ADD(date, INTERVAL days DAY)
19092 let interval = Expression::Interval(Box::new(
19093 crate::expressions::Interval {
19094 this: Some(days),
19095 unit: Some(
19096 crate::expressions::IntervalUnitSpec::Simple {
19097 unit: crate::expressions::IntervalUnit::Day,
19098 use_plural: false,
19099 },
19100 ),
19101 },
19102 ));
19103 Ok(Expression::Function(Box::new(Function::new(
19104 "DATE_ADD".to_string(),
19105 vec![date, interval],
19106 ))))
19107 }
19108 _ => Ok(Expression::Function(Box::new(Function::new(
19109 "DATE_ADD".to_string(),
19110 vec![date, days],
19111 )))),
19112 }
19113 }
19114 // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg
19115 "DATE_ADD"
19116 if f.args.len() == 2
19117 && matches!(
19118 source,
19119 DialectType::MySQL | DialectType::SingleStore
19120 )
19121 && matches!(&f.args[1], Expression::Interval(_)) =>
19122 {
19123 let mut args = f.args;
19124 let date = args.remove(0);
19125 let interval_expr = args.remove(0);
19126 let (val, unit) = Self::extract_interval_parts(&interval_expr)
19127 .unwrap_or_else(|| {
19128 (
19129 interval_expr.clone(),
19130 crate::expressions::IntervalUnit::Day,
19131 )
19132 });
19133 let unit_str = Self::interval_unit_to_string(&unit);
19134 let is_literal = matches!(&val,
19135 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
19136 );
19137
19138 match target {
19139 DialectType::MySQL | DialectType::SingleStore => {
19140 // Keep as DATE_ADD(date, INTERVAL val UNIT)
19141 Ok(Expression::Function(Box::new(Function::new(
19142 "DATE_ADD".to_string(),
19143 vec![date, interval_expr],
19144 ))))
19145 }
19146 DialectType::PostgreSQL => {
19147 if is_literal {
19148 // Literal: date + INTERVAL 'val UNIT'
19149 let interval = Expression::Interval(Box::new(
19150 crate::expressions::Interval {
19151 this: Some(Expression::Literal(Box::new(
19152 Literal::String(format!(
19153 "{} {}",
19154 Self::expr_to_string(&val),
19155 unit_str
19156 )),
19157 ))),
19158 unit: None,
19159 },
19160 ));
19161 Ok(Expression::Add(Box::new(
19162 crate::expressions::BinaryOp::new(date, interval),
19163 )))
19164 } else {
19165 // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
19166 let interval_one = Expression::Interval(Box::new(
19167 crate::expressions::Interval {
19168 this: Some(Expression::Literal(Box::new(
19169 Literal::String(format!("1 {}", unit_str)),
19170 ))),
19171 unit: None,
19172 },
19173 ));
19174 let mul = Expression::Mul(Box::new(
19175 crate::expressions::BinaryOp::new(
19176 interval_one,
19177 val,
19178 ),
19179 ));
19180 Ok(Expression::Add(Box::new(
19181 crate::expressions::BinaryOp::new(date, mul),
19182 )))
19183 }
19184 }
19185 _ => {
19186 // Default: keep as DATE_ADD(date, interval)
19187 Ok(Expression::Function(Box::new(Function::new(
19188 "DATE_ADD".to_string(),
19189 vec![date, interval_expr],
19190 ))))
19191 }
19192 }
19193 }
19194 // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days)
19195 "DATE_SUB"
19196 if f.args.len() == 2
19197 && matches!(
19198 source,
19199 DialectType::Hive
19200 | DialectType::Spark
19201 | DialectType::Databricks
19202 ) =>
19203 {
19204 let mut args = f.args;
19205 let date = args.remove(0);
19206 let days = args.remove(0);
19207 // Helper to create days * -1
19208 let make_neg_days = |d: Expression| -> Expression {
19209 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
19210 d,
19211 Expression::Literal(Box::new(Literal::Number(
19212 "-1".to_string(),
19213 ))),
19214 )))
19215 };
19216 let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
19217 match target {
19218 DialectType::Hive
19219 | DialectType::Spark
19220 | DialectType::Databricks => {
19221 // Keep as DATE_SUB(date, days) for Hive/Spark
19222 Ok(Expression::Function(Box::new(Function::new(
19223 "DATE_SUB".to_string(),
19224 vec![date, days],
19225 ))))
19226 }
19227 DialectType::DuckDB => {
19228 let cast_date = Self::ensure_cast_date(date);
19229 let neg = make_neg_days(days);
19230 let interval = Expression::Interval(Box::new(
19231 crate::expressions::Interval {
19232 this: Some(Expression::Paren(Box::new(
19233 crate::expressions::Paren {
19234 this: neg,
19235 trailing_comments: vec![],
19236 },
19237 ))),
19238 unit: Some(
19239 crate::expressions::IntervalUnitSpec::Simple {
19240 unit: crate::expressions::IntervalUnit::Day,
19241 use_plural: false,
19242 },
19243 ),
19244 },
19245 ));
19246 Ok(Expression::Add(Box::new(
19247 crate::expressions::BinaryOp::new(cast_date, interval),
19248 )))
19249 }
19250 DialectType::Snowflake => {
19251 let cast_date = if is_string_literal {
19252 Self::double_cast_timestamp_date(date)
19253 } else {
19254 date
19255 };
19256 let neg = make_neg_days(days);
19257 Ok(Expression::Function(Box::new(Function::new(
19258 "DATEADD".to_string(),
19259 vec![
19260 Expression::Identifier(Identifier::new("DAY")),
19261 neg,
19262 cast_date,
19263 ],
19264 ))))
19265 }
19266 DialectType::Redshift => {
19267 let neg = make_neg_days(days);
19268 Ok(Expression::Function(Box::new(Function::new(
19269 "DATEADD".to_string(),
19270 vec![
19271 Expression::Identifier(Identifier::new("DAY")),
19272 neg,
19273 date,
19274 ],
19275 ))))
19276 }
19277 DialectType::TSQL | DialectType::Fabric => {
19278 let cast_date = if is_string_literal {
19279 Self::double_cast_datetime2_date(date)
19280 } else {
19281 date
19282 };
19283 let neg = make_neg_days(days);
19284 Ok(Expression::Function(Box::new(Function::new(
19285 "DATEADD".to_string(),
19286 vec![
19287 Expression::Identifier(Identifier::new("DAY")),
19288 neg,
19289 cast_date,
19290 ],
19291 ))))
19292 }
19293 DialectType::Presto
19294 | DialectType::Trino
19295 | DialectType::Athena => {
19296 let cast_date = if is_string_literal {
19297 Self::double_cast_timestamp_date(date)
19298 } else {
19299 date
19300 };
19301 let neg = make_neg_days(days);
19302 Ok(Expression::Function(Box::new(Function::new(
19303 "DATE_ADD".to_string(),
19304 vec![Expression::string("DAY"), neg, cast_date],
19305 ))))
19306 }
19307 DialectType::BigQuery => {
19308 let cast_date = if is_string_literal {
19309 Self::double_cast_datetime_date(date)
19310 } else {
19311 date
19312 };
19313 let neg = make_neg_days(days);
19314 let interval = Expression::Interval(Box::new(
19315 crate::expressions::Interval {
19316 this: Some(Expression::Paren(Box::new(
19317 crate::expressions::Paren {
19318 this: neg,
19319 trailing_comments: vec![],
19320 },
19321 ))),
19322 unit: Some(
19323 crate::expressions::IntervalUnitSpec::Simple {
19324 unit: crate::expressions::IntervalUnit::Day,
19325 use_plural: false,
19326 },
19327 ),
19328 },
19329 ));
19330 Ok(Expression::Function(Box::new(Function::new(
19331 "DATE_ADD".to_string(),
19332 vec![cast_date, interval],
19333 ))))
19334 }
19335 _ => Ok(Expression::Function(Box::new(Function::new(
19336 "DATE_SUB".to_string(),
19337 vec![date, days],
19338 )))),
19339 }
19340 }
19341 // ADD_MONTHS(date, val) -> target-specific
19342 "ADD_MONTHS" if f.args.len() == 2 => {
19343 let mut args = f.args;
19344 let date = args.remove(0);
19345 let val = args.remove(0);
19346 match target {
19347 DialectType::TSQL => {
19348 let cast_date = Self::ensure_cast_datetime2(date);
19349 Ok(Expression::Function(Box::new(Function::new(
19350 "DATEADD".to_string(),
19351 vec![
19352 Expression::Identifier(Identifier::new("MONTH")),
19353 val,
19354 cast_date,
19355 ],
19356 ))))
19357 }
19358 DialectType::DuckDB => {
19359 let interval = Expression::Interval(Box::new(
19360 crate::expressions::Interval {
19361 this: Some(val),
19362 unit: Some(
19363 crate::expressions::IntervalUnitSpec::Simple {
19364 unit:
19365 crate::expressions::IntervalUnit::Month,
19366 use_plural: false,
19367 },
19368 ),
19369 },
19370 ));
19371 Ok(Expression::Add(Box::new(
19372 crate::expressions::BinaryOp::new(date, interval),
19373 )))
19374 }
19375 DialectType::Snowflake => {
19376 // Keep ADD_MONTHS when source is Snowflake
19377 if matches!(source, DialectType::Snowflake) {
19378 Ok(Expression::Function(Box::new(Function::new(
19379 "ADD_MONTHS".to_string(),
19380 vec![date, val],
19381 ))))
19382 } else {
19383 Ok(Expression::Function(Box::new(Function::new(
19384 "DATEADD".to_string(),
19385 vec![
19386 Expression::Identifier(Identifier::new(
19387 "MONTH",
19388 )),
19389 val,
19390 date,
19391 ],
19392 ))))
19393 }
19394 }
19395 DialectType::Redshift => {
19396 Ok(Expression::Function(Box::new(Function::new(
19397 "DATEADD".to_string(),
19398 vec![
19399 Expression::Identifier(Identifier::new("MONTH")),
19400 val,
19401 date,
19402 ],
19403 ))))
19404 }
19405 DialectType::Presto
19406 | DialectType::Trino
19407 | DialectType::Athena => {
19408 Ok(Expression::Function(Box::new(Function::new(
19409 "DATE_ADD".to_string(),
19410 vec![Expression::string("MONTH"), val, date],
19411 ))))
19412 }
19413 DialectType::BigQuery => {
19414 let interval = Expression::Interval(Box::new(
19415 crate::expressions::Interval {
19416 this: Some(val),
19417 unit: Some(
19418 crate::expressions::IntervalUnitSpec::Simple {
19419 unit:
19420 crate::expressions::IntervalUnit::Month,
19421 use_plural: false,
19422 },
19423 ),
19424 },
19425 ));
19426 Ok(Expression::Function(Box::new(Function::new(
19427 "DATE_ADD".to_string(),
19428 vec![date, interval],
19429 ))))
19430 }
19431 _ => Ok(Expression::Function(Box::new(Function::new(
19432 "ADD_MONTHS".to_string(),
19433 vec![date, val],
19434 )))),
19435 }
19436 }
19437 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
19438 "DATETRUNC" if f.args.len() == 2 => {
19439 let mut args = f.args;
19440 let arg0 = args.remove(0);
19441 let arg1 = args.remove(0);
19442 let unit_str = Self::get_unit_str_static(&arg0);
19443 match target {
19444 DialectType::TSQL | DialectType::Fabric => {
19445 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
19446 Ok(Expression::Function(Box::new(Function::new(
19447 "DATETRUNC".to_string(),
19448 vec![
19449 Expression::Identifier(Identifier::new(&unit_str)),
19450 arg1,
19451 ],
19452 ))))
19453 }
19454 DialectType::DuckDB => {
19455 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
19456 let date = Self::ensure_cast_timestamp(arg1);
19457 Ok(Expression::Function(Box::new(Function::new(
19458 "DATE_TRUNC".to_string(),
19459 vec![Expression::string(&unit_str), date],
19460 ))))
19461 }
19462 DialectType::ClickHouse => {
19463 // ClickHouse: dateTrunc('UNIT', expr)
19464 Ok(Expression::Function(Box::new(Function::new(
19465 "dateTrunc".to_string(),
19466 vec![Expression::string(&unit_str), arg1],
19467 ))))
19468 }
19469 _ => {
19470 // Standard: DATE_TRUNC('UNIT', expr)
19471 let unit = Expression::string(&unit_str);
19472 Ok(Expression::Function(Box::new(Function::new(
19473 "DATE_TRUNC".to_string(),
19474 vec![unit, arg1],
19475 ))))
19476 }
19477 }
19478 }
19479 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
19480 "GETDATE" if f.args.is_empty() => match target {
19481 DialectType::TSQL => Ok(Expression::Function(f)),
19482 DialectType::Redshift => Ok(Expression::Function(Box::new(
19483 Function::new("GETDATE".to_string(), vec![]),
19484 ))),
19485 _ => Ok(Expression::CurrentTimestamp(
19486 crate::expressions::CurrentTimestamp {
19487 precision: None,
19488 sysdate: false,
19489 },
19490 )),
19491 },
19492 // TO_HEX(x) / HEX(x) -> target-specific hex function
19493 "TO_HEX" | "HEX" if f.args.len() == 1 => {
19494 let name = match target {
19495 DialectType::Presto | DialectType::Trino => "TO_HEX",
19496 DialectType::Spark
19497 | DialectType::Databricks
19498 | DialectType::Hive => "HEX",
19499 DialectType::DuckDB
19500 | DialectType::PostgreSQL
19501 | DialectType::Redshift => "TO_HEX",
19502 _ => &f.name,
19503 };
19504 Ok(Expression::Function(Box::new(Function::new(
19505 name.to_string(),
19506 f.args,
19507 ))))
19508 }
19509 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
19510 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
19511 match target {
19512 DialectType::BigQuery => {
19513 // BigQuery: UNHEX(x) -> FROM_HEX(x)
19514 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
19515 // because BigQuery MD5 returns BYTES, not hex string
19516 let arg = &f.args[0];
19517 let wrapped_arg = match arg {
19518 Expression::Function(inner_f)
19519 if inner_f.name.eq_ignore_ascii_case("MD5")
19520 || inner_f
19521 .name
19522 .eq_ignore_ascii_case("SHA1")
19523 || inner_f
19524 .name
19525 .eq_ignore_ascii_case("SHA256")
19526 || inner_f
19527 .name
19528 .eq_ignore_ascii_case("SHA512") =>
19529 {
19530 // Wrap hash function in TO_HEX for BigQuery
19531 Expression::Function(Box::new(Function::new(
19532 "TO_HEX".to_string(),
19533 vec![arg.clone()],
19534 )))
19535 }
19536 _ => f.args.into_iter().next().unwrap(),
19537 };
19538 Ok(Expression::Function(Box::new(Function::new(
19539 "FROM_HEX".to_string(),
19540 vec![wrapped_arg],
19541 ))))
19542 }
19543 _ => {
19544 let name = match target {
19545 DialectType::Presto | DialectType::Trino => "FROM_HEX",
19546 DialectType::Spark
19547 | DialectType::Databricks
19548 | DialectType::Hive => "UNHEX",
19549 _ => &f.name,
19550 };
19551 Ok(Expression::Function(Box::new(Function::new(
19552 name.to_string(),
19553 f.args,
19554 ))))
19555 }
19556 }
19557 }
19558 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
19559 "TO_UTF8" if f.args.len() == 1 => match target {
19560 DialectType::Spark | DialectType::Databricks => {
19561 let mut args = f.args;
19562 args.push(Expression::string("utf-8"));
19563 Ok(Expression::Function(Box::new(Function::new(
19564 "ENCODE".to_string(),
19565 args,
19566 ))))
19567 }
19568 _ => Ok(Expression::Function(f)),
19569 },
19570 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
19571 "FROM_UTF8" if f.args.len() == 1 => match target {
19572 DialectType::Spark | DialectType::Databricks => {
19573 let mut args = f.args;
19574 args.push(Expression::string("utf-8"));
19575 Ok(Expression::Function(Box::new(Function::new(
19576 "DECODE".to_string(),
19577 args,
19578 ))))
19579 }
19580 _ => Ok(Expression::Function(f)),
19581 },
19582 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
19583 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
19584 let name = match target {
19585 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
19586 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
19587 DialectType::PostgreSQL | DialectType::Redshift => {
19588 "STARTS_WITH"
19589 }
19590 _ => &f.name,
19591 };
19592 Ok(Expression::Function(Box::new(Function::new(
19593 name.to_string(),
19594 f.args,
19595 ))))
19596 }
19597 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
19598 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
19599 let name = match target {
19600 DialectType::Presto
19601 | DialectType::Trino
19602 | DialectType::Athena => "APPROX_DISTINCT",
19603 _ => "APPROX_COUNT_DISTINCT",
19604 };
19605 Ok(Expression::Function(Box::new(Function::new(
19606 name.to_string(),
19607 f.args,
19608 ))))
19609 }
19610 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
19611 "JSON_EXTRACT"
19612 if f.args.len() == 2
19613 && !matches!(source, DialectType::BigQuery)
19614 && matches!(
19615 target,
19616 DialectType::Spark
19617 | DialectType::Databricks
19618 | DialectType::Hive
19619 ) =>
19620 {
19621 Ok(Expression::Function(Box::new(Function::new(
19622 "GET_JSON_OBJECT".to_string(),
19623 f.args,
19624 ))))
19625 }
19626 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
19627 "JSON_EXTRACT"
19628 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
19629 {
19630 let mut args = f.args;
19631 let path = args.remove(1);
19632 let this = args.remove(0);
19633 Ok(Expression::JsonExtract(Box::new(
19634 crate::expressions::JsonExtractFunc {
19635 this,
19636 path,
19637 returning: None,
19638 arrow_syntax: true,
19639 hash_arrow_syntax: false,
19640 wrapper_option: None,
19641 quotes_option: None,
19642 on_scalar_string: false,
19643 on_error: None,
19644 },
19645 )))
19646 }
19647 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
19648 "JSON_FORMAT" if f.args.len() == 1 => {
19649 match target {
19650 DialectType::Spark | DialectType::Databricks => {
19651 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
19652 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
19653 if matches!(
19654 source,
19655 DialectType::Presto
19656 | DialectType::Trino
19657 | DialectType::Athena
19658 ) {
19659 if let Some(Expression::ParseJson(pj)) = f.args.first()
19660 {
19661 if let Expression::Literal(lit) = &pj.this {
19662 if let Literal::String(s) = lit.as_ref() {
19663 let wrapped =
19664 Expression::Literal(Box::new(
19665 Literal::String(format!("[{}]", s)),
19666 ));
19667 let schema_of_json = Expression::Function(
19668 Box::new(Function::new(
19669 "SCHEMA_OF_JSON".to_string(),
19670 vec![wrapped.clone()],
19671 )),
19672 );
19673 let from_json = Expression::Function(
19674 Box::new(Function::new(
19675 "FROM_JSON".to_string(),
19676 vec![wrapped, schema_of_json],
19677 )),
19678 );
19679 let to_json = Expression::Function(
19680 Box::new(Function::new(
19681 "TO_JSON".to_string(),
19682 vec![from_json],
19683 )),
19684 );
19685 return Ok(Expression::Function(Box::new(
19686 Function::new(
19687 "REGEXP_EXTRACT".to_string(),
19688 vec![
19689 to_json,
19690 Expression::Literal(Box::new(
19691 Literal::String(
19692 "^.(.*).$".to_string(),
19693 ),
19694 )),
19695 Expression::Literal(Box::new(
19696 Literal::Number(
19697 "1".to_string(),
19698 ),
19699 )),
19700 ],
19701 ),
19702 )));
19703 }
19704 }
19705 }
19706 }
19707
19708 // Strip inner CAST(... AS JSON) or TO_JSON() if present
19709 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
19710 let mut args = f.args;
19711 if let Some(Expression::Cast(ref c)) = args.first() {
19712 if matches!(&c.to, DataType::Json | DataType::JsonB) {
19713 args = vec![c.this.clone()];
19714 }
19715 } else if let Some(Expression::Function(ref inner_f)) =
19716 args.first()
19717 {
19718 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
19719 && inner_f.args.len() == 1
19720 {
19721 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
19722 args = inner_f.args.clone();
19723 }
19724 }
19725 Ok(Expression::Function(Box::new(Function::new(
19726 "TO_JSON".to_string(),
19727 args,
19728 ))))
19729 }
19730 DialectType::BigQuery => Ok(Expression::Function(Box::new(
19731 Function::new("TO_JSON_STRING".to_string(), f.args),
19732 ))),
19733 DialectType::DuckDB => {
19734 // CAST(TO_JSON(x) AS TEXT)
19735 let to_json = Expression::Function(Box::new(
19736 Function::new("TO_JSON".to_string(), f.args),
19737 ));
19738 Ok(Expression::Cast(Box::new(Cast {
19739 this: to_json,
19740 to: DataType::Text,
19741 trailing_comments: Vec::new(),
19742 double_colon_syntax: false,
19743 format: None,
19744 default: None,
19745 inferred_type: None,
19746 })))
19747 }
19748 _ => Ok(Expression::Function(f)),
19749 }
19750 }
19751 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
19752 "SYSDATE" if f.args.is_empty() => {
19753 match target {
19754 DialectType::Oracle | DialectType::Redshift => {
19755 Ok(Expression::Function(f))
19756 }
19757 DialectType::Snowflake => {
19758 // Snowflake uses SYSDATE() with parens
19759 let mut f = *f;
19760 f.no_parens = false;
19761 Ok(Expression::Function(Box::new(f)))
19762 }
19763 DialectType::DuckDB => {
19764 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
19765 Ok(Expression::AtTimeZone(Box::new(
19766 crate::expressions::AtTimeZone {
19767 this: Expression::CurrentTimestamp(
19768 crate::expressions::CurrentTimestamp {
19769 precision: None,
19770 sysdate: false,
19771 },
19772 ),
19773 zone: Expression::Literal(Box::new(
19774 Literal::String("UTC".to_string()),
19775 )),
19776 },
19777 )))
19778 }
19779 _ => Ok(Expression::CurrentTimestamp(
19780 crate::expressions::CurrentTimestamp {
19781 precision: None,
19782 sysdate: true,
19783 },
19784 )),
19785 }
19786 }
19787 // LOGICAL_OR(x) -> BOOL_OR(x)
19788 "LOGICAL_OR" if f.args.len() == 1 => {
19789 let name = match target {
19790 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
19791 _ => &f.name,
19792 };
19793 Ok(Expression::Function(Box::new(Function::new(
19794 name.to_string(),
19795 f.args,
19796 ))))
19797 }
19798 // LOGICAL_AND(x) -> BOOL_AND(x)
19799 "LOGICAL_AND" if f.args.len() == 1 => {
19800 let name = match target {
19801 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
19802 _ => &f.name,
19803 };
19804 Ok(Expression::Function(Box::new(Function::new(
19805 name.to_string(),
19806 f.args,
19807 ))))
19808 }
19809 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
19810 "MONTHS_ADD" if f.args.len() == 2 => match target {
19811 DialectType::Oracle => Ok(Expression::Function(Box::new(
19812 Function::new("ADD_MONTHS".to_string(), f.args),
19813 ))),
19814 _ => Ok(Expression::Function(f)),
19815 },
19816 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
19817 "ARRAY_JOIN" if f.args.len() >= 2 => {
19818 match target {
19819 DialectType::Spark | DialectType::Databricks => {
19820 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
19821 Ok(Expression::Function(f))
19822 }
19823 DialectType::Hive => {
19824 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
19825 let mut args = f.args;
19826 let arr = args.remove(0);
19827 let sep = args.remove(0);
19828 // Drop any remaining args (null_replacement)
19829 Ok(Expression::Function(Box::new(Function::new(
19830 "CONCAT_WS".to_string(),
19831 vec![sep, arr],
19832 ))))
19833 }
19834 DialectType::Presto | DialectType::Trino => {
19835 Ok(Expression::Function(f))
19836 }
19837 _ => Ok(Expression::Function(f)),
19838 }
19839 }
19840 // LOCATE(substr, str, pos) 3-arg -> target-specific
19841 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
19842 "LOCATE"
19843 if f.args.len() == 3
19844 && matches!(
19845 target,
19846 DialectType::Presto
19847 | DialectType::Trino
19848 | DialectType::Athena
19849 | DialectType::DuckDB
19850 ) =>
19851 {
19852 let mut args = f.args;
19853 let substr = args.remove(0);
19854 let string = args.remove(0);
19855 let pos = args.remove(0);
19856 // STRPOS(SUBSTRING(string, pos), substr)
19857 let substring_call = Expression::Function(Box::new(Function::new(
19858 "SUBSTRING".to_string(),
19859 vec![string.clone(), pos.clone()],
19860 )));
19861 let strpos_call = Expression::Function(Box::new(Function::new(
19862 "STRPOS".to_string(),
19863 vec![substring_call, substr.clone()],
19864 )));
19865 // STRPOS(...) + pos - 1
19866 let pos_adjusted =
19867 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
19868 Expression::Add(Box::new(
19869 crate::expressions::BinaryOp::new(
19870 strpos_call.clone(),
19871 pos.clone(),
19872 ),
19873 )),
19874 Expression::number(1),
19875 )));
19876 // STRPOS(...) = 0
19877 let is_zero =
19878 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
19879 strpos_call.clone(),
19880 Expression::number(0),
19881 )));
19882
19883 match target {
19884 DialectType::Presto
19885 | DialectType::Trino
19886 | DialectType::Athena => {
19887 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
19888 Ok(Expression::Function(Box::new(Function::new(
19889 "IF".to_string(),
19890 vec![is_zero, Expression::number(0), pos_adjusted],
19891 ))))
19892 }
19893 DialectType::DuckDB => {
19894 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
19895 Ok(Expression::Case(Box::new(crate::expressions::Case {
19896 operand: None,
19897 whens: vec![(is_zero, Expression::number(0))],
19898 else_: Some(pos_adjusted),
19899 comments: Vec::new(),
19900 inferred_type: None,
19901 })))
19902 }
19903 _ => Ok(Expression::Function(Box::new(Function::new(
19904 "LOCATE".to_string(),
19905 vec![substr, string, pos],
19906 )))),
19907 }
19908 }
19909 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
19910 "STRPOS"
19911 if f.args.len() == 3
19912 && matches!(
19913 target,
19914 DialectType::BigQuery
19915 | DialectType::Oracle
19916 | DialectType::Teradata
19917 ) =>
19918 {
19919 let mut args = f.args;
19920 let haystack = args.remove(0);
19921 let needle = args.remove(0);
19922 let occurrence = args.remove(0);
19923 Ok(Expression::Function(Box::new(Function::new(
19924 "INSTR".to_string(),
19925 vec![haystack, needle, Expression::number(1), occurrence],
19926 ))))
19927 }
19928 // SCHEMA_NAME(id) -> target-specific
19929 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
19930 DialectType::MySQL | DialectType::SingleStore => {
19931 Ok(Expression::Function(Box::new(Function::new(
19932 "SCHEMA".to_string(),
19933 vec![],
19934 ))))
19935 }
19936 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19937 crate::expressions::CurrentSchema { this: None },
19938 ))),
19939 DialectType::SQLite => Ok(Expression::string("main")),
19940 _ => Ok(Expression::Function(f)),
19941 },
19942 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
19943 "STRTOL" if f.args.len() == 2 => match target {
19944 DialectType::Presto | DialectType::Trino => {
19945 Ok(Expression::Function(Box::new(Function::new(
19946 "FROM_BASE".to_string(),
19947 f.args,
19948 ))))
19949 }
19950 _ => Ok(Expression::Function(f)),
19951 },
19952 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
19953 "EDITDIST3" if f.args.len() == 2 => match target {
19954 DialectType::Spark | DialectType::Databricks => {
19955 Ok(Expression::Function(Box::new(Function::new(
19956 "LEVENSHTEIN".to_string(),
19957 f.args,
19958 ))))
19959 }
19960 _ => Ok(Expression::Function(f)),
19961 },
19962 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
19963 "FORMAT"
19964 if f.args.len() == 2
19965 && matches!(
19966 source,
19967 DialectType::MySQL | DialectType::SingleStore
19968 )
19969 && matches!(target, DialectType::DuckDB) =>
19970 {
19971 let mut args = f.args;
19972 let num_expr = args.remove(0);
19973 let decimals_expr = args.remove(0);
19974 // Extract decimal count
19975 let dec_count = match &decimals_expr {
19976 Expression::Literal(lit)
19977 if matches!(lit.as_ref(), Literal::Number(_)) =>
19978 {
19979 let Literal::Number(n) = lit.as_ref() else {
19980 unreachable!()
19981 };
19982 n.clone()
19983 }
19984 _ => "0".to_string(),
19985 };
19986 let fmt_str = format!("{{:,.{}f}}", dec_count);
19987 Ok(Expression::Function(Box::new(Function::new(
19988 "FORMAT".to_string(),
19989 vec![Expression::string(&fmt_str), num_expr],
19990 ))))
19991 }
19992 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
19993 "FORMAT"
19994 if f.args.len() == 2
19995 && matches!(
19996 source,
19997 DialectType::TSQL | DialectType::Fabric
19998 ) =>
19999 {
20000 let val_expr = f.args[0].clone();
20001 let fmt_expr = f.args[1].clone();
20002 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
20003 // Only expand shortcodes that are NOT also valid numeric format specifiers.
20004 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
20005 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
20006 let (expanded_fmt, is_shortcode) = match &fmt_expr {
20007 Expression::Literal(lit)
20008 if matches!(
20009 lit.as_ref(),
20010 crate::expressions::Literal::String(_)
20011 ) =>
20012 {
20013 let crate::expressions::Literal::String(s) = lit.as_ref()
20014 else {
20015 unreachable!()
20016 };
20017 match s.as_str() {
20018 "m" | "M" => (Expression::string("MMMM d"), true),
20019 "t" => (Expression::string("h:mm tt"), true),
20020 "T" => (Expression::string("h:mm:ss tt"), true),
20021 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
20022 _ => (fmt_expr.clone(), false),
20023 }
20024 }
20025 _ => (fmt_expr.clone(), false),
20026 };
20027 // Check if the format looks like a date format
20028 let is_date_format = is_shortcode
20029 || match &expanded_fmt {
20030 Expression::Literal(lit)
20031 if matches!(
20032 lit.as_ref(),
20033 crate::expressions::Literal::String(_)
20034 ) =>
20035 {
20036 let crate::expressions::Literal::String(s) =
20037 lit.as_ref()
20038 else {
20039 unreachable!()
20040 };
20041 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
20042 s.contains("yyyy")
20043 || s.contains("YYYY")
20044 || s.contains("MM")
20045 || s.contains("dd")
20046 || s.contains("MMMM")
20047 || s.contains("HH")
20048 || s.contains("hh")
20049 || s.contains("ss")
20050 }
20051 _ => false,
20052 };
20053 match target {
20054 DialectType::Spark | DialectType::Databricks => {
20055 let func_name = if is_date_format {
20056 "DATE_FORMAT"
20057 } else {
20058 "FORMAT_NUMBER"
20059 };
20060 Ok(Expression::Function(Box::new(Function::new(
20061 func_name.to_string(),
20062 vec![val_expr, expanded_fmt],
20063 ))))
20064 }
20065 _ => {
20066 // For TSQL and other targets, expand shortcodes but keep FORMAT
20067 if is_shortcode {
20068 Ok(Expression::Function(Box::new(Function::new(
20069 "FORMAT".to_string(),
20070 vec![val_expr, expanded_fmt],
20071 ))))
20072 } else {
20073 Ok(Expression::Function(f))
20074 }
20075 }
20076 }
20077 }
20078 // FORMAT('%s', x) from Trino/Presto -> target-specific
20079 "FORMAT"
20080 if f.args.len() >= 2
20081 && matches!(
20082 source,
20083 DialectType::Trino
20084 | DialectType::Presto
20085 | DialectType::Athena
20086 ) =>
20087 {
20088 let fmt_expr = f.args[0].clone();
20089 let value_args: Vec<Expression> = f.args[1..].to_vec();
20090 match target {
20091 // DuckDB: replace %s with {} in format string
20092 DialectType::DuckDB => {
20093 let new_fmt = match &fmt_expr {
20094 Expression::Literal(lit)
20095 if matches!(lit.as_ref(), Literal::String(_)) =>
20096 {
20097 let Literal::String(s) = lit.as_ref() else {
20098 unreachable!()
20099 };
20100 Expression::Literal(Box::new(Literal::String(
20101 s.replace("%s", "{}"),
20102 )))
20103 }
20104 _ => fmt_expr,
20105 };
20106 let mut args = vec![new_fmt];
20107 args.extend(value_args);
20108 Ok(Expression::Function(Box::new(Function::new(
20109 "FORMAT".to_string(),
20110 args,
20111 ))))
20112 }
20113 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
20114 DialectType::Snowflake => match &fmt_expr {
20115 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
20116 {
20117 let Literal::String(_) = lit.as_ref() else {
20118 unreachable!()
20119 };
20120 Ok(Expression::Function(Box::new(Function::new(
20121 "TO_CHAR".to_string(),
20122 value_args,
20123 ))))
20124 }
20125 _ => Ok(Expression::Function(f)),
20126 },
20127 // Default: keep FORMAT as-is
20128 _ => Ok(Expression::Function(f)),
20129 }
20130 }
20131 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
20132 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
20133 if f.args.len() == 2 =>
20134 {
20135 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
20136 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
20137 if matches!(target, DialectType::DuckDB)
20138 && matches!(source, DialectType::Snowflake)
20139 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
20140 {
20141 let value = f.args[0].clone();
20142 let array = f.args[1].clone();
20143
20144 // value IS NULL
20145 let value_is_null =
20146 Expression::IsNull(Box::new(crate::expressions::IsNull {
20147 this: value.clone(),
20148 not: false,
20149 postfix_form: false,
20150 }));
20151
20152 // ARRAY_LENGTH(array)
20153 let array_length =
20154 Expression::Function(Box::new(Function::new(
20155 "ARRAY_LENGTH".to_string(),
20156 vec![array.clone()],
20157 )));
20158 // LIST_COUNT(array)
20159 let list_count = Expression::Function(Box::new(Function::new(
20160 "LIST_COUNT".to_string(),
20161 vec![array.clone()],
20162 )));
20163 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
20164 let neq =
20165 Expression::Neq(Box::new(crate::expressions::BinaryOp {
20166 left: array_length,
20167 right: list_count,
20168 left_comments: vec![],
20169 operator_comments: vec![],
20170 trailing_comments: vec![],
20171 inferred_type: None,
20172 }));
20173 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
20174 let nullif =
20175 Expression::Nullif(Box::new(crate::expressions::Nullif {
20176 this: Box::new(neq),
20177 expression: Box::new(Expression::Boolean(
20178 crate::expressions::BooleanLiteral { value: false },
20179 )),
20180 }));
20181
20182 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
20183 let array_contains =
20184 Expression::Function(Box::new(Function::new(
20185 "ARRAY_CONTAINS".to_string(),
20186 vec![array, value],
20187 )));
20188
20189 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
20190 return Ok(Expression::Case(Box::new(Case {
20191 operand: None,
20192 whens: vec![(value_is_null, nullif)],
20193 else_: Some(array_contains),
20194 comments: Vec::new(),
20195 inferred_type: None,
20196 })));
20197 }
20198 match target {
20199 DialectType::PostgreSQL | DialectType::Redshift => {
20200 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
20201 let arr = f.args[0].clone();
20202 let needle = f.args[1].clone();
20203 // Convert [] to ARRAY[] for PostgreSQL
20204 let pg_arr = match arr {
20205 Expression::Array(a) => Expression::ArrayFunc(
20206 Box::new(crate::expressions::ArrayConstructor {
20207 expressions: a.expressions,
20208 bracket_notation: false,
20209 use_list_keyword: false,
20210 }),
20211 ),
20212 _ => arr,
20213 };
20214 // needle = ANY(arr) using the Any quantified expression
20215 let any_expr = Expression::Any(Box::new(
20216 crate::expressions::QuantifiedExpr {
20217 this: needle.clone(),
20218 subquery: pg_arr,
20219 op: Some(crate::expressions::QuantifiedOp::Eq),
20220 },
20221 ));
20222 let coalesce = Expression::Coalesce(Box::new(
20223 crate::expressions::VarArgFunc {
20224 expressions: vec![
20225 any_expr,
20226 Expression::Boolean(
20227 crate::expressions::BooleanLiteral {
20228 value: false,
20229 },
20230 ),
20231 ],
20232 original_name: None,
20233 inferred_type: None,
20234 },
20235 ));
20236 let is_null_check = Expression::IsNull(Box::new(
20237 crate::expressions::IsNull {
20238 this: needle,
20239 not: false,
20240 postfix_form: false,
20241 },
20242 ));
20243 Ok(Expression::Case(Box::new(Case {
20244 operand: None,
20245 whens: vec![(
20246 is_null_check,
20247 Expression::Null(crate::expressions::Null),
20248 )],
20249 else_: Some(coalesce),
20250 comments: Vec::new(),
20251 inferred_type: None,
20252 })))
20253 }
20254 _ => Ok(Expression::Function(Box::new(Function::new(
20255 "ARRAY_CONTAINS".to_string(),
20256 f.args,
20257 )))),
20258 }
20259 }
20260 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
20261 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
20262 match target {
20263 DialectType::PostgreSQL | DialectType::Redshift => {
20264 // arr1 && arr2 with ARRAY[] syntax
20265 let mut args = f.args;
20266 let arr1 = args.remove(0);
20267 let arr2 = args.remove(0);
20268 let pg_arr1 = match arr1 {
20269 Expression::Array(a) => Expression::ArrayFunc(
20270 Box::new(crate::expressions::ArrayConstructor {
20271 expressions: a.expressions,
20272 bracket_notation: false,
20273 use_list_keyword: false,
20274 }),
20275 ),
20276 _ => arr1,
20277 };
20278 let pg_arr2 = match arr2 {
20279 Expression::Array(a) => Expression::ArrayFunc(
20280 Box::new(crate::expressions::ArrayConstructor {
20281 expressions: a.expressions,
20282 bracket_notation: false,
20283 use_list_keyword: false,
20284 }),
20285 ),
20286 _ => arr2,
20287 };
20288 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
20289 pg_arr1, pg_arr2,
20290 ))))
20291 }
20292 DialectType::DuckDB => {
20293 // DuckDB: arr1 && arr2 (native support)
20294 let mut args = f.args;
20295 let arr1 = args.remove(0);
20296 let arr2 = args.remove(0);
20297 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
20298 arr1, arr2,
20299 ))))
20300 }
20301 _ => Ok(Expression::Function(Box::new(Function::new(
20302 "LIST_HAS_ANY".to_string(),
20303 f.args,
20304 )))),
20305 }
20306 }
20307 // APPROX_QUANTILE(x, q) -> target-specific
20308 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
20309 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20310 Function::new("APPROX_PERCENTILE".to_string(), f.args),
20311 ))),
20312 DialectType::DuckDB => Ok(Expression::Function(f)),
20313 _ => Ok(Expression::Function(f)),
20314 },
20315 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
20316 "MAKE_DATE" if f.args.len() == 3 => match target {
20317 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20318 Function::new("DATE".to_string(), f.args),
20319 ))),
20320 _ => Ok(Expression::Function(f)),
20321 },
20322 // RANGE(start, end[, step]) -> target-specific
20323 "RANGE"
20324 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
20325 {
20326 let start = f.args[0].clone();
20327 let end = f.args[1].clone();
20328 let step = f.args.get(2).cloned();
20329 match target {
20330 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
20331 // so just rename without adjusting the end argument.
20332 DialectType::Snowflake => {
20333 let mut args = vec![start, end];
20334 if let Some(s) = step {
20335 args.push(s);
20336 }
20337 Ok(Expression::Function(Box::new(Function::new(
20338 "ARRAY_GENERATE_RANGE".to_string(),
20339 args,
20340 ))))
20341 }
20342 DialectType::Spark | DialectType::Databricks => {
20343 // RANGE(start, end) -> SEQUENCE(start, end-1)
20344 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
20345 // RANGE(start, start) -> ARRAY() (empty)
20346 // RANGE(start, end, 0) -> ARRAY() (empty)
// When end is variable: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)[, step]))
20348
20349 // Check for constant args
20350 fn extract_i64(e: &Expression) -> Option<i64> {
20351 match e {
20352 Expression::Literal(lit)
20353 if matches!(
20354 lit.as_ref(),
20355 Literal::Number(_)
20356 ) =>
20357 {
20358 let Literal::Number(n) = lit.as_ref() else {
20359 unreachable!()
20360 };
20361 n.parse::<i64>().ok()
20362 }
20363 Expression::Neg(u) => {
20364 if let Expression::Literal(lit) = &u.this {
20365 if let Literal::Number(n) = lit.as_ref() {
20366 n.parse::<i64>().ok().map(|v| -v)
20367 } else {
20368 None
20369 }
20370 } else {
20371 None
20372 }
20373 }
20374 _ => None,
20375 }
20376 }
20377 let start_val = extract_i64(&start);
20378 let end_val = extract_i64(&end);
20379 let step_val = step.as_ref().and_then(|s| extract_i64(s));
20380
20381 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
20382 if step_val == Some(0) {
20383 return Ok(Expression::Function(Box::new(
20384 Function::new("ARRAY".to_string(), vec![]),
20385 )));
20386 }
20387 if let (Some(s), Some(e_val)) = (start_val, end_val) {
20388 if s == e_val {
20389 return Ok(Expression::Function(Box::new(
20390 Function::new("ARRAY".to_string(), vec![]),
20391 )));
20392 }
20393 }
20394
20395 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
20396 // All constants - compute new end = end - step (if step provided) or end - 1
20397 match step_val {
20398 Some(st) if st < 0 => {
20399 // Negative step: SEQUENCE(start, end - step, step)
20400 let new_end = e_val - st; // end - step (= end + |step|)
20401 let mut args =
20402 vec![start, Expression::number(new_end)];
20403 if let Some(s) = step {
20404 args.push(s);
20405 }
20406 Ok(Expression::Function(Box::new(
20407 Function::new("SEQUENCE".to_string(), args),
20408 )))
20409 }
20410 Some(st) => {
20411 let new_end = e_val - st;
20412 let mut args =
20413 vec![start, Expression::number(new_end)];
20414 if let Some(s) = step {
20415 args.push(s);
20416 }
20417 Ok(Expression::Function(Box::new(
20418 Function::new("SEQUENCE".to_string(), args),
20419 )))
20420 }
20421 None => {
20422 // No step: SEQUENCE(start, end - 1)
20423 let new_end = e_val - 1;
20424 Ok(Expression::Function(Box::new(
20425 Function::new(
20426 "SEQUENCE".to_string(),
20427 vec![
20428 start,
20429 Expression::number(new_end),
20430 ],
20431 ),
20432 )))
20433 }
20434 }
20435 } else {
20436 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
20437 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
20438 end.clone(),
20439 Expression::number(1),
20440 )));
20441 let cond = Expression::Lt(Box::new(BinaryOp::new(
20442 Expression::Paren(Box::new(Paren {
20443 this: end_m1.clone(),
20444 trailing_comments: Vec::new(),
20445 })),
20446 start.clone(),
20447 )));
20448 let empty = Expression::Function(Box::new(
20449 Function::new("ARRAY".to_string(), vec![]),
20450 ));
20451 let mut seq_args = vec![
20452 start,
20453 Expression::Paren(Box::new(Paren {
20454 this: end_m1,
20455 trailing_comments: Vec::new(),
20456 })),
20457 ];
20458 if let Some(s) = step {
20459 seq_args.push(s);
20460 }
20461 let seq = Expression::Function(Box::new(
20462 Function::new("SEQUENCE".to_string(), seq_args),
20463 ));
20464 Ok(Expression::IfFunc(Box::new(
20465 crate::expressions::IfFunc {
20466 condition: cond,
20467 true_value: empty,
20468 false_value: Some(seq),
20469 original_name: None,
20470 inferred_type: None,
20471 },
20472 )))
20473 }
20474 }
20475 DialectType::SQLite => {
20476 // RANGE(start, end) -> GENERATE_SERIES(start, end)
20477 // The subquery wrapping is handled at the Alias level
20478 let mut args = vec![start, end];
20479 if let Some(s) = step {
20480 args.push(s);
20481 }
20482 Ok(Expression::Function(Box::new(Function::new(
20483 "GENERATE_SERIES".to_string(),
20484 args,
20485 ))))
20486 }
20487 _ => Ok(Expression::Function(f)),
20488 }
20489 }
20490 // ARRAY_REVERSE_SORT -> target-specific
20491 // (handled above as well, but also need DuckDB self-normalization)
20492 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20493 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
20494 DialectType::Snowflake => Ok(Expression::Function(Box::new(
20495 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
20496 ))),
20497 DialectType::Spark | DialectType::Databricks => {
20498 Ok(Expression::Function(Box::new(Function::new(
20499 "MAP_FROM_ARRAYS".to_string(),
20500 f.args,
20501 ))))
20502 }
20503 _ => Ok(Expression::Function(Box::new(Function::new(
20504 "MAP".to_string(),
20505 f.args,
20506 )))),
20507 },
20508 // VARIANCE(x) -> varSamp(x) for ClickHouse
20509 "VARIANCE" if f.args.len() == 1 => match target {
20510 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
20511 Function::new("varSamp".to_string(), f.args),
20512 ))),
20513 _ => Ok(Expression::Function(f)),
20514 },
20515 // STDDEV(x) -> stddevSamp(x) for ClickHouse
20516 "STDDEV" if f.args.len() == 1 => match target {
20517 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
20518 Function::new("stddevSamp".to_string(), f.args),
20519 ))),
20520 _ => Ok(Expression::Function(f)),
20521 },
20522 // ISINF(x) -> IS_INF(x) for BigQuery
20523 "ISINF" if f.args.len() == 1 => match target {
20524 DialectType::BigQuery => Ok(Expression::Function(Box::new(
20525 Function::new("IS_INF".to_string(), f.args),
20526 ))),
20527 _ => Ok(Expression::Function(f)),
20528 },
20529 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
20530 "CONTAINS" if f.args.len() == 2 => match target {
20531 DialectType::Spark
20532 | DialectType::Databricks
20533 | DialectType::Hive => Ok(Expression::Function(Box::new(
20534 Function::new("ARRAY_CONTAINS".to_string(), f.args),
20535 ))),
20536 _ => Ok(Expression::Function(f)),
20537 },
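// ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto/Trino/Athena
// NOTE(review): a 2-arg ARRAY_CONTAINS appears to be captured first by the earlier
// "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS" arm, which would make this arm
// unreachable -- confirm both arms belong to the same match before relying on it.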
20539 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
20540 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20541 Ok(Expression::Function(Box::new(Function::new(
20542 "CONTAINS".to_string(),
20543 f.args,
20544 ))))
20545 }
20546 DialectType::DuckDB => Ok(Expression::Function(Box::new(
20547 Function::new("ARRAY_CONTAINS".to_string(), f.args),
20548 ))),
20549 _ => Ok(Expression::Function(f)),
20550 },
20551 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
20552 "TO_UNIXTIME" if f.args.len() == 1 => match target {
20553 DialectType::Hive
20554 | DialectType::Spark
20555 | DialectType::Databricks => Ok(Expression::Function(Box::new(
20556 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
20557 ))),
20558 _ => Ok(Expression::Function(f)),
20559 },
20560 // FROM_UNIXTIME(x) -> target-specific
20561 "FROM_UNIXTIME" if f.args.len() == 1 => {
20562 match target {
20563 DialectType::Hive
20564 | DialectType::Spark
20565 | DialectType::Databricks
20566 | DialectType::Presto
20567 | DialectType::Trino => Ok(Expression::Function(f)),
20568 DialectType::DuckDB => {
20569 // DuckDB: TO_TIMESTAMP(x)
20570 let arg = f.args.into_iter().next().unwrap();
20571 Ok(Expression::Function(Box::new(Function::new(
20572 "TO_TIMESTAMP".to_string(),
20573 vec![arg],
20574 ))))
20575 }
20576 DialectType::PostgreSQL => {
20577 // PG: TO_TIMESTAMP(col)
20578 let arg = f.args.into_iter().next().unwrap();
20579 Ok(Expression::Function(Box::new(Function::new(
20580 "TO_TIMESTAMP".to_string(),
20581 vec![arg],
20582 ))))
20583 }
20584 DialectType::Redshift => {
20585 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
20586 let arg = f.args.into_iter().next().unwrap();
20587 let epoch_ts = Expression::Literal(Box::new(
20588 Literal::Timestamp("epoch".to_string()),
20589 ));
20590 let interval = Expression::Interval(Box::new(
20591 crate::expressions::Interval {
20592 this: Some(Expression::string("1 SECOND")),
20593 unit: None,
20594 },
20595 ));
20596 let mul =
20597 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
20598 let add =
20599 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
20600 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
20601 this: add,
20602 trailing_comments: Vec::new(),
20603 })))
20604 }
20605 _ => Ok(Expression::Function(f)),
20606 }
20607 }
20608 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
20609 "FROM_UNIXTIME"
20610 if f.args.len() == 2
20611 && matches!(
20612 source,
20613 DialectType::Hive
20614 | DialectType::Spark
20615 | DialectType::Databricks
20616 ) =>
20617 {
20618 let mut args = f.args;
20619 let unix_ts = args.remove(0);
20620 let fmt_expr = args.remove(0);
20621 match target {
20622 DialectType::DuckDB => {
20623 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
20624 let to_ts = Expression::Function(Box::new(Function::new(
20625 "TO_TIMESTAMP".to_string(),
20626 vec![unix_ts],
20627 )));
20628 if let Expression::Literal(lit) = &fmt_expr {
20629 if let crate::expressions::Literal::String(s) =
20630 lit.as_ref()
20631 {
20632 let c_fmt = Self::hive_format_to_c_format(s);
20633 Ok(Expression::Function(Box::new(Function::new(
20634 "STRFTIME".to_string(),
20635 vec![to_ts, Expression::string(&c_fmt)],
20636 ))))
20637 } else {
20638 Ok(Expression::Function(Box::new(Function::new(
20639 "STRFTIME".to_string(),
20640 vec![to_ts, fmt_expr],
20641 ))))
20642 }
20643 } else {
20644 Ok(Expression::Function(Box::new(Function::new(
20645 "STRFTIME".to_string(),
20646 vec![to_ts, fmt_expr],
20647 ))))
20648 }
20649 }
20650 DialectType::Presto
20651 | DialectType::Trino
20652 | DialectType::Athena => {
20653 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
20654 let from_unix =
20655 Expression::Function(Box::new(Function::new(
20656 "FROM_UNIXTIME".to_string(),
20657 vec![unix_ts],
20658 )));
20659 if let Expression::Literal(lit) = &fmt_expr {
20660 if let crate::expressions::Literal::String(s) =
20661 lit.as_ref()
20662 {
20663 let p_fmt = Self::hive_format_to_presto_format(s);
20664 Ok(Expression::Function(Box::new(Function::new(
20665 "DATE_FORMAT".to_string(),
20666 vec![from_unix, Expression::string(&p_fmt)],
20667 ))))
20668 } else {
20669 Ok(Expression::Function(Box::new(Function::new(
20670 "DATE_FORMAT".to_string(),
20671 vec![from_unix, fmt_expr],
20672 ))))
20673 }
20674 } else {
20675 Ok(Expression::Function(Box::new(Function::new(
20676 "DATE_FORMAT".to_string(),
20677 vec![from_unix, fmt_expr],
20678 ))))
20679 }
20680 }
20681 _ => {
20682 // Keep as FROM_UNIXTIME(x, fmt) for other targets
20683 Ok(Expression::Function(Box::new(Function::new(
20684 "FROM_UNIXTIME".to_string(),
20685 vec![unix_ts, fmt_expr],
20686 ))))
20687 }
20688 }
20689 }
20690 // DATEPART/DATE_PART(unit, expr) -> DATEPART for TSQL/Fabric, EXTRACT(unit FROM expr) for Spark/Databricks, DATE_PART otherwise
20691 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
20692 let unit_str = Self::get_unit_str_static(&f.args[0]);
20693 // Get the raw unit text preserving original case
20694 let raw_unit = match &f.args[0] {
20695 Expression::Identifier(id) => id.name.clone(),
20696 Expression::Var(v) => v.this.clone(),
20697 Expression::Literal(lit)
20698 if matches!(
20699 lit.as_ref(),
20700 crate::expressions::Literal::String(_)
20701 ) =>
20702 {
20703 let crate::expressions::Literal::String(s) = lit.as_ref()
20704 else {
20705 unreachable!()
20706 };
20707 s.clone()
20708 }
20709 Expression::Column(col) => col.name.name.clone(),
20710 _ => unit_str.clone(),
20711 };
20712 match target {
20713 DialectType::TSQL | DialectType::Fabric => {
20714 // Preserve original case of unit for TSQL
20715 let unit_name = match unit_str.as_str() {
20716 "YY" | "YYYY" => "YEAR".to_string(),
20717 "QQ" | "Q" => "QUARTER".to_string(),
20718 "MM" | "M" => "MONTH".to_string(),
20719 "WK" | "WW" => "WEEK".to_string(),
20720 "DD" | "D" | "DY" => "DAY".to_string(),
20721 "HH" => "HOUR".to_string(),
20722 "MI" | "N" => "MINUTE".to_string(),
20723 "SS" | "S" => "SECOND".to_string(),
20724 _ => raw_unit.clone(), // preserve original case
20725 };
20726 let mut args = f.args;
20727 args[0] =
20728 Expression::Identifier(Identifier::new(&unit_name));
20729 Ok(Expression::Function(Box::new(Function::new(
20730 "DATEPART".to_string(),
20731 args,
20732 ))))
20733 }
20734 DialectType::Spark | DialectType::Databricks => {
20735 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
20736 // Preserve original case for non-abbreviation units
20737 let unit = match unit_str.as_str() {
20738 "YY" | "YYYY" => "YEAR".to_string(),
20739 "QQ" | "Q" => "QUARTER".to_string(),
20740 "MM" | "M" => "MONTH".to_string(),
20741 "WK" | "WW" => "WEEK".to_string(),
20742 "DD" | "D" | "DY" => "DAY".to_string(),
20743 "HH" => "HOUR".to_string(),
20744 "MI" | "N" => "MINUTE".to_string(),
20745 "SS" | "S" => "SECOND".to_string(),
20746 _ => raw_unit, // preserve original case
20747 };
20748 Ok(Expression::Extract(Box::new(
20749 crate::expressions::ExtractFunc {
20750 this: f.args[1].clone(),
20751 field: crate::expressions::DateTimeField::Custom(
20752 unit,
20753 ),
20754 },
20755 )))
20756 }
20757 _ => Ok(Expression::Function(Box::new(Function::new(
20758 "DATE_PART".to_string(),
20759 f.args,
20760 )))),
20761 }
20762 }
20763 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
20764 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
20765 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
20766 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
20767 "DATENAME" if f.args.len() == 2 => {
20768 let unit_str = Self::get_unit_str_static(&f.args[0]);
20769 let date_expr = f.args[1].clone();
20770 match unit_str.as_str() {
20771 "MM" | "M" | "MONTH" => match target {
20772 DialectType::TSQL => {
20773 let cast_date = Expression::Cast(Box::new(
20774 crate::expressions::Cast {
20775 this: date_expr,
20776 to: DataType::Custom {
20777 name: "DATETIME2".to_string(),
20778 },
20779 trailing_comments: Vec::new(),
20780 double_colon_syntax: false,
20781 format: None,
20782 default: None,
20783 inferred_type: None,
20784 },
20785 ));
20786 Ok(Expression::Function(Box::new(Function::new(
20787 "FORMAT".to_string(),
20788 vec![cast_date, Expression::string("MMMM")],
20789 ))))
20790 }
20791 DialectType::Spark | DialectType::Databricks => {
20792 let cast_date = Expression::Cast(Box::new(
20793 crate::expressions::Cast {
20794 this: date_expr,
20795 to: DataType::Timestamp {
20796 timezone: false,
20797 precision: None,
20798 },
20799 trailing_comments: Vec::new(),
20800 double_colon_syntax: false,
20801 format: None,
20802 default: None,
20803 inferred_type: None,
20804 },
20805 ));
20806 Ok(Expression::Function(Box::new(Function::new(
20807 "DATE_FORMAT".to_string(),
20808 vec![cast_date, Expression::string("MMMM")],
20809 ))))
20810 }
20811 _ => Ok(Expression::Function(f)),
20812 },
20813 "DW" | "WEEKDAY" => match target {
20814 DialectType::TSQL => {
20815 let cast_date = Expression::Cast(Box::new(
20816 crate::expressions::Cast {
20817 this: date_expr,
20818 to: DataType::Custom {
20819 name: "DATETIME2".to_string(),
20820 },
20821 trailing_comments: Vec::new(),
20822 double_colon_syntax: false,
20823 format: None,
20824 default: None,
20825 inferred_type: None,
20826 },
20827 ));
20828 Ok(Expression::Function(Box::new(Function::new(
20829 "FORMAT".to_string(),
20830 vec![cast_date, Expression::string("dddd")],
20831 ))))
20832 }
20833 DialectType::Spark | DialectType::Databricks => {
20834 let cast_date = Expression::Cast(Box::new(
20835 crate::expressions::Cast {
20836 this: date_expr,
20837 to: DataType::Timestamp {
20838 timezone: false,
20839 precision: None,
20840 },
20841 trailing_comments: Vec::new(),
20842 double_colon_syntax: false,
20843 format: None,
20844 default: None,
20845 inferred_type: None,
20846 },
20847 ));
20848 Ok(Expression::Function(Box::new(Function::new(
20849 "DATE_FORMAT".to_string(),
20850 vec![cast_date, Expression::string("EEEE")],
20851 ))))
20852 }
20853 _ => Ok(Expression::Function(f)),
20854 },
20855 _ => Ok(Expression::Function(f)),
20856 }
20857 }
20858 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
20859 "STRING_AGG" if f.args.len() >= 2 => {
20860 let x = f.args[0].clone();
20861 let sep = f.args[1].clone();
20862 match target {
20863 DialectType::MySQL
20864 | DialectType::SingleStore
20865 | DialectType::Doris
20866 | DialectType::StarRocks => Ok(Expression::GroupConcat(
20867 Box::new(crate::expressions::GroupConcatFunc {
20868 this: x,
20869 separator: Some(sep),
20870 order_by: None,
20871 distinct: false,
20872 filter: None,
20873 limit: None,
20874 inferred_type: None,
20875 }),
20876 )),
20877 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
20878 crate::expressions::GroupConcatFunc {
20879 this: x,
20880 separator: Some(sep),
20881 order_by: None,
20882 distinct: false,
20883 filter: None,
20884 limit: None,
20885 inferred_type: None,
20886 },
20887 ))),
20888 DialectType::PostgreSQL | DialectType::Redshift => {
20889 Ok(Expression::StringAgg(Box::new(
20890 crate::expressions::StringAggFunc {
20891 this: x,
20892 separator: Some(sep),
20893 order_by: None,
20894 distinct: false,
20895 filter: None,
20896 limit: None,
20897 inferred_type: None,
20898 },
20899 )))
20900 }
20901 _ => Ok(Expression::Function(f)),
20902 }
20903 }
20904 "TRY_DIVIDE" if f.args.len() == 2 => {
20905 let mut args = f.args;
20906 let x = args.remove(0);
20907 let y = args.remove(0);
20908 match target {
20909 DialectType::Spark | DialectType::Databricks => {
20910 Ok(Expression::Function(Box::new(Function::new(
20911 "TRY_DIVIDE".to_string(),
20912 vec![x, y],
20913 ))))
20914 }
20915 DialectType::Snowflake => {
20916 let y_ref = match &y {
20917 Expression::Column(_)
20918 | Expression::Literal(_)
20919 | Expression::Identifier(_) => y.clone(),
20920 _ => Expression::Paren(Box::new(Paren {
20921 this: y.clone(),
20922 trailing_comments: vec![],
20923 })),
20924 };
20925 let x_ref = match &x {
20926 Expression::Column(_)
20927 | Expression::Literal(_)
20928 | Expression::Identifier(_) => x.clone(),
20929 _ => Expression::Paren(Box::new(Paren {
20930 this: x.clone(),
20931 trailing_comments: vec![],
20932 })),
20933 };
20934 let condition = Expression::Neq(Box::new(
20935 crate::expressions::BinaryOp::new(
20936 y_ref.clone(),
20937 Expression::number(0),
20938 ),
20939 ));
20940 let div_expr = Expression::Div(Box::new(
20941 crate::expressions::BinaryOp::new(x_ref, y_ref),
20942 ));
20943 Ok(Expression::IfFunc(Box::new(
20944 crate::expressions::IfFunc {
20945 condition,
20946 true_value: div_expr,
20947 false_value: Some(Expression::Null(Null)),
20948 original_name: Some("IFF".to_string()),
20949 inferred_type: None,
20950 },
20951 )))
20952 }
20953 DialectType::DuckDB => {
20954 let y_ref = match &y {
20955 Expression::Column(_)
20956 | Expression::Literal(_)
20957 | Expression::Identifier(_) => y.clone(),
20958 _ => Expression::Paren(Box::new(Paren {
20959 this: y.clone(),
20960 trailing_comments: vec![],
20961 })),
20962 };
20963 let x_ref = match &x {
20964 Expression::Column(_)
20965 | Expression::Literal(_)
20966 | Expression::Identifier(_) => x.clone(),
20967 _ => Expression::Paren(Box::new(Paren {
20968 this: x.clone(),
20969 trailing_comments: vec![],
20970 })),
20971 };
20972 let condition = Expression::Neq(Box::new(
20973 crate::expressions::BinaryOp::new(
20974 y_ref.clone(),
20975 Expression::number(0),
20976 ),
20977 ));
20978 let div_expr = Expression::Div(Box::new(
20979 crate::expressions::BinaryOp::new(x_ref, y_ref),
20980 ));
20981 Ok(Expression::Case(Box::new(Case {
20982 operand: None,
20983 whens: vec![(condition, div_expr)],
20984 else_: Some(Expression::Null(Null)),
20985 comments: Vec::new(),
20986 inferred_type: None,
20987 })))
20988 }
20989 _ => Ok(Expression::Function(Box::new(Function::new(
20990 "TRY_DIVIDE".to_string(),
20991 vec![x, y],
20992 )))),
20993 }
20994 }
20995 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
20996 "JSON_ARRAYAGG" => match target {
20997 DialectType::PostgreSQL => {
20998 Ok(Expression::Function(Box::new(Function {
20999 name: "JSON_AGG".to_string(),
21000 ..(*f)
21001 })))
21002 }
21003 _ => Ok(Expression::Function(f)),
21004 },
21005 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
21006 "SCHEMA_NAME" => match target {
21007 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
21008 crate::expressions::CurrentSchema { this: None },
21009 ))),
21010 DialectType::SQLite => Ok(Expression::string("main")),
21011 _ => Ok(Expression::Function(f)),
21012 },
21013 // TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
21014 "TO_TIMESTAMP"
21015 if f.args.len() == 2
21016 && matches!(
21017 source,
21018 DialectType::Spark
21019 | DialectType::Databricks
21020 | DialectType::Hive
21021 )
21022 && matches!(target, DialectType::DuckDB) =>
21023 {
21024 let mut args = f.args;
21025 let val = args.remove(0);
21026 let fmt_expr = args.remove(0);
21027 if let Expression::Literal(ref lit) = fmt_expr {
21028 if let Literal::String(ref s) = lit.as_ref() {
21029 // Convert Java/Spark format to C strptime format
/// Converts a Java/Spark datetime pattern into a C-style `strptime` format string.
///
/// Multi-letter Java tokens (`yyyy`, `SSSSSS`, `EEEE`, `MM`, `dd`, `HH`, `mm`, `ss`,
/// `yy`) are substituted first. A second pass then rewrites the single-letter zone
/// tokens (`z` -> `%Z`, `Z` -> `%z`) while copying every already-emitted `%x`
/// directive through untouched, so letters produced by the first pass are never
/// re-interpreted.
fn java_to_c_fmt(fmt: &str) -> String {
    // `yyyy` is replaced before `yy` so the longer token is not split in two.
    let substituted = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // `EEEE` is the full weekday name, which is `%A` in strftime/strptime;
        // `%W` would be the week-of-year number.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut out = String::with_capacity(substituted.len() + 4);
    let mut chars = substituted.chars();
    while let Some(c) = chars.next() {
        match c {
            '%' => {
                // Copy the directive letter verbatim; a trailing lone '%' is kept as-is.
                out.push('%');
                if let Some(directive) = chars.next() {
                    out.push(directive);
                }
            }
            'z' => out.push_str("%Z"),
            'Z' => out.push_str("%z"),
            other => out.push(other),
        }
    }
    out
}
21062 let c_fmt = java_to_c_fmt(s);
21063 Ok(Expression::Function(Box::new(Function::new(
21064 "STRPTIME".to_string(),
21065 vec![val, Expression::string(&c_fmt)],
21066 ))))
21067 } else {
21068 Ok(Expression::Function(Box::new(Function::new(
21069 "STRPTIME".to_string(),
21070 vec![val, fmt_expr],
21071 ))))
21072 }
21073 } else {
21074 Ok(Expression::Function(Box::new(Function::new(
21075 "STRPTIME".to_string(),
21076 vec![val, fmt_expr],
21077 ))))
21078 }
21079 }
21080 // TO_DATE(x) 1-arg from Doris/StarRocks: date conversion
21081 "TO_DATE"
21082 if f.args.len() == 1
21083 && matches!(
21084 source,
21085 DialectType::Doris | DialectType::StarRocks
21086 ) =>
21087 {
21088 let arg = f.args.into_iter().next().unwrap();
21089 match target {
21090 DialectType::Oracle
21091 | DialectType::DuckDB
21092 | DialectType::TSQL => {
21093 // CAST(x AS DATE)
21094 Ok(Expression::Cast(Box::new(Cast {
21095 this: arg,
21096 to: DataType::Date,
21097 double_colon_syntax: false,
21098 trailing_comments: vec![],
21099 format: None,
21100 default: None,
21101 inferred_type: None,
21102 })))
21103 }
21104 DialectType::MySQL | DialectType::SingleStore => {
21105 // DATE(x)
21106 Ok(Expression::Function(Box::new(Function::new(
21107 "DATE".to_string(),
21108 vec![arg],
21109 ))))
21110 }
21111 _ => {
21112 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
21113 Ok(Expression::Function(Box::new(Function::new(
21114 "TO_DATE".to_string(),
21115 vec![arg],
21116 ))))
21117 }
21118 }
21119 }
21120 // TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
21121 "TO_DATE"
21122 if f.args.len() == 1
21123 && matches!(
21124 source,
21125 DialectType::Spark
21126 | DialectType::Databricks
21127 | DialectType::Hive
21128 ) =>
21129 {
21130 let arg = f.args.into_iter().next().unwrap();
21131 match target {
21132 DialectType::DuckDB => {
21133 // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
21134 Ok(Expression::TryCast(Box::new(Cast {
21135 this: arg,
21136 to: DataType::Date,
21137 double_colon_syntax: false,
21138 trailing_comments: vec![],
21139 format: None,
21140 default: None,
21141 inferred_type: None,
21142 })))
21143 }
21144 DialectType::Presto
21145 | DialectType::Trino
21146 | DialectType::Athena => {
21147 // CAST(CAST(x AS TIMESTAMP) AS DATE)
21148 Ok(Self::double_cast_timestamp_date(arg))
21149 }
21150 DialectType::Snowflake => {
21151 // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
21152 // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
21153 Ok(Expression::Function(Box::new(Function::new(
21154 "TRY_TO_DATE".to_string(),
21155 vec![arg, Expression::string("yyyy-mm-DD")],
21156 ))))
21157 }
21158 _ => {
21159 // Default: keep as TO_DATE(x)
21160 Ok(Expression::Function(Box::new(Function::new(
21161 "TO_DATE".to_string(),
21162 vec![arg],
21163 ))))
21164 }
21165 }
21166 }
21167 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
21168 "TO_DATE"
21169 if f.args.len() == 2
21170 && matches!(
21171 source,
21172 DialectType::Spark
21173 | DialectType::Databricks
21174 | DialectType::Hive
21175 ) =>
21176 {
21177 let mut args = f.args;
21178 let val = args.remove(0);
21179 let fmt_expr = args.remove(0);
21180 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
21181
21182 if is_default_format {
21183 // Default format: same as 1-arg form
21184 match target {
21185 DialectType::DuckDB => {
21186 Ok(Expression::TryCast(Box::new(Cast {
21187 this: val,
21188 to: DataType::Date,
21189 double_colon_syntax: false,
21190 trailing_comments: vec![],
21191 format: None,
21192 default: None,
21193 inferred_type: None,
21194 })))
21195 }
21196 DialectType::Presto
21197 | DialectType::Trino
21198 | DialectType::Athena => {
21199 Ok(Self::double_cast_timestamp_date(val))
21200 }
21201 DialectType::Snowflake => {
21202 // TRY_TO_DATE(x, format) with Snowflake format mapping
21203 let sf_fmt = "yyyy-MM-dd"
21204 .replace("yyyy", "yyyy")
21205 .replace("MM", "mm")
21206 .replace("dd", "DD");
21207 Ok(Expression::Function(Box::new(Function::new(
21208 "TRY_TO_DATE".to_string(),
21209 vec![val, Expression::string(&sf_fmt)],
21210 ))))
21211 }
21212 _ => Ok(Expression::Function(Box::new(Function::new(
21213 "TO_DATE".to_string(),
21214 vec![val],
21215 )))),
21216 }
21217 } else {
21218 // Non-default format: use format-based parsing
21219 if let Expression::Literal(ref lit) = fmt_expr {
21220 if let Literal::String(ref s) = lit.as_ref() {
21221 match target {
21222 DialectType::DuckDB => {
21223 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
/// Translates a Java/Spark date pattern into the C-style format expected by
/// `TRY_STRPTIME`.
///
/// The translation runs in two stages: multi-letter Java tokens are replaced with
/// their `%` directives, then the result is scanned once to turn the single-letter
/// zone tokens into directives (`z` -> `%Z`, `Z` -> `%z`). During the scan every
/// `%x` pair is copied through unchanged so directive letters emitted by the first
/// stage are not mistaken for Java tokens.
fn java_to_c_fmt_todate(fmt: &str) -> String {
    // Order matters: `yyyy` must be consumed before the shorter `yy`.
    let staged = fmt
        .replace("yyyy", "%Y")
        .replace("SSSSSS", "%f")
        // Java `EEEE` (full weekday name) corresponds to `%A`; `%W` is the
        // week-of-year number in strftime/strptime.
        .replace("EEEE", "%A")
        .replace("MM", "%m")
        .replace("dd", "%d")
        .replace("HH", "%H")
        .replace("mm", "%M")
        .replace("ss", "%S")
        .replace("yy", "%y");

    let mut converted = String::with_capacity(staged.len() + 4);
    let mut rest = staged.chars();
    while let Some(ch) = rest.next() {
        if ch == '%' {
            // Pass the directive through untouched; a dangling '%' at the end stays literal.
            converted.push(ch);
            if let Some(letter) = rest.next() {
                converted.push(letter);
            }
        } else if ch == 'z' {
            converted.push_str("%Z");
        } else if ch == 'Z' {
            converted.push_str("%z");
        } else {
            converted.push(ch);
        }
    }
    converted
}
21259 let c_fmt = java_to_c_fmt_todate(s);
21260 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
21261 let try_strptime = Expression::Function(
21262 Box::new(Function::new(
21263 "TRY_STRPTIME".to_string(),
21264 vec![val, Expression::string(&c_fmt)],
21265 )),
21266 );
21267 let cast_ts =
21268 Expression::Cast(Box::new(Cast {
21269 this: try_strptime,
21270 to: DataType::Timestamp {
21271 precision: None,
21272 timezone: false,
21273 },
21274 double_colon_syntax: false,
21275 trailing_comments: vec![],
21276 format: None,
21277 default: None,
21278 inferred_type: None,
21279 }));
21280 Ok(Expression::Cast(Box::new(Cast {
21281 this: cast_ts,
21282 to: DataType::Date,
21283 double_colon_syntax: false,
21284 trailing_comments: vec![],
21285 format: None,
21286 default: None,
21287 inferred_type: None,
21288 })))
21289 }
21290 DialectType::Presto
21291 | DialectType::Trino
21292 | DialectType::Athena => {
21293 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
21294 let p_fmt = s
21295 .replace("yyyy", "%Y")
21296 .replace("SSSSSS", "%f")
21297 .replace("MM", "%m")
21298 .replace("dd", "%d")
21299 .replace("HH", "%H")
21300 .replace("mm", "%M")
21301 .replace("ss", "%S")
21302 .replace("yy", "%y");
21303 let date_parse = Expression::Function(
21304 Box::new(Function::new(
21305 "DATE_PARSE".to_string(),
21306 vec![val, Expression::string(&p_fmt)],
21307 )),
21308 );
21309 Ok(Expression::Cast(Box::new(Cast {
21310 this: date_parse,
21311 to: DataType::Date,
21312 double_colon_syntax: false,
21313 trailing_comments: vec![],
21314 format: None,
21315 default: None,
21316 inferred_type: None,
21317 })))
21318 }
21319 DialectType::Snowflake => {
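21320 // TRY_TO_DATE(x, fmt) -- NOTE(review): the source format string is passed through as-is, not mapped to Snowflake tokens; confirm this is intended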
21321 Ok(Expression::Function(Box::new(
21322 Function::new(
21323 "TRY_TO_DATE".to_string(),
21324 vec![val, Expression::string(s)],
21325 ),
21326 )))
21327 }
21328 _ => Ok(Expression::Function(Box::new(
21329 Function::new(
21330 "TO_DATE".to_string(),
21331 vec![val, fmt_expr],
21332 ),
21333 ))),
21334 }
21335 } else {
21336 Ok(Expression::Function(Box::new(Function::new(
21337 "TO_DATE".to_string(),
21338 vec![val, fmt_expr],
21339 ))))
21340 }
21341 } else {
21342 Ok(Expression::Function(Box::new(Function::new(
21343 "TO_DATE".to_string(),
21344 vec![val, fmt_expr],
21345 ))))
21346 }
21347 }
21348 }
21349 // TO_TIMESTAMP(x) 1-arg: epoch conversion
21350 "TO_TIMESTAMP"
21351 if f.args.len() == 1
21352 && matches!(source, DialectType::DuckDB)
21353 && matches!(
21354 target,
21355 DialectType::BigQuery
21356 | DialectType::Presto
21357 | DialectType::Trino
21358 | DialectType::Hive
21359 | DialectType::Spark
21360 | DialectType::Databricks
21361 | DialectType::Athena
21362 ) =>
21363 {
21364 let arg = f.args.into_iter().next().unwrap();
21365 let func_name = match target {
21366 DialectType::BigQuery => "TIMESTAMP_SECONDS",
21367 DialectType::Presto
21368 | DialectType::Trino
21369 | DialectType::Athena
21370 | DialectType::Hive
21371 | DialectType::Spark
21372 | DialectType::Databricks => "FROM_UNIXTIME",
21373 _ => "TO_TIMESTAMP",
21374 };
21375 Ok(Expression::Function(Box::new(Function::new(
21376 func_name.to_string(),
21377 vec![arg],
21378 ))))
21379 }
21380 // CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
21381 "CONCAT" if f.args.len() == 1 => {
21382 let arg = f.args.into_iter().next().unwrap();
21383 match target {
21384 DialectType::Presto
21385 | DialectType::Trino
21386 | DialectType::Athena => {
21387 // CONCAT(a) -> CAST(a AS VARCHAR)
21388 Ok(Expression::Cast(Box::new(Cast {
21389 this: arg,
21390 to: DataType::VarChar {
21391 length: None,
21392 parenthesized_length: false,
21393 },
21394 trailing_comments: vec![],
21395 double_colon_syntax: false,
21396 format: None,
21397 default: None,
21398 inferred_type: None,
21399 })))
21400 }
21401 DialectType::TSQL => {
21402 // CONCAT(a) -> a
21403 Ok(arg)
21404 }
21405 DialectType::DuckDB => {
21406 // Keep CONCAT(a) for DuckDB (native support)
21407 Ok(Expression::Function(Box::new(Function::new(
21408 "CONCAT".to_string(),
21409 vec![arg],
21410 ))))
21411 }
21412 DialectType::Spark | DialectType::Databricks => {
21413 let coalesced = Expression::Coalesce(Box::new(
21414 crate::expressions::VarArgFunc {
21415 expressions: vec![arg, Expression::string("")],
21416 original_name: None,
21417 inferred_type: None,
21418 },
21419 ));
21420 Ok(Expression::Function(Box::new(Function::new(
21421 "CONCAT".to_string(),
21422 vec![coalesced],
21423 ))))
21424 }
21425 _ => Ok(Expression::Function(Box::new(Function::new(
21426 "CONCAT".to_string(),
21427 vec![arg],
21428 )))),
21429 }
21430 }
21431 // REGEXP_EXTRACT(a, p, group) 3-arg -> REGEXP_EXTRACT(a, p) for BigQuery when group is the literal 0 (BigQuery's default group)
21432 "REGEXP_EXTRACT"
21433 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
21434 {
21435 // If group_index is 0, drop it
21436 let drop_group = match &f.args[2] {
21437 Expression::Literal(lit)
21438 if matches!(lit.as_ref(), Literal::Number(_)) =>
21439 {
21440 let Literal::Number(n) = lit.as_ref() else {
21441 unreachable!()
21442 };
21443 n == "0"
21444 }
21445 _ => false,
21446 };
21447 if drop_group {
21448 let mut args = f.args;
21449 args.truncate(2);
21450 Ok(Expression::Function(Box::new(Function::new(
21451 "REGEXP_EXTRACT".to_string(),
21452 args,
21453 ))))
21454 } else {
21455 Ok(Expression::Function(f))
21456 }
21457 }
21458 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
21459 "REGEXP_EXTRACT"
21460 if f.args.len() == 4
21461 && matches!(target, DialectType::Snowflake) =>
21462 {
21463 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
21464 let mut args = f.args;
21465 let this = args.remove(0);
21466 let pattern = args.remove(0);
21467 let group = args.remove(0);
21468 let flags = args.remove(0);
21469 Ok(Expression::Function(Box::new(Function::new(
21470 "REGEXP_SUBSTR".to_string(),
21471 vec![
21472 this,
21473 pattern,
21474 Expression::number(1),
21475 Expression::number(1),
21476 flags,
21477 group,
21478 ],
21479 ))))
21480 }
21481 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
21482 "REGEXP_SUBSTR"
21483 if f.args.len() == 3
21484 && matches!(
21485 target,
21486 DialectType::DuckDB
21487 | DialectType::Presto
21488 | DialectType::Trino
21489 | DialectType::Spark
21490 | DialectType::Databricks
21491 ) =>
21492 {
21493 let mut args = f.args;
21494 let this = args.remove(0);
21495 let pattern = args.remove(0);
21496 let position = args.remove(0);
21497 // Wrap subject in SUBSTRING(this, position) to apply the offset
21498 let substring_expr = Expression::Function(Box::new(Function::new(
21499 "SUBSTRING".to_string(),
21500 vec![this, position],
21501 )));
21502 let target_name = match target {
21503 DialectType::DuckDB => "REGEXP_EXTRACT",
21504 _ => "REGEXP_EXTRACT",
21505 };
21506 Ok(Expression::Function(Box::new(Function::new(
21507 target_name.to_string(),
21508 vec![substring_expr, pattern],
21509 ))))
21510 }
21511 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
21512 "TO_DAYS" if f.args.len() == 1 => {
21513 let x = f.args.into_iter().next().unwrap();
21514 let epoch = Expression::string("0000-01-01");
21515 // Build the final target-specific expression directly
21516 let datediff_expr = match target {
21517 DialectType::MySQL | DialectType::SingleStore => {
21518 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
21519 Expression::Function(Box::new(Function::new(
21520 "DATEDIFF".to_string(),
21521 vec![x, epoch],
21522 )))
21523 }
21524 DialectType::DuckDB => {
21525 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
21526 let cast_epoch = Expression::Cast(Box::new(Cast {
21527 this: epoch,
21528 to: DataType::Date,
21529 trailing_comments: Vec::new(),
21530 double_colon_syntax: false,
21531 format: None,
21532 default: None,
21533 inferred_type: None,
21534 }));
21535 let cast_x = Expression::Cast(Box::new(Cast {
21536 this: x,
21537 to: DataType::Date,
21538 trailing_comments: Vec::new(),
21539 double_colon_syntax: false,
21540 format: None,
21541 default: None,
21542 inferred_type: None,
21543 }));
21544 Expression::Function(Box::new(Function::new(
21545 "DATE_DIFF".to_string(),
21546 vec![Expression::string("DAY"), cast_epoch, cast_x],
21547 )))
21548 }
21549 DialectType::Presto
21550 | DialectType::Trino
21551 | DialectType::Athena => {
21552 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
21553 let cast_epoch = Self::double_cast_timestamp_date(epoch);
21554 let cast_x = Self::double_cast_timestamp_date(x);
21555 Expression::Function(Box::new(Function::new(
21556 "DATE_DIFF".to_string(),
21557 vec![Expression::string("DAY"), cast_epoch, cast_x],
21558 )))
21559 }
21560 _ => {
21561 // Default: (DATEDIFF(x, '0000-01-01') + 1)
21562 Expression::Function(Box::new(Function::new(
21563 "DATEDIFF".to_string(),
21564 vec![x, epoch],
21565 )))
21566 }
21567 };
21568 let add_one = Expression::Add(Box::new(BinaryOp::new(
21569 datediff_expr,
21570 Expression::number(1),
21571 )));
21572 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
21573 this: add_one,
21574 trailing_comments: Vec::new(),
21575 })))
21576 }
21577 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
21578 "STR_TO_DATE"
21579 if f.args.len() == 2
21580 && matches!(
21581 target,
21582 DialectType::Presto | DialectType::Trino
21583 ) =>
21584 {
21585 let mut args = f.args;
21586 let x = args.remove(0);
21587 let format_expr = args.remove(0);
21588 // Check if the format contains time components
21589 let has_time = if let Expression::Literal(ref lit) = format_expr {
21590 if let Literal::String(ref fmt) = lit.as_ref() {
21591 fmt.contains("%H")
21592 || fmt.contains("%T")
21593 || fmt.contains("%M")
21594 || fmt.contains("%S")
21595 || fmt.contains("%I")
21596 || fmt.contains("%p")
21597 } else {
21598 false
21599 }
21600 } else {
21601 false
21602 };
21603 let date_parse = Expression::Function(Box::new(Function::new(
21604 "DATE_PARSE".to_string(),
21605 vec![x, format_expr],
21606 )));
21607 if has_time {
21608 // Has time components: just DATE_PARSE
21609 Ok(date_parse)
21610 } else {
21611 // Date-only: CAST(DATE_PARSE(...) AS DATE)
21612 Ok(Expression::Cast(Box::new(Cast {
21613 this: date_parse,
21614 to: DataType::Date,
21615 trailing_comments: Vec::new(),
21616 double_colon_syntax: false,
21617 format: None,
21618 default: None,
21619 inferred_type: None,
21620 })))
21621 }
21622 }
21623 "STR_TO_DATE"
21624 if f.args.len() == 2
21625 && matches!(
21626 target,
21627 DialectType::PostgreSQL | DialectType::Redshift
21628 ) =>
21629 {
21630 let mut args = f.args;
21631 let x = args.remove(0);
21632 let fmt = args.remove(0);
21633 let pg_fmt = match fmt {
21634 Expression::Literal(lit)
21635 if matches!(lit.as_ref(), Literal::String(_)) =>
21636 {
21637 let Literal::String(s) = lit.as_ref() else {
21638 unreachable!()
21639 };
21640 Expression::string(
21641 &s.replace("%Y", "YYYY")
21642 .replace("%m", "MM")
21643 .replace("%d", "DD")
21644 .replace("%H", "HH24")
21645 .replace("%M", "MI")
21646 .replace("%S", "SS"),
21647 )
21648 }
21649 other => other,
21650 };
21651 let to_date = Expression::Function(Box::new(Function::new(
21652 "TO_DATE".to_string(),
21653 vec![x, pg_fmt],
21654 )));
21655 Ok(Expression::Cast(Box::new(Cast {
21656 this: to_date,
21657 to: DataType::Timestamp {
21658 timezone: false,
21659 precision: None,
21660 },
21661 trailing_comments: Vec::new(),
21662 double_colon_syntax: false,
21663 format: None,
21664 default: None,
21665 inferred_type: None,
21666 })))
21667 }
21668 // RANGE(start, end) -> GENERATE_SERIES for SQLite
21669 "RANGE"
21670 if (f.args.len() == 1 || f.args.len() == 2)
21671 && matches!(target, DialectType::SQLite) =>
21672 {
21673 if f.args.len() == 2 {
21674 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
21675 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
21676 let mut args = f.args;
21677 let start = args.remove(0);
21678 let end = args.remove(0);
21679 Ok(Expression::Function(Box::new(Function::new(
21680 "GENERATE_SERIES".to_string(),
21681 vec![start, end],
21682 ))))
21683 } else {
21684 Ok(Expression::Function(f))
21685 }
21686 }
21687 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
21688 // When source is Snowflake, keep as-is (args already in correct form)
21689 "UNIFORM"
21690 if matches!(target, DialectType::Snowflake)
21691 && (f.args.len() == 2 || f.args.len() == 3) =>
21692 {
21693 if matches!(source, DialectType::Snowflake) {
21694 // Snowflake -> Snowflake: keep as-is
21695 Ok(Expression::Function(f))
21696 } else {
21697 let mut args = f.args;
21698 let low = args.remove(0);
21699 let high = args.remove(0);
21700 let random = if !args.is_empty() {
21701 let seed = args.remove(0);
21702 Expression::Function(Box::new(Function::new(
21703 "RANDOM".to_string(),
21704 vec![seed],
21705 )))
21706 } else {
21707 Expression::Function(Box::new(Function::new(
21708 "RANDOM".to_string(),
21709 vec![],
21710 )))
21711 };
21712 Ok(Expression::Function(Box::new(Function::new(
21713 "UNIFORM".to_string(),
21714 vec![low, high, random],
21715 ))))
21716 }
21717 }
21718 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
21719 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
21720 let mut args = f.args;
21721 let ts_arg = args.remove(0);
21722 let tz_arg = args.remove(0);
21723 // Cast string literal to TIMESTAMP for all targets
21724 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
21725 {
21726 Expression::Cast(Box::new(Cast {
21727 this: ts_arg,
21728 to: DataType::Timestamp {
21729 timezone: false,
21730 precision: None,
21731 },
21732 trailing_comments: vec![],
21733 double_colon_syntax: false,
21734 format: None,
21735 default: None,
21736 inferred_type: None,
21737 }))
21738 } else {
21739 ts_arg
21740 };
21741 match target {
21742 DialectType::Spark | DialectType::Databricks => {
21743 Ok(Expression::Function(Box::new(Function::new(
21744 "TO_UTC_TIMESTAMP".to_string(),
21745 vec![ts_cast, tz_arg],
21746 ))))
21747 }
21748 DialectType::Snowflake => {
21749 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
21750 Ok(Expression::Function(Box::new(Function::new(
21751 "CONVERT_TIMEZONE".to_string(),
21752 vec![tz_arg, Expression::string("UTC"), ts_cast],
21753 ))))
21754 }
21755 DialectType::Presto
21756 | DialectType::Trino
21757 | DialectType::Athena => {
21758 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
21759 let wtz = Expression::Function(Box::new(Function::new(
21760 "WITH_TIMEZONE".to_string(),
21761 vec![ts_cast, tz_arg],
21762 )));
21763 Ok(Expression::AtTimeZone(Box::new(
21764 crate::expressions::AtTimeZone {
21765 this: wtz,
21766 zone: Expression::string("UTC"),
21767 },
21768 )))
21769 }
21770 DialectType::BigQuery => {
21771 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
21772 let cast_dt = Expression::Cast(Box::new(Cast {
21773 this: if let Expression::Cast(c) = ts_cast {
21774 c.this
21775 } else {
21776 ts_cast.clone()
21777 },
21778 to: DataType::Custom {
21779 name: "DATETIME".to_string(),
21780 },
21781 trailing_comments: vec![],
21782 double_colon_syntax: false,
21783 format: None,
21784 default: None,
21785 inferred_type: None,
21786 }));
21787 let ts_func =
21788 Expression::Function(Box::new(Function::new(
21789 "TIMESTAMP".to_string(),
21790 vec![cast_dt, tz_arg],
21791 )));
21792 Ok(Expression::Function(Box::new(Function::new(
21793 "DATETIME".to_string(),
21794 vec![ts_func, Expression::string("UTC")],
21795 ))))
21796 }
21797 _ => {
21798 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
21799 let atz1 = Expression::AtTimeZone(Box::new(
21800 crate::expressions::AtTimeZone {
21801 this: ts_cast,
21802 zone: tz_arg,
21803 },
21804 ));
21805 Ok(Expression::AtTimeZone(Box::new(
21806 crate::expressions::AtTimeZone {
21807 this: atz1,
21808 zone: Expression::string("UTC"),
21809 },
21810 )))
21811 }
21812 }
21813 }
21814 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
21815 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
21816 let mut args = f.args;
21817 let ts_arg = args.remove(0);
21818 let tz_arg = args.remove(0);
21819 // Cast string literal to TIMESTAMP
21820 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
21821 {
21822 Expression::Cast(Box::new(Cast {
21823 this: ts_arg,
21824 to: DataType::Timestamp {
21825 timezone: false,
21826 precision: None,
21827 },
21828 trailing_comments: vec![],
21829 double_colon_syntax: false,
21830 format: None,
21831 default: None,
21832 inferred_type: None,
21833 }))
21834 } else {
21835 ts_arg
21836 };
21837 match target {
21838 DialectType::Spark | DialectType::Databricks => {
21839 Ok(Expression::Function(Box::new(Function::new(
21840 "FROM_UTC_TIMESTAMP".to_string(),
21841 vec![ts_cast, tz_arg],
21842 ))))
21843 }
21844 DialectType::Presto
21845 | DialectType::Trino
21846 | DialectType::Athena => {
21847 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
21848 Ok(Expression::Function(Box::new(Function::new(
21849 "AT_TIMEZONE".to_string(),
21850 vec![ts_cast, tz_arg],
21851 ))))
21852 }
21853 DialectType::Snowflake => {
21854 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
21855 Ok(Expression::Function(Box::new(Function::new(
21856 "CONVERT_TIMEZONE".to_string(),
21857 vec![Expression::string("UTC"), tz_arg, ts_cast],
21858 ))))
21859 }
21860 _ => {
21861 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
21862 Ok(Expression::AtTimeZone(Box::new(
21863 crate::expressions::AtTimeZone {
21864 this: ts_cast,
21865 zone: tz_arg,
21866 },
21867 )))
21868 }
21869 }
21870 }
21871 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
21872 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
21873 let name = match target {
21874 DialectType::Snowflake => "OBJECT_CONSTRUCT",
21875 _ => "MAP",
21876 };
21877 Ok(Expression::Function(Box::new(Function::new(
21878 name.to_string(),
21879 f.args,
21880 ))))
21881 }
21882 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
21883 "STR_TO_MAP" if f.args.len() >= 1 => match target {
21884 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
21885 Ok(Expression::Function(Box::new(Function::new(
21886 "SPLIT_TO_MAP".to_string(),
21887 f.args,
21888 ))))
21889 }
21890 _ => Ok(Expression::Function(f)),
21891 },
21892 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
21893 "TIME_TO_STR" if f.args.len() == 2 => {
21894 let mut args = f.args;
21895 let this = args.remove(0);
21896 let fmt_expr = args.remove(0);
21897 let format = if let Expression::Literal(lit) = fmt_expr {
21898 if let Literal::String(s) = lit.as_ref() {
21899 s.clone()
21900 } else {
21901 String::new()
21902 }
21903 } else {
21904 "%Y-%m-%d %H:%M:%S".to_string()
21905 };
21906 Ok(Expression::TimeToStr(Box::new(
21907 crate::expressions::TimeToStr {
21908 this: Box::new(this),
21909 format,
21910 culture: None,
21911 zone: None,
21912 },
21913 )))
21914 }
21915 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
21916 "STR_TO_TIME" if f.args.len() == 2 => {
21917 let mut args = f.args;
21918 let this = args.remove(0);
21919 let fmt_expr = args.remove(0);
21920 let format = if let Expression::Literal(lit) = fmt_expr {
21921 if let Literal::String(s) = lit.as_ref() {
21922 s.clone()
21923 } else {
21924 String::new()
21925 }
21926 } else {
21927 "%Y-%m-%d %H:%M:%S".to_string()
21928 };
21929 Ok(Expression::StrToTime(Box::new(
21930 crate::expressions::StrToTime {
21931 this: Box::new(this),
21932 format,
21933 zone: None,
21934 safe: None,
21935 target_type: None,
21936 },
21937 )))
21938 }
21939 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
21940 "STR_TO_UNIX" if f.args.len() >= 1 => {
21941 let mut args = f.args;
21942 let this = args.remove(0);
21943 let format = if !args.is_empty() {
21944 if let Expression::Literal(lit) = args.remove(0) {
21945 if let Literal::String(s) = lit.as_ref() {
21946 Some(s.clone())
21947 } else {
21948 None
21949 }
21950 } else {
21951 None
21952 }
21953 } else {
21954 None
21955 };
21956 Ok(Expression::StrToUnix(Box::new(
21957 crate::expressions::StrToUnix {
21958 this: Some(Box::new(this)),
21959 format,
21960 },
21961 )))
21962 }
21963 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
21964 "TIME_TO_UNIX" if f.args.len() == 1 => {
21965 let mut args = f.args;
21966 let this = args.remove(0);
21967 Ok(Expression::TimeToUnix(Box::new(
21968 crate::expressions::UnaryFunc {
21969 this,
21970 original_name: None,
21971 inferred_type: None,
21972 },
21973 )))
21974 }
21975 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
21976 "UNIX_TO_STR" if f.args.len() >= 1 => {
21977 let mut args = f.args;
21978 let this = args.remove(0);
21979 let format = if !args.is_empty() {
21980 if let Expression::Literal(lit) = args.remove(0) {
21981 if let Literal::String(s) = lit.as_ref() {
21982 Some(s.clone())
21983 } else {
21984 None
21985 }
21986 } else {
21987 None
21988 }
21989 } else {
21990 None
21991 };
21992 Ok(Expression::UnixToStr(Box::new(
21993 crate::expressions::UnixToStr {
21994 this: Box::new(this),
21995 format,
21996 },
21997 )))
21998 }
21999 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
22000 "UNIX_TO_TIME" if f.args.len() == 1 => {
22001 let mut args = f.args;
22002 let this = args.remove(0);
22003 Ok(Expression::UnixToTime(Box::new(
22004 crate::expressions::UnixToTime {
22005 this: Box::new(this),
22006 scale: None,
22007 zone: None,
22008 hours: None,
22009 minutes: None,
22010 format: None,
22011 target_type: None,
22012 },
22013 )))
22014 }
22015 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
22016 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
22017 let mut args = f.args;
22018 let this = args.remove(0);
22019 Ok(Expression::TimeStrToDate(Box::new(
22020 crate::expressions::UnaryFunc {
22021 this,
22022 original_name: None,
22023 inferred_type: None,
22024 },
22025 )))
22026 }
22027 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
22028 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
22029 let mut args = f.args;
22030 let this = args.remove(0);
22031 Ok(Expression::TimeStrToTime(Box::new(
22032 crate::expressions::TimeStrToTime {
22033 this: Box::new(this),
22034 zone: None,
22035 },
22036 )))
22037 }
22038 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
22039 "MONTHS_BETWEEN" if f.args.len() == 2 => {
22040 match target {
22041 DialectType::DuckDB => {
22042 let mut args = f.args;
22043 let end_date = args.remove(0);
22044 let start_date = args.remove(0);
22045 let cast_end = Self::ensure_cast_date(end_date);
22046 let cast_start = Self::ensure_cast_date(start_date);
22047 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
22048 let dd = Expression::Function(Box::new(Function::new(
22049 "DATE_DIFF".to_string(),
22050 vec![
22051 Expression::string("MONTH"),
22052 cast_start.clone(),
22053 cast_end.clone(),
22054 ],
22055 )));
22056 let day_end =
22057 Expression::Function(Box::new(Function::new(
22058 "DAY".to_string(),
22059 vec![cast_end.clone()],
22060 )));
22061 let day_start =
22062 Expression::Function(Box::new(Function::new(
22063 "DAY".to_string(),
22064 vec![cast_start.clone()],
22065 )));
22066 let last_day_end =
22067 Expression::Function(Box::new(Function::new(
22068 "LAST_DAY".to_string(),
22069 vec![cast_end.clone()],
22070 )));
22071 let last_day_start =
22072 Expression::Function(Box::new(Function::new(
22073 "LAST_DAY".to_string(),
22074 vec![cast_start.clone()],
22075 )));
22076 let day_last_end = Expression::Function(Box::new(
22077 Function::new("DAY".to_string(), vec![last_day_end]),
22078 ));
22079 let day_last_start = Expression::Function(Box::new(
22080 Function::new("DAY".to_string(), vec![last_day_start]),
22081 ));
22082 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
22083 day_end.clone(),
22084 day_last_end,
22085 )));
22086 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
22087 day_start.clone(),
22088 day_last_start,
22089 )));
22090 let both_cond =
22091 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
22092 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
22093 day_end, day_start,
22094 )));
22095 let day_diff_paren = Expression::Paren(Box::new(
22096 crate::expressions::Paren {
22097 this: day_diff,
22098 trailing_comments: Vec::new(),
22099 },
22100 ));
22101 let frac = Expression::Div(Box::new(BinaryOp::new(
22102 day_diff_paren,
22103 Expression::Literal(Box::new(Literal::Number(
22104 "31.0".to_string(),
22105 ))),
22106 )));
22107 let case_expr = Expression::Case(Box::new(Case {
22108 operand: None,
22109 whens: vec![(both_cond, Expression::number(0))],
22110 else_: Some(frac),
22111 comments: Vec::new(),
22112 inferred_type: None,
22113 }));
22114 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
22115 }
22116 DialectType::Snowflake | DialectType::Redshift => {
22117 let mut args = f.args;
22118 let end_date = args.remove(0);
22119 let start_date = args.remove(0);
22120 let unit = Expression::Identifier(Identifier::new("MONTH"));
22121 Ok(Expression::Function(Box::new(Function::new(
22122 "DATEDIFF".to_string(),
22123 vec![unit, start_date, end_date],
22124 ))))
22125 }
22126 DialectType::Presto
22127 | DialectType::Trino
22128 | DialectType::Athena => {
22129 let mut args = f.args;
22130 let end_date = args.remove(0);
22131 let start_date = args.remove(0);
22132 Ok(Expression::Function(Box::new(Function::new(
22133 "DATE_DIFF".to_string(),
22134 vec![Expression::string("MONTH"), start_date, end_date],
22135 ))))
22136 }
22137 _ => Ok(Expression::Function(f)),
22138 }
22139 }
22140 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
22141 // Drop the roundOff arg for non-Spark targets, keep it for Spark
22142 "MONTHS_BETWEEN" if f.args.len() == 3 => {
22143 match target {
22144 DialectType::Spark | DialectType::Databricks => {
22145 Ok(Expression::Function(f))
22146 }
22147 _ => {
22148 // Drop the 3rd arg and delegate to the 2-arg logic
22149 let mut args = f.args;
22150 let end_date = args.remove(0);
22151 let start_date = args.remove(0);
22152 // Re-create as 2-arg and process
22153 let f2 = Function::new(
22154 "MONTHS_BETWEEN".to_string(),
22155 vec![end_date, start_date],
22156 );
22157 let e2 = Expression::Function(Box::new(f2));
22158 Self::cross_dialect_normalize(e2, source, target)
22159 }
22160 }
22161 }
22162 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
22163 "TO_TIMESTAMP"
22164 if f.args.len() == 1
22165 && matches!(
22166 source,
22167 DialectType::Spark
22168 | DialectType::Databricks
22169 | DialectType::Hive
22170 ) =>
22171 {
22172 let arg = f.args.into_iter().next().unwrap();
22173 Ok(Expression::Cast(Box::new(Cast {
22174 this: arg,
22175 to: DataType::Timestamp {
22176 timezone: false,
22177 precision: None,
22178 },
22179 trailing_comments: vec![],
22180 double_colon_syntax: false,
22181 format: None,
22182 default: None,
22183 inferred_type: None,
22184 })))
22185 }
22186 // STRING(x) -> CAST(x AS STRING) for Spark target
22187 "STRING"
22188 if f.args.len() == 1
22189 && matches!(
22190 source,
22191 DialectType::Spark | DialectType::Databricks
22192 ) =>
22193 {
22194 let arg = f.args.into_iter().next().unwrap();
22195 let dt = match target {
22196 DialectType::Spark
22197 | DialectType::Databricks
22198 | DialectType::Hive => DataType::Custom {
22199 name: "STRING".to_string(),
22200 },
22201 _ => DataType::Text,
22202 };
22203 Ok(Expression::Cast(Box::new(Cast {
22204 this: arg,
22205 to: dt,
22206 trailing_comments: vec![],
22207 double_colon_syntax: false,
22208 format: None,
22209 default: None,
22210 inferred_type: None,
22211 })))
22212 }
22213 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
22214 "LOGICAL_OR" if f.args.len() == 1 => {
22215 let name = match target {
22216 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
22217 _ => "LOGICAL_OR",
22218 };
22219 Ok(Expression::Function(Box::new(Function::new(
22220 name.to_string(),
22221 f.args,
22222 ))))
22223 }
22224 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
22225 "SPLIT"
22226 if f.args.len() == 2
22227 && matches!(
22228 source,
22229 DialectType::Spark
22230 | DialectType::Databricks
22231 | DialectType::Hive
22232 ) =>
22233 {
22234 let name = match target {
22235 DialectType::DuckDB => "STR_SPLIT_REGEX",
22236 DialectType::Presto
22237 | DialectType::Trino
22238 | DialectType::Athena => "REGEXP_SPLIT",
22239 DialectType::Spark
22240 | DialectType::Databricks
22241 | DialectType::Hive => "SPLIT",
22242 _ => "SPLIT",
22243 };
22244 Ok(Expression::Function(Box::new(Function::new(
22245 name.to_string(),
22246 f.args,
22247 ))))
22248 }
22249 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
22250 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
22251 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
22252 Ok(Expression::Function(Box::new(Function::new(
22253 "ELEMENT_AT".to_string(),
22254 f.args,
22255 ))))
22256 }
22257 DialectType::DuckDB => {
22258 let mut args = f.args;
22259 let arr = args.remove(0);
22260 let idx = args.remove(0);
22261 Ok(Expression::Subscript(Box::new(
22262 crate::expressions::Subscript {
22263 this: arr,
22264 index: idx,
22265 },
22266 )))
22267 }
22268 _ => Ok(Expression::Function(f)),
22269 },
22270 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
22271 "ARRAY_FILTER" if f.args.len() == 2 => {
22272 let name = match target {
22273 DialectType::DuckDB => "LIST_FILTER",
22274 DialectType::StarRocks => "ARRAY_FILTER",
22275 _ => "FILTER",
22276 };
22277 Ok(Expression::Function(Box::new(Function::new(
22278 name.to_string(),
22279 f.args,
22280 ))))
22281 }
22282 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
22283 "FILTER" if f.args.len() == 2 => {
22284 let name = match target {
22285 DialectType::DuckDB => "LIST_FILTER",
22286 DialectType::StarRocks => "ARRAY_FILTER",
22287 _ => "FILTER",
22288 };
22289 Ok(Expression::Function(Box::new(Function::new(
22290 name.to_string(),
22291 f.args,
22292 ))))
22293 }
22294 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
22295 "REDUCE" if f.args.len() >= 3 => {
22296 let name = match target {
22297 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
22298 _ => "REDUCE",
22299 };
22300 Ok(Expression::Function(Box::new(Function::new(
22301 name.to_string(),
22302 f.args,
22303 ))))
22304 }
22305 // CURRENT_SCHEMA() -> dialect-specific
22306 "CURRENT_SCHEMA" => {
22307 match target {
22308 DialectType::PostgreSQL => {
22309 // PostgreSQL: CURRENT_SCHEMA (no parens)
22310 Ok(Expression::Function(Box::new(Function {
22311 name: "CURRENT_SCHEMA".to_string(),
22312 args: vec![],
22313 distinct: false,
22314 trailing_comments: vec![],
22315 use_bracket_syntax: false,
22316 no_parens: true,
22317 quoted: false,
22318 span: None,
22319 inferred_type: None,
22320 })))
22321 }
22322 DialectType::MySQL
22323 | DialectType::Doris
22324 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
22325 Function::new("SCHEMA".to_string(), vec![]),
22326 ))),
22327 DialectType::TSQL => Ok(Expression::Function(Box::new(
22328 Function::new("SCHEMA_NAME".to_string(), vec![]),
22329 ))),
22330 DialectType::SQLite => Ok(Expression::Literal(Box::new(
22331 Literal::String("main".to_string()),
22332 ))),
22333 _ => Ok(Expression::Function(f)),
22334 }
22335 }
22336 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
22337 "LTRIM" if f.args.len() == 2 => match target {
22338 DialectType::Spark
22339 | DialectType::Hive
22340 | DialectType::Databricks
22341 | DialectType::ClickHouse => {
22342 let mut args = f.args;
22343 let str_expr = args.remove(0);
22344 let chars = args.remove(0);
22345 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
22346 this: str_expr,
22347 characters: Some(chars),
22348 position: crate::expressions::TrimPosition::Leading,
22349 sql_standard_syntax: true,
22350 position_explicit: true,
22351 })))
22352 }
22353 _ => Ok(Expression::Function(f)),
22354 },
22355 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
22356 "RTRIM" if f.args.len() == 2 => match target {
22357 DialectType::Spark
22358 | DialectType::Hive
22359 | DialectType::Databricks
22360 | DialectType::ClickHouse => {
22361 let mut args = f.args;
22362 let str_expr = args.remove(0);
22363 let chars = args.remove(0);
22364 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
22365 this: str_expr,
22366 characters: Some(chars),
22367 position: crate::expressions::TrimPosition::Trailing,
22368 sql_standard_syntax: true,
22369 position_explicit: true,
22370 })))
22371 }
22372 _ => Ok(Expression::Function(f)),
22373 },
22374 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
22375 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
22376 DialectType::ClickHouse => {
22377 let mut new_f = *f;
22378 new_f.name = "arrayReverse".to_string();
22379 Ok(Expression::Function(Box::new(new_f)))
22380 }
22381 _ => Ok(Expression::Function(f)),
22382 },
22383 // UUID() -> NEWID() for TSQL
22384 "UUID" if f.args.is_empty() => match target {
22385 DialectType::TSQL | DialectType::Fabric => {
22386 Ok(Expression::Function(Box::new(Function::new(
22387 "NEWID".to_string(),
22388 vec![],
22389 ))))
22390 }
22391 _ => Ok(Expression::Function(f)),
22392 },
22393 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
22394 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
22395 DialectType::ClickHouse => {
22396 let mut new_f = *f;
22397 new_f.name = "farmFingerprint64".to_string();
22398 Ok(Expression::Function(Box::new(new_f)))
22399 }
22400 DialectType::Redshift => {
22401 let mut new_f = *f;
22402 new_f.name = "FARMFINGERPRINT64".to_string();
22403 Ok(Expression::Function(Box::new(new_f)))
22404 }
22405 _ => Ok(Expression::Function(f)),
22406 },
22407 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
22408 "JSON_KEYS" => match target {
22409 DialectType::Databricks | DialectType::Spark => {
22410 let mut new_f = *f;
22411 new_f.name = "JSON_OBJECT_KEYS".to_string();
22412 Ok(Expression::Function(Box::new(new_f)))
22413 }
22414 DialectType::Snowflake => {
22415 let mut new_f = *f;
22416 new_f.name = "OBJECT_KEYS".to_string();
22417 Ok(Expression::Function(Box::new(new_f)))
22418 }
22419 _ => Ok(Expression::Function(f)),
22420 },
22421 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
22422 "WEEKOFYEAR" => match target {
22423 DialectType::Snowflake => {
22424 let mut new_f = *f;
22425 new_f.name = "WEEKISO".to_string();
22426 Ok(Expression::Function(Box::new(new_f)))
22427 }
22428 _ => Ok(Expression::Function(f)),
22429 },
22430 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
22431 "FORMAT"
22432 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
22433 {
22434 match target {
22435 DialectType::Databricks | DialectType::Spark => {
22436 let mut new_f = *f;
22437 new_f.name = "FORMAT_STRING".to_string();
22438 Ok(Expression::Function(Box::new(new_f)))
22439 }
22440 _ => Ok(Expression::Function(f)),
22441 }
22442 }
22443 // CONCAT_WS from Generic is null-propagating in SQLGlot fixtures.
22444 // Trino also requires non-separator arguments cast to VARCHAR.
22445 "CONCAT_WS" if f.args.len() >= 2 => {
22446 fn concat_ws_null_case(
22447 args: Vec<Expression>,
22448 else_expr: Expression,
22449 ) -> Expression {
22450 let mut null_checks = args.iter().cloned().map(|arg| {
22451 Expression::IsNull(Box::new(crate::expressions::IsNull {
22452 this: arg,
22453 not: false,
22454 postfix_form: false,
22455 }))
22456 });
22457 let first_null_check = null_checks
22458 .next()
22459 .expect("CONCAT_WS with >= 2 args must yield a null check");
22460 let null_check =
22461 null_checks.fold(first_null_check, |left, right| {
22462 Expression::Or(Box::new(BinaryOp {
22463 left,
22464 right,
22465 left_comments: Vec::new(),
22466 operator_comments: Vec::new(),
22467 trailing_comments: Vec::new(),
22468 inferred_type: None,
22469 }))
22470 });
22471 Expression::Case(Box::new(Case {
22472 operand: None,
22473 whens: vec![(null_check, Expression::Null(Null))],
22474 else_: Some(else_expr),
22475 comments: vec![],
22476 inferred_type: None,
22477 }))
22478 }
22479
22480 match target {
22481 DialectType::Trino
22482 if matches!(source, DialectType::Generic) =>
22483 {
22484 let original_args = f.args.clone();
22485 let mut args = f.args;
22486 let sep = args.remove(0);
22487 let cast_args: Vec<Expression> = args
22488 .into_iter()
22489 .map(|a| {
22490 Expression::Cast(Box::new(Cast {
22491 this: a,
22492 to: DataType::VarChar {
22493 length: None,
22494 parenthesized_length: false,
22495 },
22496 double_colon_syntax: false,
22497 trailing_comments: Vec::new(),
22498 format: None,
22499 default: None,
22500 inferred_type: None,
22501 }))
22502 })
22503 .collect();
22504 let mut new_args = vec![sep];
22505 new_args.extend(cast_args);
22506 let else_expr = Expression::Function(Box::new(
22507 Function::new("CONCAT_WS".to_string(), new_args),
22508 ));
22509 Ok(concat_ws_null_case(original_args, else_expr))
22510 }
22511 DialectType::Presto
22512 | DialectType::Trino
22513 | DialectType::Athena => {
22514 let mut args = f.args;
22515 let sep = args.remove(0);
22516 let cast_args: Vec<Expression> = args
22517 .into_iter()
22518 .map(|a| {
22519 Expression::Cast(Box::new(Cast {
22520 this: a,
22521 to: DataType::VarChar {
22522 length: None,
22523 parenthesized_length: false,
22524 },
22525 double_colon_syntax: false,
22526 trailing_comments: Vec::new(),
22527 format: None,
22528 default: None,
22529 inferred_type: None,
22530 }))
22531 })
22532 .collect();
22533 let mut new_args = vec![sep];
22534 new_args.extend(cast_args);
22535 Ok(Expression::Function(Box::new(Function::new(
22536 "CONCAT_WS".to_string(),
22537 new_args,
22538 ))))
22539 }
22540 DialectType::Spark
22541 | DialectType::Hive
22542 | DialectType::DuckDB
22543 if matches!(source, DialectType::Generic) =>
22544 {
22545 let args = f.args;
22546 let else_expr = Expression::Function(Box::new(
22547 Function::new("CONCAT_WS".to_string(), args.clone()),
22548 ));
22549 Ok(concat_ws_null_case(args, else_expr))
22550 }
22551 _ => Ok(Expression::Function(f)),
22552 }
22553 }
22554 // ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
22555 "ARRAY_SLICE" if f.args.len() >= 2 => match target {
22556 DialectType::DuckDB
22557 if f.args.len() == 3
22558 && matches!(source, DialectType::Snowflake) =>
22559 {
22560 // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
22561 // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
22562 let mut args = f.args;
22563 let arr = args.remove(0);
22564 let start = args.remove(0);
22565 let end = args.remove(0);
22566
22567 // CASE WHEN start >= 0 THEN start + 1 ELSE start END
22568 let adjusted_start = Expression::Case(Box::new(Case {
22569 operand: None,
22570 whens: vec![(
22571 Expression::Gte(Box::new(BinaryOp {
22572 left: start.clone(),
22573 right: Expression::number(0),
22574 left_comments: vec![],
22575 operator_comments: vec![],
22576 trailing_comments: vec![],
22577 inferred_type: None,
22578 })),
22579 Expression::Add(Box::new(BinaryOp {
22580 left: start.clone(),
22581 right: Expression::number(1),
22582 left_comments: vec![],
22583 operator_comments: vec![],
22584 trailing_comments: vec![],
22585 inferred_type: None,
22586 })),
22587 )],
22588 else_: Some(start),
22589 comments: vec![],
22590 inferred_type: None,
22591 }));
22592
22593 // CASE WHEN end < 0 THEN end - 1 ELSE end END
22594 let adjusted_end = Expression::Case(Box::new(Case {
22595 operand: None,
22596 whens: vec![(
22597 Expression::Lt(Box::new(BinaryOp {
22598 left: end.clone(),
22599 right: Expression::number(0),
22600 left_comments: vec![],
22601 operator_comments: vec![],
22602 trailing_comments: vec![],
22603 inferred_type: None,
22604 })),
22605 Expression::Sub(Box::new(BinaryOp {
22606 left: end.clone(),
22607 right: Expression::number(1),
22608 left_comments: vec![],
22609 operator_comments: vec![],
22610 trailing_comments: vec![],
22611 inferred_type: None,
22612 })),
22613 )],
22614 else_: Some(end),
22615 comments: vec![],
22616 inferred_type: None,
22617 }));
22618
22619 Ok(Expression::Function(Box::new(Function::new(
22620 "ARRAY_SLICE".to_string(),
22621 vec![arr, adjusted_start, adjusted_end],
22622 ))))
22623 }
22624 DialectType::Presto
22625 | DialectType::Trino
22626 | DialectType::Athena
22627 | DialectType::Databricks
22628 | DialectType::Spark => {
22629 let mut new_f = *f;
22630 new_f.name = "SLICE".to_string();
22631 Ok(Expression::Function(Box::new(new_f)))
22632 }
22633 DialectType::ClickHouse => {
22634 let mut new_f = *f;
22635 new_f.name = "arraySlice".to_string();
22636 Ok(Expression::Function(Box::new(new_f)))
22637 }
22638 _ => Ok(Expression::Function(f)),
22639 },
22640 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
22641 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
22642 DialectType::DuckDB => {
22643 let mut args = f.args;
22644 let arr = args.remove(0);
22645 let val = args.remove(0);
22646 Ok(Expression::Function(Box::new(Function::new(
22647 "LIST_PREPEND".to_string(),
22648 vec![val, arr],
22649 ))))
22650 }
22651 _ => Ok(Expression::Function(f)),
22652 },
22653 // ARRAY_REMOVE(arr, target) -> dialect-specific
22654 "ARRAY_REMOVE" if f.args.len() == 2 => {
22655 match target {
22656 DialectType::DuckDB => {
22657 let mut args = f.args;
22658 let arr = args.remove(0);
22659 let target_val = args.remove(0);
22660 let u_id = crate::expressions::Identifier::new("_u");
22661 // LIST_FILTER(arr, _u -> _u <> target)
22662 let lambda = Expression::Lambda(Box::new(
22663 crate::expressions::LambdaExpr {
22664 parameters: vec![u_id.clone()],
22665 body: Expression::Neq(Box::new(BinaryOp {
22666 left: Expression::Identifier(u_id),
22667 right: target_val,
22668 left_comments: Vec::new(),
22669 operator_comments: Vec::new(),
22670 trailing_comments: Vec::new(),
22671 inferred_type: None,
22672 })),
22673 colon: false,
22674 parameter_types: Vec::new(),
22675 },
22676 ));
22677 Ok(Expression::Function(Box::new(Function::new(
22678 "LIST_FILTER".to_string(),
22679 vec![arr, lambda],
22680 ))))
22681 }
22682 DialectType::ClickHouse => {
22683 let mut args = f.args;
22684 let arr = args.remove(0);
22685 let target_val = args.remove(0);
22686 let u_id = crate::expressions::Identifier::new("_u");
22687 // arrayFilter(_u -> _u <> target, arr)
22688 let lambda = Expression::Lambda(Box::new(
22689 crate::expressions::LambdaExpr {
22690 parameters: vec![u_id.clone()],
22691 body: Expression::Neq(Box::new(BinaryOp {
22692 left: Expression::Identifier(u_id),
22693 right: target_val,
22694 left_comments: Vec::new(),
22695 operator_comments: Vec::new(),
22696 trailing_comments: Vec::new(),
22697 inferred_type: None,
22698 })),
22699 colon: false,
22700 parameter_types: Vec::new(),
22701 },
22702 ));
22703 Ok(Expression::Function(Box::new(Function::new(
22704 "arrayFilter".to_string(),
22705 vec![lambda, arr],
22706 ))))
22707 }
22708 DialectType::BigQuery => {
22709 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
22710 let mut args = f.args;
22711 let arr = args.remove(0);
22712 let target_val = args.remove(0);
22713 let u_id = crate::expressions::Identifier::new("_u");
22714 let u_col = Expression::Column(Box::new(
22715 crate::expressions::Column {
22716 name: u_id.clone(),
22717 table: None,
22718 join_mark: false,
22719 trailing_comments: Vec::new(),
22720 span: None,
22721 inferred_type: None,
22722 },
22723 ));
22724 // UNNEST(the_array) AS _u
22725 let unnest_expr = Expression::Unnest(Box::new(
22726 crate::expressions::UnnestFunc {
22727 this: arr,
22728 expressions: Vec::new(),
22729 with_ordinality: false,
22730 alias: None,
22731 offset_alias: None,
22732 },
22733 ));
22734 let aliased_unnest = Expression::Alias(Box::new(
22735 crate::expressions::Alias {
22736 this: unnest_expr,
22737 alias: u_id.clone(),
22738 column_aliases: Vec::new(),
22739 alias_explicit_as: false,
22740 alias_keyword: None,
22741 pre_alias_comments: Vec::new(),
22742 trailing_comments: Vec::new(),
22743 inferred_type: None,
22744 },
22745 ));
22746 // _u <> target
22747 let where_cond = Expression::Neq(Box::new(BinaryOp {
22748 left: u_col.clone(),
22749 right: target_val,
22750 left_comments: Vec::new(),
22751 operator_comments: Vec::new(),
22752 trailing_comments: Vec::new(),
22753 inferred_type: None,
22754 }));
22755 // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
22756 let subquery = Expression::Select(Box::new(
22757 crate::expressions::Select::new()
22758 .column(u_col)
22759 .from(aliased_unnest)
22760 .where_(where_cond),
22761 ));
22762 // ARRAY(subquery) -- use ArrayFunc with subquery as single element
22763 Ok(Expression::ArrayFunc(Box::new(
22764 crate::expressions::ArrayConstructor {
22765 expressions: vec![subquery],
22766 bracket_notation: false,
22767 use_list_keyword: false,
22768 },
22769 )))
22770 }
22771 _ => Ok(Expression::Function(f)),
22772 }
22773 }
22774 // PARSE_JSON(str) -> str for SQLite/Doris/MySQL/StarRocks (strip the wrapper, keep the inner argument)
22775 "PARSE_JSON" if f.args.len() == 1 => {
22776 match target {
22777 DialectType::SQLite
22778 | DialectType::Doris
22779 | DialectType::MySQL
22780 | DialectType::StarRocks => {
22781 // Strip PARSE_JSON, return the inner argument
22782 Ok(f.args.into_iter().next().unwrap())
22783 }
22784 _ => Ok(Expression::Function(f)),
22785 }
22786 }
22787 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
22788 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
22789 "JSON_REMOVE" => Ok(Expression::Function(f)),
22790 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
22791 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
22792 "JSON_SET" => Ok(Expression::Function(f)),
22793 // DECODE(x, search1, result1, ..., default) -> CASE WHEN
22794 // Behavior per search value type:
22795 // NULL literal -> CASE WHEN x IS NULL THEN result
22796 // Literal (number, string, bool) -> CASE WHEN x = literal THEN result
22797 // Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
22798 "DECODE" if f.args.len() >= 3 => {
22799 // Keep as DECODE for targets that support it natively
22800 let keep_as_decode = matches!(
22801 target,
22802 DialectType::Oracle
22803 | DialectType::Snowflake
22804 | DialectType::Redshift
22805 | DialectType::Teradata
22806 | DialectType::Spark
22807 | DialectType::Databricks
22808 );
22809 if keep_as_decode {
22810 return Ok(Expression::Function(f));
22811 }
22812
22813 let mut args = f.args;
22814 let this_expr = args.remove(0);
22815 let mut pairs = Vec::new();
22816 let mut default = None;
22817 let mut i = 0;
22818 while i + 1 < args.len() {
22819 pairs.push((args[i].clone(), args[i + 1].clone()));
22820 i += 2;
22821 }
22822 if i < args.len() {
22823 default = Some(args[i].clone());
22824 }
22825 // Helper: check if expression is a literal value
22826 fn is_literal(e: &Expression) -> bool {
22827 matches!(
22828 e,
22829 Expression::Literal(_)
22830 | Expression::Boolean(_)
22831 | Expression::Neg(_)
22832 )
22833 }
22834 let whens: Vec<(Expression, Expression)> = pairs
22835 .into_iter()
22836 .map(|(search, result)| {
22837 if matches!(&search, Expression::Null(_)) {
22838 // NULL search -> IS NULL
22839 let condition = Expression::Is(Box::new(BinaryOp {
22840 left: this_expr.clone(),
22841 right: Expression::Null(crate::expressions::Null),
22842 left_comments: Vec::new(),
22843 operator_comments: Vec::new(),
22844 trailing_comments: Vec::new(),
22845 inferred_type: None,
22846 }));
22847 (condition, result)
22848 } else if is_literal(&search) {
22849 // Literal search -> simple equality
22850 let eq = Expression::Eq(Box::new(BinaryOp {
22851 left: this_expr.clone(),
22852 right: search,
22853 left_comments: Vec::new(),
22854 operator_comments: Vec::new(),
22855 trailing_comments: Vec::new(),
22856 inferred_type: None,
22857 }));
22858 (eq, result)
22859 } else {
22860 // Non-literal (column ref, expression) -> null-safe comparison
22861 let needs_paren = matches!(
22862 &search,
22863 Expression::Eq(_)
22864 | Expression::Neq(_)
22865 | Expression::Gt(_)
22866 | Expression::Gte(_)
22867 | Expression::Lt(_)
22868 | Expression::Lte(_)
22869 );
22870 let search_for_eq = if needs_paren {
22871 Expression::Paren(Box::new(
22872 crate::expressions::Paren {
22873 this: search.clone(),
22874 trailing_comments: Vec::new(),
22875 },
22876 ))
22877 } else {
22878 search.clone()
22879 };
22880 let eq = Expression::Eq(Box::new(BinaryOp {
22881 left: this_expr.clone(),
22882 right: search_for_eq,
22883 left_comments: Vec::new(),
22884 operator_comments: Vec::new(),
22885 trailing_comments: Vec::new(),
22886 inferred_type: None,
22887 }));
22888 let search_for_null = if needs_paren {
22889 Expression::Paren(Box::new(
22890 crate::expressions::Paren {
22891 this: search.clone(),
22892 trailing_comments: Vec::new(),
22893 },
22894 ))
22895 } else {
22896 search.clone()
22897 };
22898 let x_is_null = Expression::Is(Box::new(BinaryOp {
22899 left: this_expr.clone(),
22900 right: Expression::Null(crate::expressions::Null),
22901 left_comments: Vec::new(),
22902 operator_comments: Vec::new(),
22903 trailing_comments: Vec::new(),
22904 inferred_type: None,
22905 }));
22906 let s_is_null = Expression::Is(Box::new(BinaryOp {
22907 left: search_for_null,
22908 right: Expression::Null(crate::expressions::Null),
22909 left_comments: Vec::new(),
22910 operator_comments: Vec::new(),
22911 trailing_comments: Vec::new(),
22912 inferred_type: None,
22913 }));
22914 let both_null = Expression::And(Box::new(BinaryOp {
22915 left: x_is_null,
22916 right: s_is_null,
22917 left_comments: Vec::new(),
22918 operator_comments: Vec::new(),
22919 trailing_comments: Vec::new(),
22920 inferred_type: None,
22921 }));
22922 let condition = Expression::Or(Box::new(BinaryOp {
22923 left: eq,
22924 right: Expression::Paren(Box::new(
22925 crate::expressions::Paren {
22926 this: both_null,
22927 trailing_comments: Vec::new(),
22928 },
22929 )),
22930 left_comments: Vec::new(),
22931 operator_comments: Vec::new(),
22932 trailing_comments: Vec::new(),
22933 inferred_type: None,
22934 }));
22935 (condition, result)
22936 }
22937 })
22938 .collect();
22939 Ok(Expression::Case(Box::new(Case {
22940 operand: None,
22941 whens,
22942 else_: default,
22943 comments: Vec::new(),
22944 inferred_type: None,
22945 })))
22946 }
22947 // LEVENSHTEIN(a, b, ...) -> dialect-specific
22948 "LEVENSHTEIN" => {
22949 match target {
22950 DialectType::BigQuery => {
22951 let mut new_f = *f;
22952 new_f.name = "EDIT_DISTANCE".to_string();
22953 Ok(Expression::Function(Box::new(new_f)))
22954 }
22955 DialectType::Drill => {
22956 let mut new_f = *f;
22957 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
22958 Ok(Expression::Function(Box::new(new_f)))
22959 }
22960 DialectType::PostgreSQL if f.args.len() == 6 => {
22961 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
22962 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
22963 let mut new_f = *f;
22964 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
22965 Ok(Expression::Function(Box::new(new_f)))
22966 }
22967 _ => Ok(Expression::Function(f)),
22968 }
22969 }
22970 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
22971 "ARRAY_MAX" => {
22972 let name = match target {
22973 DialectType::ClickHouse => "arrayMax",
22974 DialectType::DuckDB => "LIST_MAX",
22975 _ => "ARRAY_MAX",
22976 };
22977 let mut new_f = *f;
22978 new_f.name = name.to_string();
22979 Ok(Expression::Function(Box::new(new_f)))
22980 }
22981 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
22982 "ARRAY_MIN" => {
22983 let name = match target {
22984 DialectType::ClickHouse => "arrayMin",
22985 DialectType::DuckDB => "LIST_MIN",
22986 _ => "ARRAY_MIN",
22987 };
22988 let mut new_f = *f;
22989 new_f.name = name.to_string();
22990 Ok(Expression::Function(Box::new(new_f)))
22991 }
22992 // JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
22993 // -> CAST(JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) * 100 AS INT) for DuckDB
22994 "JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
22995 let mut args = f.args;
22996 let b = args.pop().unwrap();
22997 let a = args.pop().unwrap();
22998 match target {
22999 DialectType::ClickHouse => {
23000 let upper_a = Expression::Upper(Box::new(
23001 crate::expressions::UnaryFunc::new(a),
23002 ));
23003 let upper_b = Expression::Upper(Box::new(
23004 crate::expressions::UnaryFunc::new(b),
23005 ));
23006 Ok(Expression::Function(Box::new(Function::new(
23007 "jaroWinklerSimilarity".to_string(),
23008 vec![upper_a, upper_b],
23009 ))))
23010 }
23011 DialectType::DuckDB => {
23012 let upper_a = Expression::Upper(Box::new(
23013 crate::expressions::UnaryFunc::new(a),
23014 ));
23015 let upper_b = Expression::Upper(Box::new(
23016 crate::expressions::UnaryFunc::new(b),
23017 ));
23018 let score = Expression::Function(Box::new(Function::new(
23019 "JARO_WINKLER_SIMILARITY".to_string(),
23020 vec![upper_a, upper_b],
23021 )));
23022 let scaled = Expression::Mul(Box::new(BinaryOp {
23023 left: score,
23024 right: Expression::number(100),
23025 left_comments: Vec::new(),
23026 operator_comments: Vec::new(),
23027 trailing_comments: Vec::new(),
23028 inferred_type: None,
23029 }));
23030 Ok(Expression::Cast(Box::new(Cast {
23031 this: scaled,
23032 to: DataType::Int {
23033 length: None,
23034 integer_spelling: false,
23035 },
23036 trailing_comments: Vec::new(),
23037 double_colon_syntax: false,
23038 format: None,
23039 default: None,
23040 inferred_type: None,
23041 })))
23042 }
23043 _ => Ok(Expression::Function(Box::new(Function::new(
23044 "JAROWINKLER_SIMILARITY".to_string(),
23045 vec![a, b],
23046 )))),
23047 }
23048 }
23049 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
23050 "CURRENT_SCHEMAS" => match target {
23051 DialectType::Snowflake => Ok(Expression::Function(Box::new(
23052 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
23053 ))),
23054 _ => Ok(Expression::Function(f)),
23055 },
23056 // TRUNC/TRUNCATE (numeric) -> dialect-specific
23057 "TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
23058 match target {
23059 DialectType::TSQL | DialectType::Fabric => {
23060 // ROUND(x, decimals, 1) - the 1 flag means truncation
23061 let mut args = f.args;
23062 let this = if args.is_empty() {
23063 return Ok(Expression::Function(Box::new(
23064 Function::new("TRUNC".to_string(), args),
23065 )));
23066 } else {
23067 args.remove(0)
23068 };
23069 let decimals = if args.is_empty() {
23070 Expression::Literal(Box::new(Literal::Number(
23071 "0".to_string(),
23072 )))
23073 } else {
23074 args.remove(0)
23075 };
23076 Ok(Expression::Function(Box::new(Function::new(
23077 "ROUND".to_string(),
23078 vec![
23079 this,
23080 decimals,
23081 Expression::Literal(Box::new(Literal::Number(
23082 "1".to_string(),
23083 ))),
23084 ],
23085 ))))
23086 }
23087 DialectType::Presto
23088 | DialectType::Trino
23089 | DialectType::Athena => {
23090 // TRUNCATE(x, decimals)
23091 let mut new_f = *f;
23092 new_f.name = "TRUNCATE".to_string();
23093 Ok(Expression::Function(Box::new(new_f)))
23094 }
23095 DialectType::MySQL
23096 | DialectType::SingleStore
23097 | DialectType::TiDB => {
23098 // TRUNCATE(x, decimals)
23099 let mut new_f = *f;
23100 new_f.name = "TRUNCATE".to_string();
23101 Ok(Expression::Function(Box::new(new_f)))
23102 }
23103 DialectType::DuckDB => {
23104 // DuckDB supports TRUNC(x, decimals) — preserve both args
23105 let mut args = f.args;
23106 // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
23107 if args.len() == 2
23108 && matches!(source, DialectType::Snowflake)
23109 {
23110 let decimals = args.remove(1);
23111 let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
23112 || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
23113 let wrapped = if !is_int {
23114 Expression::Cast(Box::new(
23115 crate::expressions::Cast {
23116 this: decimals,
23117 to: DataType::Int {
23118 length: None,
23119 integer_spelling: false,
23120 },
23121 double_colon_syntax: false,
23122 trailing_comments: Vec::new(),
23123 format: None,
23124 default: None,
23125 inferred_type: None,
23126 },
23127 ))
23128 } else {
23129 decimals
23130 };
23131 args.push(wrapped);
23132 }
23133 Ok(Expression::Function(Box::new(Function::new(
23134 "TRUNC".to_string(),
23135 args,
23136 ))))
23137 }
23138 DialectType::ClickHouse => {
23139 // trunc(x, decimals) - lowercase
23140 let mut new_f = *f;
23141 new_f.name = "trunc".to_string();
23142 Ok(Expression::Function(Box::new(new_f)))
23143 }
23144 DialectType::Spark | DialectType::Databricks => {
23145 // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
23146 let this = f.args.into_iter().next().unwrap_or(
23147 Expression::Literal(Box::new(Literal::Number(
23148 "0".to_string(),
23149 ))),
23150 );
23151 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
23152 this,
23153 to: crate::expressions::DataType::BigInt {
23154 length: None,
23155 },
23156 double_colon_syntax: false,
23157 trailing_comments: Vec::new(),
23158 format: None,
23159 default: None,
23160 inferred_type: None,
23161 })))
23162 }
23163 _ => {
23164 // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
23165 let mut new_f = *f;
23166 new_f.name = "TRUNC".to_string();
23167 Ok(Expression::Function(Box::new(new_f)))
23168 }
23169 }
23170 }
23171 // CURRENT_VERSION() -> VERSION() for most dialects
23172 "CURRENT_VERSION" => match target {
23173 DialectType::Snowflake
23174 | DialectType::Databricks
23175 | DialectType::StarRocks => Ok(Expression::Function(f)),
23176 DialectType::SQLite => {
23177 let mut new_f = *f;
23178 new_f.name = "SQLITE_VERSION".to_string();
23179 Ok(Expression::Function(Box::new(new_f)))
23180 }
23181 _ => {
23182 let mut new_f = *f;
23183 new_f.name = "VERSION".to_string();
23184 Ok(Expression::Function(Box::new(new_f)))
23185 }
23186 },
23187 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
23188 "ARRAY_REVERSE" => match target {
23189 DialectType::ClickHouse => {
23190 let mut new_f = *f;
23191 new_f.name = "arrayReverse".to_string();
23192 Ok(Expression::Function(Box::new(new_f)))
23193 }
23194 _ => Ok(Expression::Function(f)),
23195 },
23196 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
23197 "GENERATE_DATE_ARRAY" => {
23198 let mut args = f.args;
23199 if matches!(target, DialectType::BigQuery) {
23200 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
23201 if args.len() == 2 {
23202 let default_interval = Expression::Interval(Box::new(
23203 crate::expressions::Interval {
23204 this: Some(Expression::Literal(Box::new(
23205 Literal::String("1".to_string()),
23206 ))),
23207 unit: Some(
23208 crate::expressions::IntervalUnitSpec::Simple {
23209 unit: crate::expressions::IntervalUnit::Day,
23210 use_plural: false,
23211 },
23212 ),
23213 },
23214 ));
23215 args.push(default_interval);
23216 }
23217 Ok(Expression::Function(Box::new(Function::new(
23218 "GENERATE_DATE_ARRAY".to_string(),
23219 args,
23220 ))))
23221 } else if matches!(target, DialectType::DuckDB) {
23222 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
23223 let start = args.get(0).cloned();
23224 let end = args.get(1).cloned();
23225 let step = args.get(2).cloned().or_else(|| {
23226 Some(Expression::Interval(Box::new(
23227 crate::expressions::Interval {
23228 this: Some(Expression::Literal(Box::new(
23229 Literal::String("1".to_string()),
23230 ))),
23231 unit: Some(
23232 crate::expressions::IntervalUnitSpec::Simple {
23233 unit: crate::expressions::IntervalUnit::Day,
23234 use_plural: false,
23235 },
23236 ),
23237 },
23238 )))
23239 });
23240 let gen_series = Expression::GenerateSeries(Box::new(
23241 crate::expressions::GenerateSeries {
23242 start: start.map(Box::new),
23243 end: end.map(Box::new),
23244 step: step.map(Box::new),
23245 is_end_exclusive: None,
23246 },
23247 ));
23248 Ok(Expression::Cast(Box::new(Cast {
23249 this: gen_series,
23250 to: DataType::Array {
23251 element_type: Box::new(DataType::Date),
23252 dimension: None,
23253 },
23254 trailing_comments: vec![],
23255 double_colon_syntax: false,
23256 format: None,
23257 default: None,
23258 inferred_type: None,
23259 })))
23260 } else if matches!(
23261 target,
23262 DialectType::Presto | DialectType::Trino | DialectType::Athena
23263 ) {
23264 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
23265 let start = args.get(0).cloned();
23266 let end = args.get(1).cloned();
23267 let step = args.get(2).cloned().or_else(|| {
23268 Some(Expression::Interval(Box::new(
23269 crate::expressions::Interval {
23270 this: Some(Expression::Literal(Box::new(
23271 Literal::String("1".to_string()),
23272 ))),
23273 unit: Some(
23274 crate::expressions::IntervalUnitSpec::Simple {
23275 unit: crate::expressions::IntervalUnit::Day,
23276 use_plural: false,
23277 },
23278 ),
23279 },
23280 )))
23281 });
23282 let gen_series = Expression::GenerateSeries(Box::new(
23283 crate::expressions::GenerateSeries {
23284 start: start.map(Box::new),
23285 end: end.map(Box::new),
23286 step: step.map(Box::new),
23287 is_end_exclusive: None,
23288 },
23289 ));
23290 Ok(gen_series)
23291 } else if matches!(
23292 target,
23293 DialectType::Spark | DialectType::Databricks
23294 ) {
23295 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
23296 let start = args.get(0).cloned();
23297 let end = args.get(1).cloned();
23298 let step = args.get(2).cloned().or_else(|| {
23299 Some(Expression::Interval(Box::new(
23300 crate::expressions::Interval {
23301 this: Some(Expression::Literal(Box::new(
23302 Literal::String("1".to_string()),
23303 ))),
23304 unit: Some(
23305 crate::expressions::IntervalUnitSpec::Simple {
23306 unit: crate::expressions::IntervalUnit::Day,
23307 use_plural: false,
23308 },
23309 ),
23310 },
23311 )))
23312 });
23313 let gen_series = Expression::GenerateSeries(Box::new(
23314 crate::expressions::GenerateSeries {
23315 start: start.map(Box::new),
23316 end: end.map(Box::new),
23317 step: step.map(Box::new),
23318 is_end_exclusive: None,
23319 },
23320 ));
23321 Ok(gen_series)
23322 } else if matches!(target, DialectType::Snowflake) {
23323 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
23324 if args.len() == 2 {
23325 let default_interval = Expression::Interval(Box::new(
23326 crate::expressions::Interval {
23327 this: Some(Expression::Literal(Box::new(
23328 Literal::String("1".to_string()),
23329 ))),
23330 unit: Some(
23331 crate::expressions::IntervalUnitSpec::Simple {
23332 unit: crate::expressions::IntervalUnit::Day,
23333 use_plural: false,
23334 },
23335 ),
23336 },
23337 ));
23338 args.push(default_interval);
23339 }
23340 Ok(Expression::Function(Box::new(Function::new(
23341 "GENERATE_DATE_ARRAY".to_string(),
23342 args,
23343 ))))
23344 } else if matches!(
23345 target,
23346 DialectType::MySQL
23347 | DialectType::TSQL
23348 | DialectType::Fabric
23349 | DialectType::Redshift
23350 ) {
23351 // MySQL/TSQL/Fabric/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
23352 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
23353 Ok(Expression::Function(Box::new(Function::new(
23354 "GENERATE_DATE_ARRAY".to_string(),
23355 args,
23356 ))))
23357 } else {
23358 // PostgreSQL/others: convert to GenerateSeries
23359 let start = args.get(0).cloned();
23360 let end = args.get(1).cloned();
23361 let step = args.get(2).cloned().or_else(|| {
23362 Some(Expression::Interval(Box::new(
23363 crate::expressions::Interval {
23364 this: Some(Expression::Literal(Box::new(
23365 Literal::String("1".to_string()),
23366 ))),
23367 unit: Some(
23368 crate::expressions::IntervalUnitSpec::Simple {
23369 unit: crate::expressions::IntervalUnit::Day,
23370 use_plural: false,
23371 },
23372 ),
23373 },
23374 )))
23375 });
23376 Ok(Expression::GenerateSeries(Box::new(
23377 crate::expressions::GenerateSeries {
23378 start: start.map(Box::new),
23379 end: end.map(Box::new),
23380 step: step.map(Box::new),
23381 is_end_exclusive: None,
23382 },
23383 )))
23384 }
23385 }
23386 // ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
23387 // (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
23388 "ARRAYS_OVERLAP"
23389 if f.args.len() == 2
23390 && matches!(source, DialectType::Snowflake)
23391 && matches!(target, DialectType::DuckDB) =>
23392 {
23393 let mut args = f.args;
23394 let arr1 = args.remove(0);
23395 let arr2 = args.remove(0);
23396
23397 // (arr1 && arr2)
23398 let overlap = Expression::Paren(Box::new(Paren {
23399 this: Expression::ArrayOverlaps(Box::new(BinaryOp {
23400 left: arr1.clone(),
23401 right: arr2.clone(),
23402 left_comments: vec![],
23403 operator_comments: vec![],
23404 trailing_comments: vec![],
23405 inferred_type: None,
23406 })),
23407 trailing_comments: vec![],
23408 }));
23409
23410 // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
23411 let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
23412 left: Expression::Function(Box::new(Function::new(
23413 "ARRAY_LENGTH".to_string(),
23414 vec![arr1.clone()],
23415 ))),
23416 right: Expression::Function(Box::new(Function::new(
23417 "LIST_COUNT".to_string(),
23418 vec![arr1],
23419 ))),
23420 left_comments: vec![],
23421 operator_comments: vec![],
23422 trailing_comments: vec![],
23423 inferred_type: None,
23424 }));
23425
23426 // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
23427 let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
23428 left: Expression::Function(Box::new(Function::new(
23429 "ARRAY_LENGTH".to_string(),
23430 vec![arr2.clone()],
23431 ))),
23432 right: Expression::Function(Box::new(Function::new(
23433 "LIST_COUNT".to_string(),
23434 vec![arr2],
23435 ))),
23436 left_comments: vec![],
23437 operator_comments: vec![],
23438 trailing_comments: vec![],
23439 inferred_type: None,
23440 }));
23441
23442 // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
23443 let null_check = Expression::Paren(Box::new(Paren {
23444 this: Expression::And(Box::new(BinaryOp {
23445 left: arr1_has_null,
23446 right: arr2_has_null,
23447 left_comments: vec![],
23448 operator_comments: vec![],
23449 trailing_comments: vec![],
23450 inferred_type: None,
23451 })),
23452 trailing_comments: vec![],
23453 }));
23454
23455 // (arr1 && arr2) OR (null_check)
23456 Ok(Expression::Or(Box::new(BinaryOp {
23457 left: overlap,
23458 right: null_check,
23459 left_comments: vec![],
23460 operator_comments: vec![],
23461 trailing_comments: vec![],
23462 inferred_type: None,
23463 })))
23464 }
23465 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
23466 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
23467 "ARRAY_INTERSECTION"
23468 if f.args.len() == 2
23469 && matches!(source, DialectType::Snowflake)
23470 && matches!(target, DialectType::DuckDB) =>
23471 {
23472 let mut args = f.args;
23473 let arr1 = args.remove(0);
23474 let arr2 = args.remove(0);
23475
23476 // Build: arr1 IS NULL
23477 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
23478 this: arr1.clone(),
23479 not: false,
23480 postfix_form: false,
23481 }));
23482 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
23483 this: arr2.clone(),
23484 not: false,
23485 postfix_form: false,
23486 }));
23487 let null_check = Expression::Or(Box::new(BinaryOp {
23488 left: arr1_is_null,
23489 right: arr2_is_null,
23490 left_comments: vec![],
23491 operator_comments: vec![],
23492 trailing_comments: vec![],
23493 inferred_type: None,
23494 }));
23495
23496 // GENERATE_SERIES(1, LENGTH(arr1))
23497 let gen_series = Expression::Function(Box::new(Function::new(
23498 "GENERATE_SERIES".to_string(),
23499 vec![
23500 Expression::number(1),
23501 Expression::Function(Box::new(Function::new(
23502 "LENGTH".to_string(),
23503 vec![arr1.clone()],
23504 ))),
23505 ],
23506 )));
23507
23508 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
23509 let list_zip = Expression::Function(Box::new(Function::new(
23510 "LIST_ZIP".to_string(),
23511 vec![arr1.clone(), gen_series],
23512 )));
23513
23514 // pair[1] and pair[2]
23515 let pair_col = Expression::column("pair");
23516 let pair_1 = Expression::Subscript(Box::new(
23517 crate::expressions::Subscript {
23518 this: pair_col.clone(),
23519 index: Expression::number(1),
23520 },
23521 ));
23522 let pair_2 = Expression::Subscript(Box::new(
23523 crate::expressions::Subscript {
23524 this: pair_col.clone(),
23525 index: Expression::number(2),
23526 },
23527 ));
23528
23529 // arr1[1:pair[2]]
23530 let arr1_slice = Expression::ArraySlice(Box::new(
23531 crate::expressions::ArraySlice {
23532 this: arr1.clone(),
23533 start: Some(Expression::number(1)),
23534 end: Some(pair_2),
23535 },
23536 ));
23537
23538 // e IS NOT DISTINCT FROM pair[1]
23539 let e_col = Expression::column("e");
23540 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
23541 left: e_col.clone(),
23542 right: pair_1.clone(),
23543 left_comments: vec![],
23544 operator_comments: vec![],
23545 trailing_comments: vec![],
23546 inferred_type: None,
23547 }));
23548
23549 // e -> e IS NOT DISTINCT FROM pair[1]
23550 let inner_lambda1 =
23551 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23552 parameters: vec![crate::expressions::Identifier::new("e")],
23553 body: is_not_distinct,
23554 colon: false,
23555 parameter_types: vec![],
23556 }));
23557
23558 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
23559 let inner_filter1 = Expression::Function(Box::new(Function::new(
23560 "LIST_FILTER".to_string(),
23561 vec![arr1_slice, inner_lambda1],
23562 )));
23563
23564 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
23565 let len1 = Expression::Function(Box::new(Function::new(
23566 "LENGTH".to_string(),
23567 vec![inner_filter1],
23568 )));
23569
23570 // e -> e IS NOT DISTINCT FROM pair[1]
23571 let inner_lambda2 =
23572 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23573 parameters: vec![crate::expressions::Identifier::new("e")],
23574 body: Expression::NullSafeEq(Box::new(BinaryOp {
23575 left: e_col,
23576 right: pair_1.clone(),
23577 left_comments: vec![],
23578 operator_comments: vec![],
23579 trailing_comments: vec![],
23580 inferred_type: None,
23581 })),
23582 colon: false,
23583 parameter_types: vec![],
23584 }));
23585
23586 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
23587 let inner_filter2 = Expression::Function(Box::new(Function::new(
23588 "LIST_FILTER".to_string(),
23589 vec![arr2.clone(), inner_lambda2],
23590 )));
23591
23592 // LENGTH(LIST_FILTER(arr2, ...))
23593 let len2 = Expression::Function(Box::new(Function::new(
23594 "LENGTH".to_string(),
23595 vec![inner_filter2],
23596 )));
23597
23598 // LENGTH(...) <= LENGTH(...)
23599 let cond = Expression::Paren(Box::new(Paren {
23600 this: Expression::Lte(Box::new(BinaryOp {
23601 left: len1,
23602 right: len2,
23603 left_comments: vec![],
23604 operator_comments: vec![],
23605 trailing_comments: vec![],
23606 inferred_type: None,
23607 })),
23608 trailing_comments: vec![],
23609 }));
23610
23611 // pair -> (condition)
23612 let filter_lambda =
23613 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23614 parameters: vec![crate::expressions::Identifier::new(
23615 "pair",
23616 )],
23617 body: cond,
23618 colon: false,
23619 parameter_types: vec![],
23620 }));
23621
23622 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
23623 let outer_filter = Expression::Function(Box::new(Function::new(
23624 "LIST_FILTER".to_string(),
23625 vec![list_zip, filter_lambda],
23626 )));
23627
23628 // pair -> pair[1]
23629 let transform_lambda =
23630 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
23631 parameters: vec![crate::expressions::Identifier::new(
23632 "pair",
23633 )],
23634 body: pair_1,
23635 colon: false,
23636 parameter_types: vec![],
23637 }));
23638
23639 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
23640 let list_transform = Expression::Function(Box::new(Function::new(
23641 "LIST_TRANSFORM".to_string(),
23642 vec![outer_filter, transform_lambda],
23643 )));
23644
23645 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
23646 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
23647 // END
23648 Ok(Expression::Case(Box::new(Case {
23649 operand: None,
23650 whens: vec![(null_check, Expression::Null(Null))],
23651 else_: Some(list_transform),
23652 comments: vec![],
23653 inferred_type: None,
23654 })))
23655 }
23656 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets except Snowflake (which keeps the function call)
23657 "ARRAY_CONSTRUCT" => {
23658 if matches!(target, DialectType::Snowflake) {
23659 Ok(Expression::Function(f))
23660 } else {
23661 Ok(Expression::Array(Box::new(crate::expressions::Array {
23662 expressions: f.args,
23663 })))
23664 }
23665 }
23666 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
23667 "ARRAY"
23668 if !f.args.iter().any(|a| {
23669 matches!(a, Expression::Select(_) | Expression::Subquery(_))
23670 }) =>
23671 {
23672 match target {
23673 DialectType::DuckDB
23674 | DialectType::Snowflake
23675 | DialectType::Presto
23676 | DialectType::Trino
23677 | DialectType::Athena => {
23678 Ok(Expression::Array(Box::new(crate::expressions::Array {
23679 expressions: f.args,
23680 })))
23681 }
23682 _ => Ok(Expression::Function(f)),
23683 }
23684 }
23685 _ => Ok(Expression::Function(f)),
23686 }
23687 } else if let Expression::AggregateFunction(mut af) = e {
23688 let name = af.name.to_ascii_uppercase();
23689 match name.as_str() {
23690 "ARBITRARY" if af.args.len() == 1 => {
23691 let arg = af.args.into_iter().next().unwrap();
23692 Ok(convert_arbitrary(arg, target))
23693 }
23694 "JSON_ARRAYAGG" => {
23695 match target {
23696 DialectType::PostgreSQL => {
23697 af.name = "JSON_AGG".to_string();
23698 // Add NULLS FIRST to ORDER BY items for PostgreSQL
23699 for ordered in af.order_by.iter_mut() {
23700 if ordered.nulls_first.is_none() {
23701 ordered.nulls_first = Some(true);
23702 }
23703 }
23704 Ok(Expression::AggregateFunction(af))
23705 }
23706 _ => Ok(Expression::AggregateFunction(af)),
23707 }
23708 }
23709 _ => Ok(Expression::AggregateFunction(af)),
23710 }
23711 } else if let Expression::JSONArrayAgg(ja) = e {
23712 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
23713 match target {
23714 DialectType::PostgreSQL => {
23715 let mut order_by = Vec::new();
23716 if let Some(order_expr) = ja.order {
23717 if let Expression::OrderBy(ob) = *order_expr {
23718 for mut ordered in ob.expressions {
23719 if ordered.nulls_first.is_none() {
23720 ordered.nulls_first = Some(true);
23721 }
23722 order_by.push(ordered);
23723 }
23724 }
23725 }
23726 Ok(Expression::AggregateFunction(Box::new(
23727 crate::expressions::AggregateFunction {
23728 name: "JSON_AGG".to_string(),
23729 args: vec![*ja.this],
23730 distinct: false,
23731 filter: None,
23732 order_by,
23733 limit: None,
23734 ignore_nulls: None,
23735 inferred_type: None,
23736 },
23737 )))
23738 }
23739 _ => Ok(Expression::JSONArrayAgg(ja)),
23740 }
23741 } else if let Expression::JSONArray(ja) = e {
23742 match target {
23743 DialectType::Snowflake
23744 if ja.null_handling.is_none()
23745 && ja.return_type.is_none()
23746 && ja.strict.is_none() =>
23747 {
23748 let array_construct = Expression::ArrayFunc(Box::new(
23749 crate::expressions::ArrayConstructor {
23750 expressions: ja.expressions,
23751 bracket_notation: false,
23752 use_list_keyword: false,
23753 },
23754 ));
23755 Ok(Expression::Function(Box::new(Function::new(
23756 "TO_VARIANT".to_string(),
23757 vec![array_construct],
23758 ))))
23759 }
23760 _ => Ok(Expression::JSONArray(ja)),
23761 }
23762 } else if let Expression::JsonArray(f) = e {
23763 match target {
23764 DialectType::Snowflake => {
23765 let array_construct = Expression::ArrayFunc(Box::new(
23766 crate::expressions::ArrayConstructor {
23767 expressions: f.expressions,
23768 bracket_notation: false,
23769 use_list_keyword: false,
23770 },
23771 ));
23772 Ok(Expression::Function(Box::new(Function::new(
23773 "TO_VARIANT".to_string(),
23774 vec![array_construct],
23775 ))))
23776 }
23777 _ => Ok(Expression::JsonArray(f)),
23778 }
23779 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
23780 let function_name = match cpa.this.as_ref() {
23781 Expression::Identifier(ident) => Some(ident.name.as_str()),
23782 _ => None,
23783 };
23784 match function_name {
23785 Some(name)
23786 if name.eq_ignore_ascii_case("groupConcat")
23787 && cpa.expressions.len() == 1 =>
23788 {
23789 match target {
23790 DialectType::MySQL | DialectType::SingleStore => {
23791 let this = cpa.expressions[0].clone();
23792 let separator = cpa.params.first().cloned();
23793 Ok(Expression::GroupConcat(Box::new(
23794 crate::expressions::GroupConcatFunc {
23795 this,
23796 separator,
23797 order_by: None,
23798 distinct: false,
23799 filter: None,
23800 limit: None,
23801 inferred_type: None,
23802 },
23803 )))
23804 }
23805 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
23806 let this = cpa.expressions[0].clone();
23807 let separator = cpa.params.first().cloned();
23808 crate::expressions::ListAggFunc {
23809 this,
23810 separator,
23811 on_overflow: None,
23812 order_by: None,
23813 distinct: false,
23814 filter: None,
23815 inferred_type: None,
23816 }
23817 }))),
23818 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
23819 }
23820 }
23821 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
23822 }
23823 } else if let Expression::ToNumber(tn) = e {
23824 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
23825 let arg = *tn.this;
23826 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
23827 this: arg,
23828 to: crate::expressions::DataType::Double {
23829 precision: None,
23830 scale: None,
23831 },
23832 double_colon_syntax: false,
23833 trailing_comments: Vec::new(),
23834 format: None,
23835 default: None,
23836 inferred_type: None,
23837 })))
23838 } else {
23839 Ok(e)
23840 }
23841 }
23842
23843 Action::RegexpLikeToDuckDB => {
23844 if let Expression::RegexpLike(f) = e {
23845 let mut args = vec![f.this, f.pattern];
23846 if let Some(flags) = f.flags {
23847 args.push(flags);
23848 }
23849 Ok(Expression::Function(Box::new(Function::new(
23850 "REGEXP_MATCHES".to_string(),
23851 args,
23852 ))))
23853 } else {
23854 Ok(e)
23855 }
23856 }
23857 Action::EpochConvert => {
23858 if let Expression::Epoch(f) = e {
23859 let arg = f.this;
23860 let name = match target {
23861 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23862 "UNIX_TIMESTAMP"
23863 }
23864 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
23865 DialectType::BigQuery => "TIME_TO_UNIX",
23866 _ => "EPOCH",
23867 };
23868 Ok(Expression::Function(Box::new(Function::new(
23869 name.to_string(),
23870 vec![arg],
23871 ))))
23872 } else {
23873 Ok(e)
23874 }
23875 }
23876 Action::EpochMsConvert => {
23877 use crate::expressions::{BinaryOp, Cast};
23878 if let Expression::EpochMs(f) = e {
23879 let arg = f.this;
23880 match target {
23881 DialectType::Spark | DialectType::Databricks => {
23882 Ok(Expression::Function(Box::new(Function::new(
23883 "TIMESTAMP_MILLIS".to_string(),
23884 vec![arg],
23885 ))))
23886 }
23887 DialectType::BigQuery => Ok(Expression::Function(Box::new(
23888 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
23889 ))),
23890 DialectType::Presto | DialectType::Trino => {
23891 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
23892 let cast_arg = Expression::Cast(Box::new(Cast {
23893 this: arg,
23894 to: DataType::Double {
23895 precision: None,
23896 scale: None,
23897 },
23898 trailing_comments: Vec::new(),
23899 double_colon_syntax: false,
23900 format: None,
23901 default: None,
23902 inferred_type: None,
23903 }));
23904 let div = Expression::Div(Box::new(BinaryOp::new(
23905 cast_arg,
23906 Expression::Function(Box::new(Function::new(
23907 "POW".to_string(),
23908 vec![Expression::number(10), Expression::number(3)],
23909 ))),
23910 )));
23911 Ok(Expression::Function(Box::new(Function::new(
23912 "FROM_UNIXTIME".to_string(),
23913 vec![div],
23914 ))))
23915 }
23916 DialectType::MySQL => {
23917 // FROM_UNIXTIME(x / POWER(10, 3))
23918 let div = Expression::Div(Box::new(BinaryOp::new(
23919 arg,
23920 Expression::Function(Box::new(Function::new(
23921 "POWER".to_string(),
23922 vec![Expression::number(10), Expression::number(3)],
23923 ))),
23924 )));
23925 Ok(Expression::Function(Box::new(Function::new(
23926 "FROM_UNIXTIME".to_string(),
23927 vec![div],
23928 ))))
23929 }
23930 DialectType::PostgreSQL | DialectType::Redshift => {
23931 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
23932 let cast_arg = Expression::Cast(Box::new(Cast {
23933 this: arg,
23934 to: DataType::Custom {
23935 name: "DOUBLE PRECISION".to_string(),
23936 },
23937 trailing_comments: Vec::new(),
23938 double_colon_syntax: false,
23939 format: None,
23940 default: None,
23941 inferred_type: None,
23942 }));
23943 let div = Expression::Div(Box::new(BinaryOp::new(
23944 cast_arg,
23945 Expression::Function(Box::new(Function::new(
23946 "POWER".to_string(),
23947 vec![Expression::number(10), Expression::number(3)],
23948 ))),
23949 )));
23950 Ok(Expression::Function(Box::new(Function::new(
23951 "TO_TIMESTAMP".to_string(),
23952 vec![div],
23953 ))))
23954 }
23955 DialectType::ClickHouse => {
23956 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
23957 let cast_arg = Expression::Cast(Box::new(Cast {
23958 this: arg,
23959 to: DataType::Nullable {
23960 inner: Box::new(DataType::BigInt { length: None }),
23961 },
23962 trailing_comments: Vec::new(),
23963 double_colon_syntax: false,
23964 format: None,
23965 default: None,
23966 inferred_type: None,
23967 }));
23968 Ok(Expression::Function(Box::new(Function::new(
23969 "fromUnixTimestamp64Milli".to_string(),
23970 vec![cast_arg],
23971 ))))
23972 }
23973 _ => Ok(Expression::Function(Box::new(Function::new(
23974 "EPOCH_MS".to_string(),
23975 vec![arg],
23976 )))),
23977 }
23978 } else {
23979 Ok(e)
23980 }
23981 }
23982 Action::TSQLTypeNormalize => {
23983 if let Expression::DataType(dt) = e {
23984 let new_dt = match &dt {
23985 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
23986 DataType::Decimal {
23987 precision: Some(15),
23988 scale: Some(4),
23989 }
23990 }
23991 DataType::Custom { name }
23992 if name.eq_ignore_ascii_case("SMALLMONEY") =>
23993 {
23994 DataType::Decimal {
23995 precision: Some(6),
23996 scale: Some(4),
23997 }
23998 }
23999 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
24000 DataType::Timestamp {
24001 timezone: false,
24002 precision: None,
24003 }
24004 }
24005 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
24006 DataType::Float {
24007 precision: None,
24008 scale: None,
24009 real_spelling: false,
24010 }
24011 }
24012 DataType::Float {
24013 real_spelling: true,
24014 ..
24015 } => DataType::Float {
24016 precision: None,
24017 scale: None,
24018 real_spelling: false,
24019 },
24020 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
24021 DataType::Custom {
24022 name: "BLOB".to_string(),
24023 }
24024 }
24025 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
24026 DataType::Boolean
24027 }
24028 DataType::Custom { name }
24029 if name.eq_ignore_ascii_case("ROWVERSION") =>
24030 {
24031 DataType::Custom {
24032 name: "BINARY".to_string(),
24033 }
24034 }
24035 DataType::Custom { name }
24036 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
24037 {
24038 match target {
24039 DialectType::Spark
24040 | DialectType::Databricks
24041 | DialectType::Hive => DataType::Custom {
24042 name: "STRING".to_string(),
24043 },
24044 _ => DataType::VarChar {
24045 length: Some(36),
24046 parenthesized_length: true,
24047 },
24048 }
24049 }
24050 DataType::Custom { name }
24051 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
24052 {
24053 match target {
24054 DialectType::Spark
24055 | DialectType::Databricks
24056 | DialectType::Hive => DataType::Timestamp {
24057 timezone: false,
24058 precision: None,
24059 },
24060 _ => DataType::Timestamp {
24061 timezone: true,
24062 precision: None,
24063 },
24064 }
24065 }
24066 DataType::Custom { ref name }
24067 if name.len() >= 10
24068 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
24069 {
24070 // DATETIME2(n) -> TIMESTAMP
24071 DataType::Timestamp {
24072 timezone: false,
24073 precision: None,
24074 }
24075 }
24076 DataType::Custom { ref name }
24077 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
24078 {
24079 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
24080 match target {
24081 DialectType::Spark
24082 | DialectType::Databricks
24083 | DialectType::Hive => DataType::Timestamp {
24084 timezone: false,
24085 precision: None,
24086 },
24087 _ => return Ok(Expression::DataType(dt)),
24088 }
24089 }
24090 DataType::Custom { ref name }
24091 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
24092 {
24093 // Parse NUMERIC(p,s) back to Decimal(p,s)
24094 let upper = name.to_ascii_uppercase();
24095 if let Some(inner) = upper
24096 .strip_prefix("NUMERIC(")
24097 .and_then(|s| s.strip_suffix(')'))
24098 {
24099 let parts: Vec<&str> = inner.split(',').collect();
24100 let precision =
24101 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
24102 let scale =
24103 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
24104 DataType::Decimal { precision, scale }
24105 } else if upper == "NUMERIC" {
24106 DataType::Decimal {
24107 precision: None,
24108 scale: None,
24109 }
24110 } else {
24111 return Ok(Expression::DataType(dt));
24112 }
24113 }
24114 DataType::Float {
24115 precision: Some(p), ..
24116 } => {
24117 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
24118 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
24119 let boundary = match target {
24120 DialectType::Hive
24121 | DialectType::Spark
24122 | DialectType::Databricks => 32,
24123 _ => 24,
24124 };
24125 if *p <= boundary {
24126 DataType::Float {
24127 precision: None,
24128 scale: None,
24129 real_spelling: false,
24130 }
24131 } else {
24132 DataType::Double {
24133 precision: None,
24134 scale: None,
24135 }
24136 }
24137 }
24138 DataType::TinyInt { .. } => match target {
24139 DialectType::DuckDB => DataType::Custom {
24140 name: "UTINYINT".to_string(),
24141 },
24142 DialectType::Hive
24143 | DialectType::Spark
24144 | DialectType::Databricks => DataType::SmallInt { length: None },
24145 _ => return Ok(Expression::DataType(dt)),
24146 },
24147 // INTEGER -> INT for Spark/Databricks
24148 DataType::Int {
24149 length,
24150 integer_spelling: true,
24151 } => DataType::Int {
24152 length: *length,
24153 integer_spelling: false,
24154 },
24155 _ => return Ok(Expression::DataType(dt)),
24156 };
24157 Ok(Expression::DataType(new_dt))
24158 } else {
24159 Ok(e)
24160 }
24161 }
24162 Action::MySQLSafeDivide => {
24163 use crate::expressions::{BinaryOp, Cast};
24164 if let Expression::Div(op) = e {
24165 let left = op.left;
24166 let right = op.right;
24167 // For SQLite: CAST left as REAL but NO NULLIF wrapping
24168 if matches!(target, DialectType::SQLite) {
24169 let new_left = Expression::Cast(Box::new(Cast {
24170 this: left,
24171 to: DataType::Float {
24172 precision: None,
24173 scale: None,
24174 real_spelling: true,
24175 },
24176 trailing_comments: Vec::new(),
24177 double_colon_syntax: false,
24178 format: None,
24179 default: None,
24180 inferred_type: None,
24181 }));
24182 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
24183 }
24184 // Wrap right in NULLIF(right, 0)
24185 let nullif_right = Expression::Function(Box::new(Function::new(
24186 "NULLIF".to_string(),
24187 vec![right, Expression::number(0)],
24188 )));
24189 // For some dialects, also CAST the left side
24190 let new_left = match target {
24191 DialectType::PostgreSQL
24192 | DialectType::Redshift
24193 | DialectType::Teradata
24194 | DialectType::Materialize
24195 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
24196 this: left,
24197 to: DataType::Custom {
24198 name: "DOUBLE PRECISION".to_string(),
24199 },
24200 trailing_comments: Vec::new(),
24201 double_colon_syntax: false,
24202 format: None,
24203 default: None,
24204 inferred_type: None,
24205 })),
24206 DialectType::Drill
24207 | DialectType::Trino
24208 | DialectType::Presto
24209 | DialectType::Athena => Expression::Cast(Box::new(Cast {
24210 this: left,
24211 to: DataType::Double {
24212 precision: None,
24213 scale: None,
24214 },
24215 trailing_comments: Vec::new(),
24216 double_colon_syntax: false,
24217 format: None,
24218 default: None,
24219 inferred_type: None,
24220 })),
24221 DialectType::TSQL => Expression::Cast(Box::new(Cast {
24222 this: left,
24223 to: DataType::Float {
24224 precision: None,
24225 scale: None,
24226 real_spelling: false,
24227 },
24228 trailing_comments: Vec::new(),
24229 double_colon_syntax: false,
24230 format: None,
24231 default: None,
24232 inferred_type: None,
24233 })),
24234 _ => left,
24235 };
24236 Ok(Expression::Div(Box::new(BinaryOp::new(
24237 new_left,
24238 nullif_right,
24239 ))))
24240 } else {
24241 Ok(e)
24242 }
24243 }
24244 Action::AlterTableRenameStripSchema => {
24245 if let Expression::AlterTable(mut at) = e {
24246 if let Some(crate::expressions::AlterTableAction::RenameTable(
24247 ref mut new_tbl,
24248 )) = at.actions.first_mut()
24249 {
24250 new_tbl.schema = None;
24251 new_tbl.catalog = None;
24252 }
24253 Ok(Expression::AlterTable(at))
24254 } else {
24255 Ok(e)
24256 }
24257 }
24258 Action::NullsOrdering => {
24259 // Fill in the source dialect's implied null ordering default.
24260 // This makes implicit null ordering explicit so the target generator
24261 // can correctly strip or keep it.
24262 //
24263 // Dialect null ordering categories:
24264 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
24265 // ASC -> NULLS LAST, DESC -> NULLS FIRST
// nulls_are_small (every dialect not listed in the other two categories,
//   e.g. Spark, Hive, BigQuery, MySQL, Databricks):
//   ASC -> NULLS FIRST, DESC -> NULLS LAST
// nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena, ClickHouse, Drill,
//   Exasol, DataFusion):
//   NULLS LAST always (both ASC and DESC)
24270 if let Expression::Ordered(mut o) = e {
24271 let is_asc = !o.desc;
24272
24273 let is_source_nulls_large = matches!(
24274 source,
24275 DialectType::Oracle
24276 | DialectType::PostgreSQL
24277 | DialectType::Redshift
24278 | DialectType::Snowflake
24279 );
24280 let is_source_nulls_last = matches!(
24281 source,
24282 DialectType::DuckDB
24283 | DialectType::Presto
24284 | DialectType::Trino
24285 | DialectType::Dremio
24286 | DialectType::Athena
24287 | DialectType::ClickHouse
24288 | DialectType::Drill
24289 | DialectType::Exasol
24290 | DialectType::DataFusion
24291 );
24292
24293 // Determine target category to check if default matches
24294 let is_target_nulls_large = matches!(
24295 target,
24296 DialectType::Oracle
24297 | DialectType::PostgreSQL
24298 | DialectType::Redshift
24299 | DialectType::Snowflake
24300 );
24301 let is_target_nulls_last = matches!(
24302 target,
24303 DialectType::DuckDB
24304 | DialectType::Presto
24305 | DialectType::Trino
24306 | DialectType::Dremio
24307 | DialectType::Athena
24308 | DialectType::ClickHouse
24309 | DialectType::Drill
24310 | DialectType::Exasol
24311 | DialectType::DataFusion
24312 );
24313
24314 // Compute the implied nulls_first for source
24315 let source_nulls_first = if is_source_nulls_large {
24316 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
24317 } else if is_source_nulls_last {
24318 false // NULLS LAST always
24319 } else {
24320 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
24321 };
24322
24323 // Compute the target's default
24324 let target_nulls_first = if is_target_nulls_large {
24325 !is_asc
24326 } else if is_target_nulls_last {
24327 false
24328 } else {
24329 is_asc
24330 };
24331
24332 // Only add explicit nulls ordering if source and target defaults differ
24333 if source_nulls_first != target_nulls_first {
24334 o.nulls_first = Some(source_nulls_first);
24335 }
24336 // If they match, leave nulls_first as None so the generator won't output it
24337
24338 Ok(Expression::Ordered(o))
24339 } else {
24340 Ok(e)
24341 }
24342 }
24343 Action::StringAggConvert => {
24344 match e {
24345 Expression::WithinGroup(wg) => {
24346 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
24347 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
24348 let (x_opt, sep_opt, distinct) = match wg.this {
24349 Expression::AggregateFunction(ref af)
24350 if af.name.eq_ignore_ascii_case("STRING_AGG")
24351 && af.args.len() >= 2 =>
24352 {
24353 (
24354 Some(af.args[0].clone()),
24355 Some(af.args[1].clone()),
24356 af.distinct,
24357 )
24358 }
24359 Expression::Function(ref f)
24360 if f.name.eq_ignore_ascii_case("STRING_AGG")
24361 && f.args.len() >= 2 =>
24362 {
24363 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
24364 }
24365 Expression::StringAgg(ref sa) => {
24366 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
24367 }
24368 _ => (None, None, false),
24369 };
24370 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
24371 let order_by = wg.order_by;
24372
24373 match target {
24374 DialectType::TSQL | DialectType::Fabric => {
24375 // Keep as WithinGroup(StringAgg) for TSQL
24376 Ok(Expression::WithinGroup(Box::new(
24377 crate::expressions::WithinGroup {
24378 this: Expression::StringAgg(Box::new(
24379 crate::expressions::StringAggFunc {
24380 this: x,
24381 separator: Some(sep),
24382 order_by: None, // order_by goes in WithinGroup, not StringAgg
24383 distinct,
24384 filter: None,
24385 limit: None,
24386 inferred_type: None,
24387 },
24388 )),
24389 order_by,
24390 },
24391 )))
24392 }
24393 DialectType::MySQL
24394 | DialectType::SingleStore
24395 | DialectType::Doris
24396 | DialectType::StarRocks => {
24397 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
24398 Ok(Expression::GroupConcat(Box::new(
24399 crate::expressions::GroupConcatFunc {
24400 this: x,
24401 separator: Some(sep),
24402 order_by: Some(order_by),
24403 distinct,
24404 filter: None,
24405 limit: None,
24406 inferred_type: None,
24407 },
24408 )))
24409 }
24410 DialectType::SQLite => {
24411 // GROUP_CONCAT(x, sep) - no ORDER BY support
24412 Ok(Expression::GroupConcat(Box::new(
24413 crate::expressions::GroupConcatFunc {
24414 this: x,
24415 separator: Some(sep),
24416 order_by: None,
24417 distinct,
24418 filter: None,
24419 limit: None,
24420 inferred_type: None,
24421 },
24422 )))
24423 }
24424 DialectType::PostgreSQL | DialectType::Redshift => {
24425 // STRING_AGG(x, sep ORDER BY z)
24426 Ok(Expression::StringAgg(Box::new(
24427 crate::expressions::StringAggFunc {
24428 this: x,
24429 separator: Some(sep),
24430 order_by: Some(order_by),
24431 distinct,
24432 filter: None,
24433 limit: None,
24434 inferred_type: None,
24435 },
24436 )))
24437 }
24438 _ => {
24439 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
24440 Ok(Expression::StringAgg(Box::new(
24441 crate::expressions::StringAggFunc {
24442 this: x,
24443 separator: Some(sep),
24444 order_by: Some(order_by),
24445 distinct,
24446 filter: None,
24447 limit: None,
24448 inferred_type: None,
24449 },
24450 )))
24451 }
24452 }
24453 } else {
24454 Ok(Expression::WithinGroup(wg))
24455 }
24456 }
24457 Expression::StringAgg(sa) => {
24458 match target {
24459 DialectType::MySQL
24460 | DialectType::SingleStore
24461 | DialectType::Doris
24462 | DialectType::StarRocks => {
24463 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
24464 Ok(Expression::GroupConcat(Box::new(
24465 crate::expressions::GroupConcatFunc {
24466 this: sa.this,
24467 separator: sa.separator,
24468 order_by: sa.order_by,
24469 distinct: sa.distinct,
24470 filter: sa.filter,
24471 limit: None,
24472 inferred_type: None,
24473 },
24474 )))
24475 }
24476 DialectType::SQLite => {
24477 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
24478 Ok(Expression::GroupConcat(Box::new(
24479 crate::expressions::GroupConcatFunc {
24480 this: sa.this,
24481 separator: sa.separator,
24482 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
24483 distinct: sa.distinct,
24484 filter: sa.filter,
24485 limit: None,
24486 inferred_type: None,
24487 },
24488 )))
24489 }
24490 DialectType::Spark | DialectType::Databricks => {
24491 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
24492 Ok(Expression::ListAgg(Box::new(
24493 crate::expressions::ListAggFunc {
24494 this: sa.this,
24495 separator: sa.separator,
24496 on_overflow: None,
24497 order_by: sa.order_by,
24498 distinct: sa.distinct,
24499 filter: None,
24500 inferred_type: None,
24501 },
24502 )))
24503 }
24504 _ => Ok(Expression::StringAgg(sa)),
24505 }
24506 }
24507 _ => Ok(e),
24508 }
24509 }
24510 Action::GroupConcatConvert => {
24511 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
24512 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
24513 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
24514 if let Expression::Function(ref f) = expr {
24515 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24516 let mut result = f.args[0].clone();
24517 for arg in &f.args[1..] {
24518 result = Expression::Concat(Box::new(BinaryOp {
24519 left: result,
24520 right: arg.clone(),
24521 left_comments: vec![],
24522 operator_comments: vec![],
24523 trailing_comments: vec![],
24524 inferred_type: None,
24525 }));
24526 }
24527 return result;
24528 }
24529 }
24530 expr
24531 }
24532 fn expand_concat_to_plus(expr: Expression) -> Expression {
24533 if let Expression::Function(ref f) = expr {
24534 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24535 let mut result = f.args[0].clone();
24536 for arg in &f.args[1..] {
24537 result = Expression::Add(Box::new(BinaryOp {
24538 left: result,
24539 right: arg.clone(),
24540 left_comments: vec![],
24541 operator_comments: vec![],
24542 trailing_comments: vec![],
24543 inferred_type: None,
24544 }));
24545 }
24546 return result;
24547 }
24548 }
24549 expr
24550 }
24551 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
24552 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
24553 if let Expression::Function(ref f) = expr {
24554 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
24555 let new_args: Vec<Expression> = f
24556 .args
24557 .iter()
24558 .map(|arg| {
24559 Expression::Cast(Box::new(crate::expressions::Cast {
24560 this: arg.clone(),
24561 to: crate::expressions::DataType::VarChar {
24562 length: None,
24563 parenthesized_length: false,
24564 },
24565 trailing_comments: Vec::new(),
24566 double_colon_syntax: false,
24567 format: None,
24568 default: None,
24569 inferred_type: None,
24570 }))
24571 })
24572 .collect();
24573 return Expression::Function(Box::new(
24574 crate::expressions::Function::new(
24575 "CONCAT".to_string(),
24576 new_args,
24577 ),
24578 ));
24579 }
24580 }
24581 expr
24582 }
24583 if let Expression::GroupConcat(gc) = e {
24584 match target {
24585 DialectType::Presto => {
24586 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
24587 let sep = gc.separator.unwrap_or(Expression::string(","));
24588 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
24589 let this = wrap_concat_args_in_varchar_cast(gc.this);
24590 let array_agg =
24591 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
24592 this,
24593 distinct: gc.distinct,
24594 filter: gc.filter,
24595 order_by: gc.order_by.unwrap_or_default(),
24596 name: None,
24597 ignore_nulls: None,
24598 having_max: None,
24599 limit: None,
24600 inferred_type: None,
24601 }));
24602 Ok(Expression::ArrayJoin(Box::new(
24603 crate::expressions::ArrayJoinFunc {
24604 this: array_agg,
24605 separator: sep,
24606 null_replacement: None,
24607 },
24608 )))
24609 }
24610 DialectType::Trino => {
24611 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
24612 let sep = gc.separator.unwrap_or(Expression::string(","));
24613 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
24614 let this = wrap_concat_args_in_varchar_cast(gc.this);
24615 Ok(Expression::ListAgg(Box::new(
24616 crate::expressions::ListAggFunc {
24617 this,
24618 separator: Some(sep),
24619 on_overflow: None,
24620 order_by: gc.order_by,
24621 distinct: gc.distinct,
24622 filter: gc.filter,
24623 inferred_type: None,
24624 },
24625 )))
24626 }
24627 DialectType::PostgreSQL
24628 | DialectType::Redshift
24629 | DialectType::Snowflake
24630 | DialectType::DuckDB
24631 | DialectType::Hive
24632 | DialectType::ClickHouse => {
24633 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
24634 let sep = gc.separator.unwrap_or(Expression::string(","));
24635 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
24636 let this = expand_concat_to_dpipe(gc.this);
24637 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
24638 let order_by = if target == DialectType::PostgreSQL {
24639 gc.order_by.map(|ords| {
24640 ords.into_iter()
24641 .map(|mut o| {
24642 if o.nulls_first.is_none() {
24643 if o.desc {
24644 o.nulls_first = Some(false);
24645 // NULLS LAST
24646 } else {
24647 o.nulls_first = Some(true);
24648 // NULLS FIRST
24649 }
24650 }
24651 o
24652 })
24653 .collect()
24654 })
24655 } else {
24656 gc.order_by
24657 };
24658 Ok(Expression::StringAgg(Box::new(
24659 crate::expressions::StringAggFunc {
24660 this,
24661 separator: Some(sep),
24662 order_by,
24663 distinct: gc.distinct,
24664 filter: gc.filter,
24665 limit: None,
24666 inferred_type: None,
24667 },
24668 )))
24669 }
24670 DialectType::TSQL => {
24671 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
24672 // TSQL doesn't support DISTINCT in STRING_AGG
24673 let sep = gc.separator.unwrap_or(Expression::string(","));
24674 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
24675 let this = expand_concat_to_plus(gc.this);
24676 Ok(Expression::StringAgg(Box::new(
24677 crate::expressions::StringAggFunc {
24678 this,
24679 separator: Some(sep),
24680 order_by: gc.order_by,
24681 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
24682 filter: gc.filter,
24683 limit: None,
24684 inferred_type: None,
24685 },
24686 )))
24687 }
24688 DialectType::SQLite => {
24689 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
24690 // SQLite GROUP_CONCAT doesn't support ORDER BY
24691 // Expand CONCAT(a,b,c) -> a || b || c
24692 let this = expand_concat_to_dpipe(gc.this);
24693 Ok(Expression::GroupConcat(Box::new(
24694 crate::expressions::GroupConcatFunc {
24695 this,
24696 separator: gc.separator,
24697 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
24698 distinct: gc.distinct,
24699 filter: gc.filter,
24700 limit: None,
24701 inferred_type: None,
24702 },
24703 )))
24704 }
24705 DialectType::Spark | DialectType::Databricks => {
24706 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
24707 let sep = gc.separator.unwrap_or(Expression::string(","));
24708 Ok(Expression::ListAgg(Box::new(
24709 crate::expressions::ListAggFunc {
24710 this: gc.this,
24711 separator: Some(sep),
24712 on_overflow: None,
24713 order_by: gc.order_by,
24714 distinct: gc.distinct,
24715 filter: None,
24716 inferred_type: None,
24717 },
24718 )))
24719 }
24720 DialectType::MySQL
24721 | DialectType::SingleStore
24722 | DialectType::StarRocks => {
24723 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
24724 if gc.separator.is_none() {
24725 let mut gc = gc;
24726 gc.separator = Some(Expression::string(","));
24727 Ok(Expression::GroupConcat(gc))
24728 } else {
24729 Ok(Expression::GroupConcat(gc))
24730 }
24731 }
24732 _ => Ok(Expression::GroupConcat(gc)),
24733 }
24734 } else {
24735 Ok(e)
24736 }
24737 }
24738 Action::TempTableHash => {
24739 match e {
24740 Expression::CreateTable(mut ct) => {
24741 // TSQL #table -> TEMPORARY TABLE with # stripped from name
24742 let name = &ct.name.name.name;
24743 if name.starts_with('#') {
24744 ct.name.name.name = name.trim_start_matches('#').to_string();
24745 }
24746 // Set temporary flag
24747 ct.temporary = true;
24748 Ok(Expression::CreateTable(ct))
24749 }
24750 Expression::Table(mut tr) => {
24751 // Strip # from table references
24752 let name = &tr.name.name;
24753 if name.starts_with('#') {
24754 tr.name.name = name.trim_start_matches('#').to_string();
24755 }
24756 Ok(Expression::Table(tr))
24757 }
24758 Expression::DropTable(mut dt) => {
24759 // Strip # from DROP TABLE names
24760 for table_ref in &mut dt.names {
24761 if table_ref.name.name.starts_with('#') {
24762 table_ref.name.name =
24763 table_ref.name.name.trim_start_matches('#').to_string();
24764 }
24765 }
24766 Ok(Expression::DropTable(dt))
24767 }
24768 _ => Ok(e),
24769 }
24770 }
24771 Action::NvlClearOriginal => {
24772 if let Expression::Nvl(mut f) = e {
24773 f.original_name = None;
24774 Ok(Expression::Nvl(f))
24775 } else {
24776 Ok(e)
24777 }
24778 }
24779 Action::HiveCastToTryCast => {
24780 // Convert Hive/Spark CAST to TRY_CAST for targets that support it
24781 if let Expression::Cast(mut c) = e {
// For Spark/Databricks -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
24783 // (Spark's TIMESTAMP is always timezone-aware)
24784 if matches!(target, DialectType::DuckDB)
24785 && matches!(source, DialectType::Spark | DialectType::Databricks)
24786 && matches!(
24787 c.to,
24788 DataType::Timestamp {
24789 timezone: false,
24790 ..
24791 }
24792 )
24793 {
24794 c.to = DataType::Custom {
24795 name: "TIMESTAMPTZ".to_string(),
24796 };
24797 }
24798 // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
24799 // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
24800 if matches!(target, DialectType::Databricks | DialectType::Spark)
24801 && matches!(
24802 source,
24803 DialectType::Spark | DialectType::Databricks | DialectType::Hive
24804 )
24805 && Self::has_varchar_char_type(&c.to)
24806 {
24807 c.to = Self::normalize_varchar_to_string(c.to);
24808 }
24809 Ok(Expression::TryCast(c))
24810 } else {
24811 Ok(e)
24812 }
24813 }
24814 Action::XorExpand => {
24815 // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
24816 // Snowflake: use BOOLXOR(a, b) instead
24817 if let Expression::Xor(xor) = e {
24818 // Collect all XOR operands
24819 let mut operands = Vec::new();
24820 if let Some(this) = xor.this {
24821 operands.push(*this);
24822 }
24823 if let Some(expr) = xor.expression {
24824 operands.push(*expr);
24825 }
24826 operands.extend(xor.expressions);
24827
24828 // Snowflake: use BOOLXOR(a, b)
24829 if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
24830 let a = operands.remove(0);
24831 let b = operands.remove(0);
24832 return Ok(Expression::Function(Box::new(Function::new(
24833 "BOOLXOR".to_string(),
24834 vec![a, b],
24835 ))));
24836 }
24837
24838 // Helper to build (a AND NOT b) OR (NOT a AND b)
24839 let make_xor = |a: Expression, b: Expression| -> Expression {
24840 let not_b = Expression::Not(Box::new(
24841 crate::expressions::UnaryOp::new(b.clone()),
24842 ));
24843 let not_a = Expression::Not(Box::new(
24844 crate::expressions::UnaryOp::new(a.clone()),
24845 ));
24846 let left_and = Expression::And(Box::new(BinaryOp {
24847 left: a,
24848 right: Expression::Paren(Box::new(Paren {
24849 this: not_b,
24850 trailing_comments: Vec::new(),
24851 })),
24852 left_comments: Vec::new(),
24853 operator_comments: Vec::new(),
24854 trailing_comments: Vec::new(),
24855 inferred_type: None,
24856 }));
24857 let right_and = Expression::And(Box::new(BinaryOp {
24858 left: Expression::Paren(Box::new(Paren {
24859 this: not_a,
24860 trailing_comments: Vec::new(),
24861 })),
24862 right: b,
24863 left_comments: Vec::new(),
24864 operator_comments: Vec::new(),
24865 trailing_comments: Vec::new(),
24866 inferred_type: None,
24867 }));
24868 Expression::Or(Box::new(BinaryOp {
24869 left: Expression::Paren(Box::new(Paren {
24870 this: left_and,
24871 trailing_comments: Vec::new(),
24872 })),
24873 right: Expression::Paren(Box::new(Paren {
24874 this: right_and,
24875 trailing_comments: Vec::new(),
24876 })),
24877 left_comments: Vec::new(),
24878 operator_comments: Vec::new(),
24879 trailing_comments: Vec::new(),
24880 inferred_type: None,
24881 }))
24882 };
24883
24884 if operands.len() >= 2 {
24885 let mut result = make_xor(operands.remove(0), operands.remove(0));
24886 for operand in operands {
24887 result = make_xor(result, operand);
24888 }
24889 Ok(result)
24890 } else if operands.len() == 1 {
24891 Ok(operands.remove(0))
24892 } else {
24893 // No operands - return FALSE (shouldn't happen)
24894 Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
24895 value: false,
24896 }))
24897 }
24898 } else {
24899 Ok(e)
24900 }
24901 }
24902 Action::DatePartUnquote => {
24903 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
24904 // Convert the quoted string first arg to a bare Column/Identifier
24905 if let Expression::Function(mut f) = e {
24906 if let Some(Expression::Literal(lit)) = f.args.first() {
24907 if let crate::expressions::Literal::String(s) = lit.as_ref() {
24908 let bare_name = s.to_ascii_lowercase();
24909 f.args[0] =
24910 Expression::Column(Box::new(crate::expressions::Column {
24911 name: Identifier::new(bare_name),
24912 table: None,
24913 join_mark: false,
24914 trailing_comments: Vec::new(),
24915 span: None,
24916 inferred_type: None,
24917 }));
24918 }
24919 }
24920 Ok(Expression::Function(f))
24921 } else {
24922 Ok(e)
24923 }
24924 }
24925 Action::ArrayLengthConvert => {
24926 // Extract the argument from the expression
24927 let arg = match e {
24928 Expression::Cardinality(ref f) => f.this.clone(),
24929 Expression::ArrayLength(ref f) => f.this.clone(),
24930 Expression::ArraySize(ref f) => f.this.clone(),
24931 _ => return Ok(e),
24932 };
24933 match target {
24934 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
24935 Ok(Expression::Function(Box::new(Function::new(
24936 "SIZE".to_string(),
24937 vec![arg],
24938 ))))
24939 }
24940 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
24941 Ok(Expression::Cardinality(Box::new(
24942 crate::expressions::UnaryFunc::new(arg),
24943 )))
24944 }
24945 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
24946 crate::expressions::UnaryFunc::new(arg),
24947 ))),
24948 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
24949 crate::expressions::UnaryFunc::new(arg),
24950 ))),
24951 DialectType::PostgreSQL | DialectType::Redshift => {
24952 // PostgreSQL ARRAY_LENGTH requires dimension arg
24953 Ok(Expression::Function(Box::new(Function::new(
24954 "ARRAY_LENGTH".to_string(),
24955 vec![arg, Expression::number(1)],
24956 ))))
24957 }
24958 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
24959 crate::expressions::UnaryFunc::new(arg),
24960 ))),
24961 _ => Ok(e), // Keep original
24962 }
24963 }
24964
24965 Action::JsonExtractToArrow => {
24966 // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
24967 if let Expression::JsonExtract(mut f) = e {
24968 f.arrow_syntax = true;
24969 // Transform path: convert bracket notation to dot notation
24970 // SQLite strips wildcards, DuckDB preserves them
24971 if let Expression::Literal(ref lit) = f.path {
24972 if let Literal::String(ref s) = lit.as_ref() {
24973 let mut transformed = s.clone();
24974 if matches!(target, DialectType::SQLite) {
24975 transformed = Self::strip_json_wildcards(&transformed);
24976 }
24977 transformed = Self::bracket_to_dot_notation(&transformed);
24978 if transformed != *s {
24979 f.path = Expression::string(&transformed);
24980 }
24981 }
24982 }
24983 Ok(Expression::JsonExtract(f))
24984 } else {
24985 Ok(e)
24986 }
24987 }
24988
24989 Action::JsonExtractToGetJsonObject => {
24990 if let Expression::JsonExtract(f) = e {
24991 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
24992 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
24993 // Use proper decomposition that handles brackets
24994 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
24995 if let Literal::String(ref s) = lit.as_ref() {
24996 let parts = Self::decompose_json_path(s);
24997 parts.into_iter().map(|k| Expression::string(&k)).collect()
24998 } else {
24999 vec![]
25000 }
25001 } else {
25002 vec![f.path]
25003 };
25004 let func_name = if matches!(target, DialectType::Redshift) {
25005 "JSON_EXTRACT_PATH_TEXT"
25006 } else {
25007 "JSON_EXTRACT_PATH"
25008 };
25009 let mut args = vec![f.this];
25010 args.extend(keys);
25011 Ok(Expression::Function(Box::new(Function::new(
25012 func_name.to_string(),
25013 args,
25014 ))))
25015 } else {
25016 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
25017 // Convert bracket double quotes to single quotes
25018 let path = if let Expression::Literal(ref lit) = f.path {
25019 if let Literal::String(ref s) = lit.as_ref() {
25020 let normalized = Self::bracket_to_single_quotes(s);
25021 if normalized != *s {
25022 Expression::string(&normalized)
25023 } else {
25024 f.path.clone()
25025 }
25026 } else {
25027 f.path.clone()
25028 }
25029 } else {
25030 f.path.clone()
25031 };
25032 Ok(Expression::Function(Box::new(Function::new(
25033 "GET_JSON_OBJECT".to_string(),
25034 vec![f.this, path],
25035 ))))
25036 }
25037 } else {
25038 Ok(e)
25039 }
25040 }
25041
25042 Action::JsonExtractScalarToGetJsonObject => {
25043 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
25044 if let Expression::JsonExtractScalar(f) = e {
25045 Ok(Expression::Function(Box::new(Function::new(
25046 "GET_JSON_OBJECT".to_string(),
25047 vec![f.this, f.path],
25048 ))))
25049 } else {
25050 Ok(e)
25051 }
25052 }
25053
25054 Action::JsonExtractToTsql => {
25055 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
25056 let (this, path) = match e {
25057 Expression::JsonExtract(f) => (f.this, f.path),
25058 Expression::JsonExtractScalar(f) => (f.this, f.path),
25059 _ => return Ok(e),
25060 };
25061 // Transform path: strip wildcards, convert bracket notation to dot notation
25062 let transformed_path = if let Expression::Literal(ref lit) = path {
25063 if let Literal::String(ref s) = lit.as_ref() {
25064 let stripped = Self::strip_json_wildcards(s);
25065 let dotted = Self::bracket_to_dot_notation(&stripped);
25066 Expression::string(&dotted)
25067 } else {
25068 path.clone()
25069 }
25070 } else {
25071 path
25072 };
25073 let json_query = Expression::Function(Box::new(Function::new(
25074 "JSON_QUERY".to_string(),
25075 vec![this.clone(), transformed_path.clone()],
25076 )));
25077 let json_value = Expression::Function(Box::new(Function::new(
25078 "JSON_VALUE".to_string(),
25079 vec![this, transformed_path],
25080 )));
25081 Ok(Expression::Function(Box::new(Function::new(
25082 "ISNULL".to_string(),
25083 vec![json_query, json_value],
25084 ))))
25085 }
25086
25087 Action::JsonExtractToClickHouse => {
25088 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
25089 let (this, path) = match e {
25090 Expression::JsonExtract(f) => (f.this, f.path),
25091 Expression::JsonExtractScalar(f) => (f.this, f.path),
25092 _ => return Ok(e),
25093 };
25094 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
25095 if let Literal::String(ref s) = lit.as_ref() {
25096 let parts = Self::decompose_json_path(s);
25097 let mut result = vec![this];
25098 for part in parts {
25099 // ClickHouse uses 1-based integer indices for array access
25100 if let Ok(idx) = part.parse::<i64>() {
25101 result.push(Expression::number(idx + 1));
25102 } else {
25103 result.push(Expression::string(&part));
25104 }
25105 }
25106 result
25107 } else {
25108 vec![]
25109 }
25110 } else {
25111 vec![this, path]
25112 };
25113 Ok(Expression::Function(Box::new(Function::new(
25114 "JSONExtractString".to_string(),
25115 args,
25116 ))))
25117 }
25118
25119 Action::JsonExtractScalarConvert => {
25120 // JSON_EXTRACT_SCALAR -> target-specific
25121 if let Expression::JsonExtractScalar(f) = e {
25122 match target {
25123 DialectType::PostgreSQL | DialectType::Redshift => {
25124 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
25125 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
25126 {
25127 if let Literal::String(ref s) = lit.as_ref() {
25128 let parts = Self::decompose_json_path(s);
25129 parts.into_iter().map(|k| Expression::string(&k)).collect()
25130 } else {
25131 vec![]
25132 }
25133 } else {
25134 vec![f.path]
25135 };
25136 let mut args = vec![f.this];
25137 args.extend(keys);
25138 Ok(Expression::Function(Box::new(Function::new(
25139 "JSON_EXTRACT_PATH_TEXT".to_string(),
25140 args,
25141 ))))
25142 }
25143 DialectType::Snowflake => {
25144 // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
25145 let stripped_path = if let Expression::Literal(ref lit) = f.path {
25146 if let Literal::String(ref s) = lit.as_ref() {
25147 let stripped = Self::strip_json_dollar_prefix(s);
25148 Expression::string(&stripped)
25149 } else {
25150 f.path.clone()
25151 }
25152 } else {
25153 f.path
25154 };
25155 Ok(Expression::Function(Box::new(Function::new(
25156 "JSON_EXTRACT_PATH_TEXT".to_string(),
25157 vec![f.this, stripped_path],
25158 ))))
25159 }
25160 DialectType::SQLite | DialectType::DuckDB => {
25161 // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
25162 Ok(Expression::JsonExtractScalar(Box::new(
25163 crate::expressions::JsonExtractFunc {
25164 this: f.this,
25165 path: f.path,
25166 returning: f.returning,
25167 arrow_syntax: true,
25168 hash_arrow_syntax: false,
25169 wrapper_option: None,
25170 quotes_option: None,
25171 on_scalar_string: false,
25172 on_error: None,
25173 },
25174 )))
25175 }
25176 _ => Ok(Expression::JsonExtractScalar(f)),
25177 }
25178 } else {
25179 Ok(e)
25180 }
25181 }
25182
25183 Action::JsonPathNormalize => {
25184 // Normalize JSON path format for BigQuery, MySQL, etc.
25185 if let Expression::JsonExtract(mut f) = e {
25186 if let Expression::Literal(ref lit) = f.path {
25187 if let Literal::String(ref s) = lit.as_ref() {
25188 let mut normalized = s.clone();
25189 // Convert bracket notation and handle wildcards per dialect
25190 match target {
25191 DialectType::BigQuery => {
25192 // BigQuery strips wildcards and uses single quotes in brackets
25193 normalized = Self::strip_json_wildcards(&normalized);
25194 normalized = Self::bracket_to_single_quotes(&normalized);
25195 }
25196 DialectType::MySQL => {
25197 // MySQL preserves wildcards, converts brackets to dot notation
25198 normalized = Self::bracket_to_dot_notation(&normalized);
25199 }
25200 _ => {}
25201 }
25202 if normalized != *s {
25203 f.path = Expression::string(&normalized);
25204 }
25205 }
25206 }
25207 Ok(Expression::JsonExtract(f))
25208 } else {
25209 Ok(e)
25210 }
25211 }
25212
Action::JsonQueryValueConvert => {
    // JsonQuery/JsonValue -> target-specific
    // `is_query` remembers which of the two nodes we started from; it is only
    // consulted by the fallback arm at the bottom. Any other expression is
    // returned unchanged.
    let (f, is_query) = match e {
        Expression::JsonQuery(f) => (f, true),
        Expression::JsonValue(f) => (f, false),
        _ => return Ok(e),
    };
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
            // Both calls receive the same operand and path, hence the clones
            // for the first call.
            let json_query = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![f.this.clone(), f.path.clone()],
            )));
            let json_value = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![f.this, f.path],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![json_query, json_value],
            ))))
        }
        DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
            // GET_JSON_OBJECT(json, path) with the path passed through untouched.
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // JSON_EXTRACT_PATH_TEXT(json, path).
            // NOTE(review): the path is forwarded as-is here, whereas the
            // JSON_EXTRACT arms in this match decompose '$.a.b' into separate
            // keys for the same function - confirm this difference is intended.
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                vec![f.this, f.path],
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path arrow syntax
            // All options carried by the source node are preserved; only the
            // two syntax flags are forced.
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: f.this,
                    path: f.path,
                    returning: f.returning,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: f.wrapper_option,
                    quotes_option: f.quotes_option,
                    on_scalar_string: f.on_scalar_string,
                    on_error: f.on_error,
                },
            )))
        }
        DialectType::Snowflake => {
            // GET_PATH(PARSE_JSON(json), 'path')
            // Strip $. prefix from path
            // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
            let json_expr = match &f.this {
                Expression::Function(ref inner_f)
                    if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                {
                    f.this
                }
                Expression::ParseJson(_) => {
                    // Already a ParseJson expression, which generates as PARSE_JSON(...)
                    f.this
                }
                _ => Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![f.this],
                ))),
            };
            // Only a string-literal path is rewritten (a leading "$." is
            // removed when present); every other path expression is cloned
            // through unchanged.
            let path_str = match &f.path {
                Expression::Literal(lit)
                    if matches!(lit.as_ref(), Literal::String(_)) =>
                {
                    // The guard above already established the variant, so the
                    // `else` branch cannot be taken.
                    let Literal::String(s) = lit.as_ref() else {
                        unreachable!()
                    };
                    let stripped = s.strip_prefix("$.").unwrap_or(s);
                    Expression::Literal(Box::new(Literal::String(
                        stripped.to_string(),
                    )))
                }
                other => other.clone(),
            };
            Ok(Expression::Function(Box::new(Function::new(
                "GET_PATH".to_string(),
                vec![json_expr, path_str],
            ))))
        }
        _ => {
            // Default: keep as JSON_QUERY/JSON_VALUE function
            let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![f.this, f.path],
            ))))
        }
    }
}
25312
25313 Action::JsonLiteralToJsonParse => {
25314 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
25315 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
25316 if let Expression::Cast(c) = e {
25317 let func_name = if matches!(target, DialectType::Snowflake) {
25318 "PARSE_JSON"
25319 } else {
25320 "JSON_PARSE"
25321 };
25322 Ok(Expression::Function(Box::new(Function::new(
25323 func_name.to_string(),
25324 vec![c.this],
25325 ))))
25326 } else {
25327 Ok(e)
25328 }
25329 }
25330
25331 Action::DuckDBCastJsonToVariant => {
25332 if let Expression::Cast(c) = e {
25333 Ok(Expression::Cast(Box::new(Cast {
25334 this: c.this,
25335 to: DataType::Custom {
25336 name: "VARIANT".to_string(),
25337 },
25338 trailing_comments: c.trailing_comments,
25339 double_colon_syntax: false,
25340 format: None,
25341 default: None,
25342 inferred_type: None,
25343 })))
25344 } else {
25345 Ok(e)
25346 }
25347 }
25348
25349 Action::DuckDBTryCastJsonToTryJsonParse => {
25350 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
25351 if let Expression::TryCast(c) = e {
25352 let json_parse = Expression::Function(Box::new(Function::new(
25353 "JSON_PARSE".to_string(),
25354 vec![c.this],
25355 )));
25356 Ok(Expression::Function(Box::new(Function::new(
25357 "TRY".to_string(),
25358 vec![json_parse],
25359 ))))
25360 } else {
25361 Ok(e)
25362 }
25363 }
25364
25365 Action::DuckDBJsonFuncToJsonParse => {
25366 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
25367 if let Expression::Function(f) = e {
25368 let args = f.args;
25369 Ok(Expression::Function(Box::new(Function::new(
25370 "JSON_PARSE".to_string(),
25371 args,
25372 ))))
25373 } else {
25374 Ok(e)
25375 }
25376 }
25377
25378 Action::DuckDBJsonValidToIsJson => {
25379 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
25380 if let Expression::Function(mut f) = e {
25381 let arg = f.args.remove(0);
25382 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
25383 this: arg,
25384 json_type: None,
25385 unique_keys: None,
25386 negated: false,
25387 })))
25388 } else {
25389 Ok(e)
25390 }
25391 }
25392
25393 Action::AtTimeZoneConvert => {
25394 // AT TIME ZONE -> target-specific conversion
25395 if let Expression::AtTimeZone(atz) = e {
25396 match target {
25397 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
25398 Ok(Expression::Function(Box::new(Function::new(
25399 "AT_TIMEZONE".to_string(),
25400 vec![atz.this, atz.zone],
25401 ))))
25402 }
25403 DialectType::Spark | DialectType::Databricks => {
25404 Ok(Expression::Function(Box::new(Function::new(
25405 "FROM_UTC_TIMESTAMP".to_string(),
25406 vec![atz.this, atz.zone],
25407 ))))
25408 }
25409 DialectType::Snowflake => {
25410 // CONVERT_TIMEZONE('zone', expr)
25411 Ok(Expression::Function(Box::new(Function::new(
25412 "CONVERT_TIMEZONE".to_string(),
25413 vec![atz.zone, atz.this],
25414 ))))
25415 }
25416 DialectType::BigQuery => {
25417 // TIMESTAMP(DATETIME(expr, 'zone'))
25418 let datetime_call = Expression::Function(Box::new(Function::new(
25419 "DATETIME".to_string(),
25420 vec![atz.this, atz.zone],
25421 )));
25422 Ok(Expression::Function(Box::new(Function::new(
25423 "TIMESTAMP".to_string(),
25424 vec![datetime_call],
25425 ))))
25426 }
25427 _ => Ok(Expression::Function(Box::new(Function::new(
25428 "AT_TIMEZONE".to_string(),
25429 vec![atz.this, atz.zone],
25430 )))),
25431 }
25432 } else {
25433 Ok(e)
25434 }
25435 }
25436
25437 Action::DayOfWeekConvert => {
25438 // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
25439 if let Expression::DayOfWeek(f) = e {
25440 match target {
25441 DialectType::DuckDB => Ok(Expression::Function(Box::new(
25442 Function::new("ISODOW".to_string(), vec![f.this]),
25443 ))),
25444 DialectType::Spark | DialectType::Databricks => {
25445 // ((DAYOFWEEK(x) % 7) + 1)
25446 let dayofweek = Expression::Function(Box::new(Function::new(
25447 "DAYOFWEEK".to_string(),
25448 vec![f.this],
25449 )));
25450 let modulo = Expression::Mod(Box::new(BinaryOp {
25451 left: dayofweek,
25452 right: Expression::number(7),
25453 left_comments: Vec::new(),
25454 operator_comments: Vec::new(),
25455 trailing_comments: Vec::new(),
25456 inferred_type: None,
25457 }));
25458 let paren_mod = Expression::Paren(Box::new(Paren {
25459 this: modulo,
25460 trailing_comments: Vec::new(),
25461 }));
25462 let add_one = Expression::Add(Box::new(BinaryOp {
25463 left: paren_mod,
25464 right: Expression::number(1),
25465 left_comments: Vec::new(),
25466 operator_comments: Vec::new(),
25467 trailing_comments: Vec::new(),
25468 inferred_type: None,
25469 }));
25470 Ok(Expression::Paren(Box::new(Paren {
25471 this: add_one,
25472 trailing_comments: Vec::new(),
25473 })))
25474 }
25475 _ => Ok(Expression::DayOfWeek(f)),
25476 }
25477 } else {
25478 Ok(e)
25479 }
25480 }
25481
25482 Action::MaxByMinByConvert => {
25483 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
25484 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
25485 // Handle both Expression::Function and Expression::AggregateFunction
25486 let (is_max, args) = match &e {
25487 Expression::Function(f) => {
25488 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
25489 }
25490 Expression::AggregateFunction(af) => {
25491 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
25492 }
25493 _ => return Ok(e),
25494 };
25495 match target {
25496 DialectType::ClickHouse => {
25497 let name = if is_max { "argMax" } else { "argMin" };
25498 let mut args = args;
25499 args.truncate(2);
25500 Ok(Expression::Function(Box::new(Function::new(
25501 name.to_string(),
25502 args,
25503 ))))
25504 }
25505 DialectType::DuckDB => {
25506 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
25507 Ok(Expression::Function(Box::new(Function::new(
25508 name.to_string(),
25509 args,
25510 ))))
25511 }
25512 DialectType::Spark | DialectType::Databricks => {
25513 let mut args = args;
25514 args.truncate(2);
25515 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
25516 Ok(Expression::Function(Box::new(Function::new(
25517 name.to_string(),
25518 args,
25519 ))))
25520 }
25521 _ => Ok(e),
25522 }
25523 }
25524
25525 Action::ElementAtConvert => {
25526 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
25527 let (arr, idx) = if let Expression::ElementAt(bf) = e {
25528 (bf.this, bf.expression)
25529 } else if let Expression::Function(ref f) = e {
25530 if f.args.len() >= 2 {
25531 if let Expression::Function(f) = e {
25532 let mut args = f.args;
25533 let arr = args.remove(0);
25534 let idx = args.remove(0);
25535 (arr, idx)
25536 } else {
25537 unreachable!("outer condition already matched Expression::Function")
25538 }
25539 } else {
25540 return Ok(e);
25541 }
25542 } else {
25543 return Ok(e);
25544 };
25545 match target {
25546 DialectType::PostgreSQL => {
25547 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
25548 let arr_expr = Expression::Paren(Box::new(Paren {
25549 this: arr,
25550 trailing_comments: vec![],
25551 }));
25552 Ok(Expression::Subscript(Box::new(
25553 crate::expressions::Subscript {
25554 this: arr_expr,
25555 index: idx,
25556 },
25557 )))
25558 }
25559 DialectType::BigQuery => {
25560 // BigQuery: convert ARRAY[...] to bare [...] for subscript
25561 let arr_expr = match arr {
25562 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
25563 crate::expressions::ArrayConstructor {
25564 expressions: af.expressions,
25565 bracket_notation: true,
25566 use_list_keyword: false,
25567 },
25568 )),
25569 other => other,
25570 };
25571 let safe_ordinal = Expression::Function(Box::new(Function::new(
25572 "SAFE_ORDINAL".to_string(),
25573 vec![idx],
25574 )));
25575 Ok(Expression::Subscript(Box::new(
25576 crate::expressions::Subscript {
25577 this: arr_expr,
25578 index: safe_ordinal,
25579 },
25580 )))
25581 }
25582 _ => Ok(Expression::Function(Box::new(Function::new(
25583 "ELEMENT_AT".to_string(),
25584 vec![arr, idx],
25585 )))),
25586 }
25587 }
25588
25589 Action::CurrentUserParens => {
25590 // CURRENT_USER -> CURRENT_USER() for Snowflake
25591 Ok(Expression::Function(Box::new(Function::new(
25592 "CURRENT_USER".to_string(),
25593 vec![],
25594 ))))
25595 }
25596
25597 Action::ArrayAggToCollectList => {
25598 // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
25599 // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
25600 // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
25601 match e {
25602 Expression::AggregateFunction(mut af) => {
25603 let is_simple =
25604 !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
25605 let args = if af.args.is_empty() {
25606 vec![]
25607 } else {
25608 vec![af.args[0].clone()]
25609 };
25610 af.name = "COLLECT_LIST".to_string();
25611 af.args = args;
25612 if is_simple {
25613 af.order_by = Vec::new();
25614 }
25615 Ok(Expression::AggregateFunction(af))
25616 }
25617 Expression::ArrayAgg(agg) => {
25618 let is_simple =
25619 !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
25620 Ok(Expression::AggregateFunction(Box::new(
25621 crate::expressions::AggregateFunction {
25622 name: "COLLECT_LIST".to_string(),
25623 args: vec![agg.this.clone()],
25624 distinct: agg.distinct,
25625 filter: agg.filter.clone(),
25626 order_by: if is_simple {
25627 Vec::new()
25628 } else {
25629 agg.order_by.clone()
25630 },
25631 limit: agg.limit.clone(),
25632 ignore_nulls: agg.ignore_nulls,
25633 inferred_type: None,
25634 },
25635 )))
25636 }
25637 _ => Ok(e),
25638 }
25639 }
25640
25641 Action::ArraySyntaxConvert => {
25642 match e {
25643 // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
25644 // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
25645 Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
25646 Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
25647 expressions: arr.expressions,
25648 bracket_notation: true,
25649 use_list_keyword: false,
25650 })),
25651 ),
25652 // ARRAY(y) function style -> ArrayFunc for target dialect
25653 // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
25654 Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
25655 let bracket = matches!(
25656 target,
25657 DialectType::BigQuery
25658 | DialectType::DuckDB
25659 | DialectType::Snowflake
25660 | DialectType::ClickHouse
25661 | DialectType::StarRocks
25662 );
25663 Ok(Expression::ArrayFunc(Box::new(
25664 crate::expressions::ArrayConstructor {
25665 expressions: f.args,
25666 bracket_notation: bracket,
25667 use_list_keyword: false,
25668 },
25669 )))
25670 }
25671 _ => Ok(e),
25672 }
25673 }
25674
25675 Action::CastToJsonForSpark => {
25676 // CAST(x AS JSON) -> TO_JSON(x) for Spark
25677 if let Expression::Cast(c) = e {
25678 Ok(Expression::Function(Box::new(Function::new(
25679 "TO_JSON".to_string(),
25680 vec![c.this],
25681 ))))
25682 } else {
25683 Ok(e)
25684 }
25685 }
25686
25687 Action::CastJsonToFromJson => {
25688 // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
25689 if let Expression::Cast(c) = e {
25690 // Extract the string literal from ParseJson
25691 let literal_expr = if let Expression::ParseJson(pj) = c.this {
25692 pj.this
25693 } else {
25694 c.this
25695 };
25696 // Convert the target DataType to Spark's type string format
25697 let type_str = Self::data_type_to_spark_string(&c.to);
25698 Ok(Expression::Function(Box::new(Function::new(
25699 "FROM_JSON".to_string(),
25700 vec![
25701 literal_expr,
25702 Expression::Literal(Box::new(Literal::String(type_str))),
25703 ],
25704 ))))
25705 } else {
25706 Ok(e)
25707 }
25708 }
25709
25710 Action::ToJsonConvert => {
25711 // TO_JSON(x) -> target-specific conversion
25712 if let Expression::ToJson(f) = e {
25713 let arg = f.this;
25714 match target {
25715 DialectType::Presto | DialectType::Trino => {
25716 // JSON_FORMAT(CAST(x AS JSON))
25717 let cast_json = Expression::Cast(Box::new(Cast {
25718 this: arg,
25719 to: DataType::Custom {
25720 name: "JSON".to_string(),
25721 },
25722 trailing_comments: vec![],
25723 double_colon_syntax: false,
25724 format: None,
25725 default: None,
25726 inferred_type: None,
25727 }));
25728 Ok(Expression::Function(Box::new(Function::new(
25729 "JSON_FORMAT".to_string(),
25730 vec![cast_json],
25731 ))))
25732 }
25733 DialectType::BigQuery => Ok(Expression::Function(Box::new(
25734 Function::new("TO_JSON_STRING".to_string(), vec![arg]),
25735 ))),
25736 DialectType::DuckDB => {
25737 // CAST(TO_JSON(x) AS TEXT)
25738 let to_json =
25739 Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
25740 this: arg,
25741 original_name: None,
25742 inferred_type: None,
25743 }));
25744 Ok(Expression::Cast(Box::new(Cast {
25745 this: to_json,
25746 to: DataType::Text,
25747 trailing_comments: vec![],
25748 double_colon_syntax: false,
25749 format: None,
25750 default: None,
25751 inferred_type: None,
25752 })))
25753 }
25754 _ => Ok(Expression::ToJson(Box::new(
25755 crate::expressions::UnaryFunc {
25756 this: arg,
25757 original_name: None,
25758 inferred_type: None,
25759 },
25760 ))),
25761 }
25762 } else {
25763 Ok(e)
25764 }
25765 }
25766
25767 Action::VarianceToClickHouse => {
25768 if let Expression::Variance(f) = e {
25769 Ok(Expression::Function(Box::new(Function::new(
25770 "varSamp".to_string(),
25771 vec![f.this],
25772 ))))
25773 } else {
25774 Ok(e)
25775 }
25776 }
25777
25778 Action::StddevToClickHouse => {
25779 if let Expression::Stddev(f) = e {
25780 Ok(Expression::Function(Box::new(Function::new(
25781 "stddevSamp".to_string(),
25782 vec![f.this],
25783 ))))
25784 } else {
25785 Ok(e)
25786 }
25787 }
25788
25789 Action::ApproxQuantileConvert => {
25790 if let Expression::ApproxQuantile(aq) = e {
25791 let mut args = vec![*aq.this];
25792 if let Some(q) = aq.quantile {
25793 args.push(*q);
25794 }
25795 Ok(Expression::Function(Box::new(Function::new(
25796 "APPROX_PERCENTILE".to_string(),
25797 args,
25798 ))))
25799 } else {
25800 Ok(e)
25801 }
25802 }
25803
25804 Action::DollarParamConvert => {
25805 if let Expression::Parameter(p) = e {
25806 Ok(Expression::Parameter(Box::new(
25807 crate::expressions::Parameter {
25808 name: p.name,
25809 index: p.index,
25810 style: crate::expressions::ParameterStyle::At,
25811 quoted: p.quoted,
25812 string_quoted: p.string_quoted,
25813 expression: p.expression,
25814 },
25815 )))
25816 } else {
25817 Ok(e)
25818 }
25819 }
25820
25821 Action::EscapeStringNormalize => {
25822 if let Expression::Literal(ref lit) = e {
25823 if let Literal::EscapeString(s) = lit.as_ref() {
25824 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
25825 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
25826 s[2..].to_string()
25827 } else {
25828 s.clone()
25829 };
25830 let normalized = stripped
25831 .replace('\n', "\\n")
25832 .replace('\r', "\\r")
25833 .replace('\t', "\\t");
25834 match target {
25835 DialectType::BigQuery => {
25836 // BigQuery: e'...' -> CAST(b'...' AS STRING)
25837 // Use Raw for the b'...' part to avoid double-escaping
25838 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
25839 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
25840 }
25841 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
25842 normalized,
25843 )))),
25844 }
25845 } else {
25846 Ok(e)
25847 }
25848 } else {
25849 Ok(e)
25850 }
25851 }
25852
25853 Action::StraightJoinCase => {
25854 // straight_join: keep lowercase for DuckDB, quote for MySQL
25855 if let Expression::Column(col) = e {
25856 if col.name.name == "STRAIGHT_JOIN" {
25857 let mut new_col = col;
25858 new_col.name.name = "straight_join".to_string();
25859 if matches!(target, DialectType::MySQL) {
25860 // MySQL: needs quoting since it's a reserved keyword
25861 new_col.name.quoted = true;
25862 }
25863 Ok(Expression::Column(new_col))
25864 } else {
25865 Ok(Expression::Column(col))
25866 }
25867 } else {
25868 Ok(e)
25869 }
25870 }
25871
25872 Action::TablesampleReservoir => {
25873 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB
25874 if let Expression::TableSample(mut ts) = e {
25875 if let Some(ref mut sample) = ts.sample {
25876 sample.method = crate::expressions::SampleMethod::Reservoir;
25877 sample.explicit_method = true;
25878 }
25879 Ok(Expression::TableSample(ts))
25880 } else {
25881 Ok(e)
25882 }
25883 }
25884
25885 Action::TablesampleSnowflakeStrip => {
25886 // Strip method and PERCENT for Snowflake target from non-Snowflake source
25887 match e {
25888 Expression::TableSample(mut ts) => {
25889 if let Some(ref mut sample) = ts.sample {
25890 sample.suppress_method_output = true;
25891 sample.unit_after_size = false;
25892 sample.is_percent = false;
25893 }
25894 Ok(Expression::TableSample(ts))
25895 }
25896 Expression::Table(mut t) => {
25897 if let Some(ref mut sample) = t.table_sample {
25898 sample.suppress_method_output = true;
25899 sample.unit_after_size = false;
25900 sample.is_percent = false;
25901 }
25902 Ok(Expression::Table(t))
25903 }
25904 _ => Ok(e),
25905 }
25906 }
25907
25908 Action::FirstToAnyValue => {
25909 // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
25910 if let Expression::First(mut agg) = e {
25911 agg.ignore_nulls = None;
25912 agg.name = Some("ANY_VALUE".to_string());
25913 Ok(Expression::AnyValue(agg))
25914 } else {
25915 Ok(e)
25916 }
25917 }
25918
25919 Action::ArrayIndexConvert => {
25920 // Subscript index: 1-based to 0-based for BigQuery
25921 if let Expression::Subscript(mut sub) = e {
25922 if let Expression::Literal(ref lit) = sub.index {
25923 if let Literal::Number(ref n) = lit.as_ref() {
25924 if let Ok(val) = n.parse::<i64>() {
25925 sub.index = Expression::Literal(Box::new(Literal::Number(
25926 (val - 1).to_string(),
25927 )));
25928 }
25929 }
25930 }
25931 Ok(Expression::Subscript(sub))
25932 } else {
25933 Ok(e)
25934 }
25935 }
25936
25937 Action::AnyValueIgnoreNulls => {
25938 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
25939 if let Expression::AnyValue(mut av) = e {
25940 if av.ignore_nulls.is_none() {
25941 av.ignore_nulls = Some(true);
25942 }
25943 Ok(Expression::AnyValue(av))
25944 } else {
25945 Ok(e)
25946 }
25947 }
25948
25949 Action::BigQueryNullsOrdering => {
25950 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
25951 if let Expression::WindowFunction(mut wf) = e {
25952 for o in &mut wf.over.order_by {
25953 o.nulls_first = None;
25954 }
25955 Ok(Expression::WindowFunction(wf))
25956 } else if let Expression::Ordered(mut o) = e {
25957 o.nulls_first = None;
25958 Ok(Expression::Ordered(o))
25959 } else {
25960 Ok(e)
25961 }
25962 }
25963
25964 Action::SnowflakeFloatProtect => {
25965 // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
25966 // Snowflake's target transform from converting it to DOUBLE.
25967 // Non-Snowflake sources should keep their FLOAT spelling.
25968 if let Expression::DataType(DataType::Float { .. }) = e {
25969 Ok(Expression::DataType(DataType::Custom {
25970 name: "FLOAT".to_string(),
25971 }))
25972 } else {
25973 Ok(e)
25974 }
25975 }
25976
25977 Action::MysqlNullsOrdering => {
25978 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
25979 if let Expression::Ordered(mut o) = e {
25980 let nulls_last = o.nulls_first == Some(false);
25981 let desc = o.desc;
25982 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
25983 // If requested ordering matches default, just strip NULLS clause
25984 let matches_default = if desc {
25985 // DESC default is NULLS FIRST, so nulls_first=true matches
25986 o.nulls_first == Some(true)
25987 } else {
25988 // ASC default is NULLS LAST, so nulls_first=false matches
25989 nulls_last
25990 };
25991 if matches_default {
25992 o.nulls_first = None;
25993 Ok(Expression::Ordered(o))
25994 } else {
25995 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
25996 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
25997 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
25998 let null_val = if desc { 1 } else { 0 };
25999 let non_null_val = if desc { 0 } else { 1 };
26000 let _case_expr = Expression::Case(Box::new(Case {
26001 operand: None,
26002 whens: vec![(
26003 Expression::IsNull(Box::new(crate::expressions::IsNull {
26004 this: o.this.clone(),
26005 not: false,
26006 postfix_form: false,
26007 })),
26008 Expression::number(null_val),
26009 )],
26010 else_: Some(Expression::number(non_null_val)),
26011 comments: Vec::new(),
26012 inferred_type: None,
26013 }));
26014 o.nulls_first = None;
26015 // Return a tuple of [case_expr, ordered_expr]
26016 // We need to return both as part of the ORDER BY
26017 // But since transform_recursive processes individual expressions,
26018 // we can't easily add extra ORDER BY items here.
26019 // Instead, strip the nulls_first
26020 o.nulls_first = None;
26021 Ok(Expression::Ordered(o))
26022 }
26023 } else {
26024 Ok(e)
26025 }
26026 }
26027
26028 Action::MysqlNullsLastRewrite => {
26029 // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
26030 // to simulate NULLS LAST for ASC ordering
26031 if let Expression::WindowFunction(mut wf) = e {
26032 let mut new_order_by = Vec::new();
26033 for o in wf.over.order_by {
26034 if !o.desc {
26035 // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
26036 // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
26037 let case_expr = Expression::Case(Box::new(Case {
26038 operand: None,
26039 whens: vec![(
26040 Expression::IsNull(Box::new(crate::expressions::IsNull {
26041 this: o.this.clone(),
26042 not: false,
26043 postfix_form: false,
26044 })),
26045 Expression::Literal(Box::new(Literal::Number(
26046 "1".to_string(),
26047 ))),
26048 )],
26049 else_: Some(Expression::Literal(Box::new(Literal::Number(
26050 "0".to_string(),
26051 )))),
26052 comments: Vec::new(),
26053 inferred_type: None,
26054 }));
26055 new_order_by.push(crate::expressions::Ordered {
26056 this: case_expr,
26057 desc: false,
26058 nulls_first: None,
26059 explicit_asc: false,
26060 with_fill: None,
26061 });
26062 let mut ordered = o;
26063 ordered.nulls_first = None;
26064 new_order_by.push(ordered);
26065 } else {
26066 // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
26067 // No change needed
26068 let mut ordered = o;
26069 ordered.nulls_first = None;
26070 new_order_by.push(ordered);
26071 }
26072 }
26073 wf.over.order_by = new_order_by;
26074 Ok(Expression::WindowFunction(wf))
26075 } else {
26076 Ok(e)
26077 }
26078 }
26079
26080 Action::RespectNullsConvert => {
26081 // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...))
26082 if let Expression::WindowFunction(mut wf) = e {
26083 match &mut wf.this {
26084 Expression::FirstValue(ref mut vf) => {
26085 if vf.ignore_nulls == Some(false) {
26086 vf.ignore_nulls = None;
26087 // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
26088 // but that's handled by the generator's NULLS ordering
26089 }
26090 }
26091 Expression::LastValue(ref mut vf) => {
26092 if vf.ignore_nulls == Some(false) {
26093 vf.ignore_nulls = None;
26094 }
26095 }
26096 _ => {}
26097 }
26098 Ok(Expression::WindowFunction(wf))
26099 } else {
26100 Ok(e)
26101 }
26102 }
26103
26104 Action::SnowflakeWindowFrameStrip => {
26105 // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
26106 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake
26107 if let Expression::WindowFunction(mut wf) = e {
26108 wf.over.frame = None;
26109 Ok(Expression::WindowFunction(wf))
26110 } else {
26111 Ok(e)
26112 }
26113 }
26114
26115 Action::SnowflakeWindowFrameAdd => {
26116 // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
26117 // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake
26118 if let Expression::WindowFunction(mut wf) = e {
26119 wf.over.frame = Some(crate::expressions::WindowFrame {
26120 kind: crate::expressions::WindowFrameKind::Rows,
26121 start: crate::expressions::WindowFrameBound::UnboundedPreceding,
26122 end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
26123 exclude: None,
26124 kind_text: None,
26125 start_side_text: None,
26126 end_side_text: None,
26127 });
26128 Ok(Expression::WindowFunction(wf))
26129 } else {
26130 Ok(e)
26131 }
26132 }
26133
26134 Action::CreateTableStripComment => {
26135 // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
26136 if let Expression::CreateTable(mut ct) = e {
26137 for col in &mut ct.columns {
26138 col.comment = None;
26139 col.constraints.retain(|c| {
26140 !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
26141 });
26142 // Also remove Comment from constraint_order
26143 col.constraint_order.retain(|c| {
26144 !matches!(c, crate::expressions::ConstraintType::Comment)
26145 });
26146 }
26147 // Strip properties (USING, PARTITIONED BY, etc.)
26148 ct.properties.clear();
26149 Ok(Expression::CreateTable(ct))
26150 } else {
26151 Ok(e)
26152 }
26153 }
26154
26155 Action::AlterTableToSpRename => {
26156 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
26157 if let Expression::AlterTable(ref at) = e {
26158 if let Some(crate::expressions::AlterTableAction::RenameTable(
26159 ref new_tbl,
26160 )) = at.actions.first()
26161 {
26162 // Build the old table name using TSQL bracket quoting
26163 let old_name = if let Some(ref schema) = at.name.schema {
26164 if at.name.name.quoted || schema.quoted {
26165 format!("[{}].[{}]", schema.name, at.name.name.name)
26166 } else {
26167 format!("{}.{}", schema.name, at.name.name.name)
26168 }
26169 } else {
26170 if at.name.name.quoted {
26171 format!("[{}]", at.name.name.name)
26172 } else {
26173 at.name.name.name.clone()
26174 }
26175 };
26176 let new_name = new_tbl.name.name.clone();
26177 // EXEC sp_rename 'old_name', 'new_name'
26178 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
26179 Ok(Expression::Raw(crate::expressions::Raw { sql }))
26180 } else {
26181 Ok(e)
26182 }
26183 } else {
26184 Ok(e)
26185 }
26186 }
26187
26188 Action::SnowflakeIntervalFormat => {
26189 // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
26190 if let Expression::Interval(mut iv) = e {
26191 if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
26192 (&iv.this, &iv.unit)
26193 {
26194 if let Literal::String(ref val) = lit.as_ref() {
26195 let unit_str = match unit_spec {
26196 crate::expressions::IntervalUnitSpec::Simple {
26197 unit, ..
26198 } => match unit {
26199 crate::expressions::IntervalUnit::Year => "YEAR",
26200 crate::expressions::IntervalUnit::Quarter => "QUARTER",
26201 crate::expressions::IntervalUnit::Month => "MONTH",
26202 crate::expressions::IntervalUnit::Week => "WEEK",
26203 crate::expressions::IntervalUnit::Day => "DAY",
26204 crate::expressions::IntervalUnit::Hour => "HOUR",
26205 crate::expressions::IntervalUnit::Minute => "MINUTE",
26206 crate::expressions::IntervalUnit::Second => "SECOND",
26207 crate::expressions::IntervalUnit::Millisecond => {
26208 "MILLISECOND"
26209 }
26210 crate::expressions::IntervalUnit::Microsecond => {
26211 "MICROSECOND"
26212 }
26213 crate::expressions::IntervalUnit::Nanosecond => {
26214 "NANOSECOND"
26215 }
26216 },
26217 _ => "",
26218 };
26219 if !unit_str.is_empty() {
26220 let combined = format!("{} {}", val, unit_str);
26221 iv.this = Some(Expression::Literal(Box::new(Literal::String(
26222 combined,
26223 ))));
26224 iv.unit = None;
26225 }
26226 }
26227 }
26228 Ok(Expression::Interval(iv))
26229 } else {
26230 Ok(e)
26231 }
26232 }
26233
26234 Action::ArrayConcatBracketConvert => {
26235 // Expression::Array/ArrayFunc -> target-specific
26236 // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
26237 // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
26238 match e {
26239 Expression::Array(arr) => {
26240 if matches!(target, DialectType::Redshift) {
26241 Ok(Expression::Function(Box::new(Function::new(
26242 "ARRAY".to_string(),
26243 arr.expressions,
26244 ))))
26245 } else {
26246 Ok(Expression::ArrayFunc(Box::new(
26247 crate::expressions::ArrayConstructor {
26248 expressions: arr.expressions,
26249 bracket_notation: false,
26250 use_list_keyword: false,
26251 },
26252 )))
26253 }
26254 }
26255 Expression::ArrayFunc(arr) => {
26256 // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
26257 if matches!(target, DialectType::Redshift) {
26258 Ok(Expression::Function(Box::new(Function::new(
26259 "ARRAY".to_string(),
26260 arr.expressions,
26261 ))))
26262 } else {
26263 Ok(Expression::ArrayFunc(arr))
26264 }
26265 }
26266 _ => Ok(e),
26267 }
26268 }
26269
26270 Action::BitAggFloatCast => {
26271 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
26272 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
26273 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
26274 let int_type = DataType::Int {
26275 length: None,
26276 integer_spelling: false,
26277 };
26278 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
26279 if let Expression::Cast(c) = agg_this {
26280 match &c.to {
26281 DataType::Float { .. }
26282 | DataType::Double { .. }
26283 | DataType::Custom { .. } => {
26284 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
26285 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
26286 let inner_type = match &c.to {
26287 DataType::Float {
26288 precision, scale, ..
26289 } => DataType::Float {
26290 precision: *precision,
26291 scale: *scale,
26292 real_spelling: true,
26293 },
26294 other => other.clone(),
26295 };
26296 let inner_cast =
26297 Expression::Cast(Box::new(crate::expressions::Cast {
26298 this: c.this.clone(),
26299 to: inner_type,
26300 trailing_comments: Vec::new(),
26301 double_colon_syntax: false,
26302 format: None,
26303 default: None,
26304 inferred_type: None,
26305 }));
26306 let rounded = Expression::Function(Box::new(Function::new(
26307 "ROUND".to_string(),
26308 vec![inner_cast],
26309 )));
26310 Expression::Cast(Box::new(crate::expressions::Cast {
26311 this: rounded,
26312 to: int_dt,
26313 trailing_comments: Vec::new(),
26314 double_colon_syntax: false,
26315 format: None,
26316 default: None,
26317 inferred_type: None,
26318 }))
26319 }
26320 DataType::Decimal { .. } => {
26321 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
26322 Expression::Cast(Box::new(crate::expressions::Cast {
26323 this: Expression::Cast(c),
26324 to: int_dt,
26325 trailing_comments: Vec::new(),
26326 double_colon_syntax: false,
26327 format: None,
26328 default: None,
26329 inferred_type: None,
26330 }))
26331 }
26332 _ => Expression::Cast(c),
26333 }
26334 } else {
26335 agg_this
26336 }
26337 };
26338 match e {
26339 Expression::BitwiseOrAgg(mut f) => {
26340 f.this = wrap_agg(f.this, int_type);
26341 Ok(Expression::BitwiseOrAgg(f))
26342 }
26343 Expression::BitwiseAndAgg(mut f) => {
26344 let int_type = DataType::Int {
26345 length: None,
26346 integer_spelling: false,
26347 };
26348 f.this = wrap_agg(f.this, int_type);
26349 Ok(Expression::BitwiseAndAgg(f))
26350 }
26351 Expression::BitwiseXorAgg(mut f) => {
26352 let int_type = DataType::Int {
26353 length: None,
26354 integer_spelling: false,
26355 };
26356 f.this = wrap_agg(f.this, int_type);
26357 Ok(Expression::BitwiseXorAgg(f))
26358 }
26359 _ => Ok(e),
26360 }
26361 }
26362
26363 Action::BitAggSnowflakeRename => {
26364 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
26365 match e {
26366 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
26367 Function::new("BITORAGG".to_string(), vec![f.this]),
26368 ))),
26369 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
26370 Function::new("BITANDAGG".to_string(), vec![f.this]),
26371 ))),
26372 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
26373 Function::new("BITXORAGG".to_string(), vec![f.this]),
26374 ))),
26375 _ => Ok(e),
26376 }
26377 }
26378
26379 Action::StrftimeCastTimestamp => {
26380 // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark
26381 if let Expression::Cast(mut c) = e {
26382 if matches!(
26383 c.to,
26384 DataType::Timestamp {
26385 timezone: false,
26386 ..
26387 }
26388 ) {
26389 c.to = DataType::Custom {
26390 name: "TIMESTAMP_NTZ".to_string(),
26391 };
26392 }
26393 Ok(Expression::Cast(c))
26394 } else {
26395 Ok(e)
26396 }
26397 }
26398
26399 Action::DecimalDefaultPrecision => {
26400 // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake
26401 if let Expression::Cast(mut c) = e {
26402 if matches!(
26403 c.to,
26404 DataType::Decimal {
26405 precision: None,
26406 ..
26407 }
26408 ) {
26409 c.to = DataType::Decimal {
26410 precision: Some(18),
26411 scale: Some(3),
26412 };
26413 }
26414 Ok(Expression::Cast(c))
26415 } else {
26416 Ok(e)
26417 }
26418 }
26419
26420 Action::FilterToIff => {
26421 // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL))
26422 if let Expression::Filter(f) = e {
26423 let condition = *f.expression;
26424 let agg = *f.this;
26425 // Strip WHERE from condition
26426 let cond = match condition {
26427 Expression::Where(w) => w.this,
26428 other => other,
26429 };
26430 // Extract the aggregate function and its argument
26431 // We want AVG(IFF(condition, x, NULL))
26432 match agg {
26433 Expression::Function(mut func) => {
26434 if !func.args.is_empty() {
26435 let orig_arg = func.args[0].clone();
26436 let iff_call = Expression::Function(Box::new(Function::new(
26437 "IFF".to_string(),
26438 vec![cond, orig_arg, Expression::Null(Null)],
26439 )));
26440 func.args[0] = iff_call;
26441 Ok(Expression::Function(func))
26442 } else {
26443 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
26444 this: Box::new(Expression::Function(func)),
26445 expression: Box::new(cond),
26446 })))
26447 }
26448 }
26449 Expression::Avg(mut avg) => {
26450 let iff_call = Expression::Function(Box::new(Function::new(
26451 "IFF".to_string(),
26452 vec![cond, avg.this.clone(), Expression::Null(Null)],
26453 )));
26454 avg.this = iff_call;
26455 Ok(Expression::Avg(avg))
26456 }
26457 Expression::Sum(mut s) => {
26458 let iff_call = Expression::Function(Box::new(Function::new(
26459 "IFF".to_string(),
26460 vec![cond, s.this.clone(), Expression::Null(Null)],
26461 )));
26462 s.this = iff_call;
26463 Ok(Expression::Sum(s))
26464 }
26465 Expression::Count(mut c) => {
26466 if let Some(ref this_expr) = c.this {
26467 let iff_call = Expression::Function(Box::new(Function::new(
26468 "IFF".to_string(),
26469 vec![cond, this_expr.clone(), Expression::Null(Null)],
26470 )));
26471 c.this = Some(iff_call);
26472 }
26473 Ok(Expression::Count(c))
26474 }
26475 other => {
26476 // Fallback: keep as Filter
26477 Ok(Expression::Filter(Box::new(crate::expressions::Filter {
26478 this: Box::new(other),
26479 expression: Box::new(cond),
26480 })))
26481 }
26482 }
26483 } else {
26484 Ok(e)
26485 }
26486 }
26487
26488 Action::AggFilterToIff => {
26489 // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
26490 // Helper macro to handle the common AggFunc case
26491 macro_rules! handle_agg_filter_to_iff {
26492 ($variant:ident, $agg:expr) => {{
26493 let mut agg = $agg;
26494 if let Some(filter_cond) = agg.filter.take() {
26495 let iff_call = Expression::Function(Box::new(Function::new(
26496 "IFF".to_string(),
26497 vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
26498 )));
26499 agg.this = iff_call;
26500 }
26501 Ok(Expression::$variant(agg))
26502 }};
26503 }
26504
26505 match e {
26506 Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
26507 Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
26508 Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
26509 Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
26510 Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
26511 Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
26512 Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
26513 Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
26514 Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
26515 Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
26516 Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
26517 Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
26518 Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
26519 Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
26520 Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
26521 Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
26522 Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
26523 Expression::ApproxDistinct(agg) => {
26524 handle_agg_filter_to_iff!(ApproxDistinct, agg)
26525 }
26526 Expression::Count(mut c) => {
26527 if let Some(filter_cond) = c.filter.take() {
26528 if let Some(ref this_expr) = c.this {
26529 let iff_call = Expression::Function(Box::new(Function::new(
26530 "IFF".to_string(),
26531 vec![
26532 filter_cond,
26533 this_expr.clone(),
26534 Expression::Null(Null),
26535 ],
26536 )));
26537 c.this = Some(iff_call);
26538 }
26539 }
26540 Ok(Expression::Count(c))
26541 }
26542 other => Ok(other),
26543 }
26544 }
26545
26546 Action::JsonToGetPath => {
26547 // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
26548 if let Expression::JsonExtract(je) = e {
26549 // Convert to PARSE_JSON() wrapper:
26550 // - JSON(x) -> PARSE_JSON(x)
26551 // - PARSE_JSON(x) -> keep as-is
26552 // - anything else -> wrap in PARSE_JSON()
26553 let this = match &je.this {
26554 Expression::Function(f)
26555 if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
26556 {
26557 Expression::Function(Box::new(Function::new(
26558 "PARSE_JSON".to_string(),
26559 f.args.clone(),
26560 )))
26561 }
26562 Expression::Function(f)
26563 if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
26564 {
26565 je.this.clone()
26566 }
26567 // GET_PATH result is already JSON, don't wrap
26568 Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
26569 je.this.clone()
26570 }
26571 other => {
26572 // Wrap non-JSON expressions in PARSE_JSON()
26573 Expression::Function(Box::new(Function::new(
26574 "PARSE_JSON".to_string(),
26575 vec![other.clone()],
26576 )))
26577 }
26578 };
26579 // Convert path: extract key from JSONPath or strip $. prefix from string
26580 let path = match &je.path {
26581 Expression::JSONPath(jp) => {
26582 // Extract the key from JSONPath: $root.key -> 'key'
26583 let mut key_parts = Vec::new();
26584 for expr in &jp.expressions {
26585 match expr {
26586 Expression::JSONPathRoot(_) => {} // skip root
26587 Expression::JSONPathKey(k) => {
26588 if let Expression::Literal(lit) = &*k.this {
26589 if let Literal::String(s) = lit.as_ref() {
26590 key_parts.push(s.clone());
26591 }
26592 }
26593 }
26594 _ => {}
26595 }
26596 }
26597 if !key_parts.is_empty() {
26598 Expression::Literal(Box::new(Literal::String(
26599 key_parts.join("."),
26600 )))
26601 } else {
26602 je.path.clone()
26603 }
26604 }
26605 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
26606 {
26607 let Literal::String(s) = lit.as_ref() else {
26608 unreachable!()
26609 };
26610 let stripped = Self::strip_json_wildcards(&s[2..].to_string());
26611 Expression::Literal(Box::new(Literal::String(stripped)))
26612 }
26613 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
26614 {
26615 let Literal::String(s) = lit.as_ref() else {
26616 unreachable!()
26617 };
26618 let stripped = Self::strip_json_wildcards(&s[1..].to_string());
26619 Expression::Literal(Box::new(Literal::String(stripped)))
26620 }
26621 _ => je.path.clone(),
26622 };
26623 Ok(Expression::Function(Box::new(Function::new(
26624 "GET_PATH".to_string(),
26625 vec![this, path],
26626 ))))
26627 } else {
26628 Ok(e)
26629 }
26630 }
26631
26632 Action::StructToRow => {
26633 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
26634 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
26635
26636 // Extract key-value pairs from either Struct or MapFunc
26637 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
26638 Expression::Struct(s) => Some(
26639 s.fields
26640 .iter()
26641 .map(|(opt_name, field_expr)| {
26642 if let Some(name) = opt_name {
26643 (name.clone(), field_expr.clone())
26644 } else if let Expression::NamedArgument(na) = field_expr {
26645 (na.name.name.clone(), na.value.clone())
26646 } else {
26647 (String::new(), field_expr.clone())
26648 }
26649 })
26650 .collect(),
26651 ),
26652 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
26653 m.keys
26654 .iter()
26655 .zip(m.values.iter())
26656 .map(|(key, value)| {
26657 let key_name = match key {
26658 Expression::Literal(lit)
26659 if matches!(lit.as_ref(), Literal::String(_)) =>
26660 {
26661 let Literal::String(s) = lit.as_ref() else {
26662 unreachable!()
26663 };
26664 s.clone()
26665 }
26666 Expression::Identifier(id) => id.name.clone(),
26667 _ => String::new(),
26668 };
26669 (key_name, value.clone())
26670 })
26671 .collect(),
26672 ),
26673 _ => None,
26674 };
26675
26676 if let Some(pairs) = kv_pairs {
26677 let mut named_args = Vec::new();
26678 for (key_name, value) in pairs {
26679 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
26680 named_args.push(Expression::Alias(Box::new(
26681 crate::expressions::Alias::new(
26682 value,
26683 Identifier::new(key_name),
26684 ),
26685 )));
26686 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
26687 named_args.push(value);
26688 } else {
26689 named_args.push(value);
26690 }
26691 }
26692
26693 if matches!(target, DialectType::BigQuery) {
26694 Ok(Expression::Function(Box::new(Function::new(
26695 "STRUCT".to_string(),
26696 named_args,
26697 ))))
26698 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
26699 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
26700 let row_func = Expression::Function(Box::new(Function::new(
26701 "ROW".to_string(),
26702 named_args,
26703 )));
26704
26705 // Try to infer types for each pair
26706 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
26707 Expression::Struct(s) => Some(
26708 s.fields
26709 .iter()
26710 .map(|(opt_name, field_expr)| {
26711 if let Some(name) = opt_name {
26712 (name.clone(), field_expr.clone())
26713 } else if let Expression::NamedArgument(na) = field_expr
26714 {
26715 (na.name.name.clone(), na.value.clone())
26716 } else {
26717 (String::new(), field_expr.clone())
26718 }
26719 })
26720 .collect(),
26721 ),
26722 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
26723 m.keys
26724 .iter()
26725 .zip(m.values.iter())
26726 .map(|(key, value)| {
26727 let key_name = match key {
26728 Expression::Literal(lit)
26729 if matches!(
26730 lit.as_ref(),
26731 Literal::String(_)
26732 ) =>
26733 {
26734 let Literal::String(s) = lit.as_ref() else {
26735 unreachable!()
26736 };
26737 s.clone()
26738 }
26739 Expression::Identifier(id) => id.name.clone(),
26740 _ => String::new(),
26741 };
26742 (key_name, value.clone())
26743 })
26744 .collect(),
26745 ),
26746 _ => None,
26747 };
26748
26749 if let Some(pairs) = kv_pairs_again {
26750 // Infer types for all values
26751 let mut all_inferred = true;
26752 let mut fields = Vec::new();
26753 for (name, value) in &pairs {
26754 let inferred_type = match value {
26755 Expression::Literal(lit)
26756 if matches!(lit.as_ref(), Literal::Number(_)) =>
26757 {
26758 let Literal::Number(n) = lit.as_ref() else {
26759 unreachable!()
26760 };
26761 if n.contains('.') {
26762 Some(DataType::Double {
26763 precision: None,
26764 scale: None,
26765 })
26766 } else {
26767 Some(DataType::Int {
26768 length: None,
26769 integer_spelling: true,
26770 })
26771 }
26772 }
26773 Expression::Literal(lit)
26774 if matches!(lit.as_ref(), Literal::String(_)) =>
26775 {
26776 Some(DataType::VarChar {
26777 length: None,
26778 parenthesized_length: false,
26779 })
26780 }
26781 Expression::Boolean(_) => Some(DataType::Boolean),
26782 _ => None,
26783 };
26784 if let Some(dt) = inferred_type {
26785 fields.push(crate::expressions::StructField::new(
26786 name.clone(),
26787 dt,
26788 ));
26789 } else {
26790 all_inferred = false;
26791 break;
26792 }
26793 }
26794
26795 if all_inferred && !fields.is_empty() {
26796 let row_type = DataType::Struct {
26797 fields,
26798 nested: true,
26799 };
26800 Ok(Expression::Cast(Box::new(Cast {
26801 this: row_func,
26802 to: row_type,
26803 trailing_comments: Vec::new(),
26804 double_colon_syntax: false,
26805 format: None,
26806 default: None,
26807 inferred_type: None,
26808 })))
26809 } else {
26810 Ok(row_func)
26811 }
26812 } else {
26813 Ok(row_func)
26814 }
26815 } else {
26816 Ok(Expression::Function(Box::new(Function::new(
26817 "ROW".to_string(),
26818 named_args,
26819 ))))
26820 }
26821 } else {
26822 Ok(e)
26823 }
26824 }
26825
26826 Action::SparkStructConvert => {
26827 // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
26828 // or DuckDB {'name': val, ...}
26829 if let Expression::Function(f) = e {
26830 // Extract name-value pairs from aliased args
26831 let mut pairs: Vec<(String, Expression)> = Vec::new();
26832 for arg in &f.args {
26833 match arg {
26834 Expression::Alias(a) => {
26835 pairs.push((a.alias.name.clone(), a.this.clone()));
26836 }
26837 _ => {
26838 pairs.push((String::new(), arg.clone()));
26839 }
26840 }
26841 }
26842
26843 match target {
26844 DialectType::DuckDB => {
26845 // Convert to DuckDB struct literal {'name': value, ...}
26846 let mut keys = Vec::new();
26847 let mut values = Vec::new();
26848 for (name, value) in &pairs {
26849 keys.push(Expression::Literal(Box::new(Literal::String(
26850 name.clone(),
26851 ))));
26852 values.push(value.clone());
26853 }
26854 Ok(Expression::MapFunc(Box::new(
26855 crate::expressions::MapConstructor {
26856 keys,
26857 values,
26858 curly_brace_syntax: true,
26859 with_map_keyword: false,
26860 },
26861 )))
26862 }
26863 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26864 // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
26865 let row_args: Vec<Expression> =
26866 pairs.iter().map(|(_, v)| v.clone()).collect();
26867 let row_func = Expression::Function(Box::new(Function::new(
26868 "ROW".to_string(),
26869 row_args,
26870 )));
26871
26872 // Infer types
26873 let mut all_inferred = true;
26874 let mut fields = Vec::new();
26875 for (name, value) in &pairs {
26876 let inferred_type = match value {
26877 Expression::Literal(lit)
26878 if matches!(lit.as_ref(), Literal::Number(_)) =>
26879 {
26880 let Literal::Number(n) = lit.as_ref() else {
26881 unreachable!()
26882 };
26883 if n.contains('.') {
26884 Some(DataType::Double {
26885 precision: None,
26886 scale: None,
26887 })
26888 } else {
26889 Some(DataType::Int {
26890 length: None,
26891 integer_spelling: true,
26892 })
26893 }
26894 }
26895 Expression::Literal(lit)
26896 if matches!(lit.as_ref(), Literal::String(_)) =>
26897 {
26898 Some(DataType::VarChar {
26899 length: None,
26900 parenthesized_length: false,
26901 })
26902 }
26903 Expression::Boolean(_) => Some(DataType::Boolean),
26904 _ => None,
26905 };
26906 if let Some(dt) = inferred_type {
26907 fields.push(crate::expressions::StructField::new(
26908 name.clone(),
26909 dt,
26910 ));
26911 } else {
26912 all_inferred = false;
26913 break;
26914 }
26915 }
26916
26917 if all_inferred && !fields.is_empty() {
26918 let row_type = DataType::Struct {
26919 fields,
26920 nested: true,
26921 };
26922 Ok(Expression::Cast(Box::new(Cast {
26923 this: row_func,
26924 to: row_type,
26925 trailing_comments: Vec::new(),
26926 double_colon_syntax: false,
26927 format: None,
26928 default: None,
26929 inferred_type: None,
26930 })))
26931 } else {
26932 Ok(row_func)
26933 }
26934 }
26935 _ => Ok(Expression::Function(f)),
26936 }
26937 } else {
26938 Ok(e)
26939 }
26940 }
26941
26942 Action::ApproxCountDistinctToApproxDistinct => {
26943 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
26944 if let Expression::ApproxCountDistinct(f) = e {
26945 Ok(Expression::ApproxDistinct(f))
26946 } else {
26947 Ok(e)
26948 }
26949 }
26950
26951 Action::CollectListToArrayAgg => {
26952 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
26953 if let Expression::AggregateFunction(f) = e {
26954 let filter_expr = if !f.args.is_empty() {
26955 let arg = f.args[0].clone();
26956 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
26957 this: arg,
26958 not: true,
26959 postfix_form: false,
26960 })))
26961 } else {
26962 None
26963 };
26964 let agg = crate::expressions::AggFunc {
26965 this: if f.args.is_empty() {
26966 Expression::Null(crate::expressions::Null)
26967 } else {
26968 f.args[0].clone()
26969 },
26970 distinct: f.distinct,
26971 order_by: f.order_by.clone(),
26972 filter: filter_expr,
26973 ignore_nulls: None,
26974 name: None,
26975 having_max: None,
26976 limit: None,
26977 inferred_type: None,
26978 };
26979 Ok(Expression::ArrayAgg(Box::new(agg)))
26980 } else {
26981 Ok(e)
26982 }
26983 }
26984
26985 Action::CollectSetConvert => {
26986 // COLLECT_SET(x) -> target-specific
26987 if let Expression::AggregateFunction(f) = e {
26988 match target {
26989 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
26990 crate::expressions::AggregateFunction {
26991 name: "SET_AGG".to_string(),
26992 args: f.args,
26993 distinct: false,
26994 order_by: f.order_by,
26995 filter: f.filter,
26996 limit: f.limit,
26997 ignore_nulls: f.ignore_nulls,
26998 inferred_type: None,
26999 },
27000 ))),
27001 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
27002 crate::expressions::AggregateFunction {
27003 name: "ARRAY_UNIQUE_AGG".to_string(),
27004 args: f.args,
27005 distinct: false,
27006 order_by: f.order_by,
27007 filter: f.filter,
27008 limit: f.limit,
27009 ignore_nulls: f.ignore_nulls,
27010 inferred_type: None,
27011 },
27012 ))),
27013 DialectType::Trino | DialectType::DuckDB => {
27014 let agg = crate::expressions::AggFunc {
27015 this: if f.args.is_empty() {
27016 Expression::Null(crate::expressions::Null)
27017 } else {
27018 f.args[0].clone()
27019 },
27020 distinct: true,
27021 order_by: Vec::new(),
27022 filter: None,
27023 ignore_nulls: None,
27024 name: None,
27025 having_max: None,
27026 limit: None,
27027 inferred_type: None,
27028 };
27029 Ok(Expression::ArrayAgg(Box::new(agg)))
27030 }
27031 _ => Ok(Expression::AggregateFunction(f)),
27032 }
27033 } else {
27034 Ok(e)
27035 }
27036 }
27037
27038 Action::PercentileConvert => {
27039 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
27040 if let Expression::AggregateFunction(f) = e {
27041 let name = match target {
27042 DialectType::DuckDB => "QUANTILE",
27043 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
27044 _ => "PERCENTILE",
27045 };
27046 Ok(Expression::AggregateFunction(Box::new(
27047 crate::expressions::AggregateFunction {
27048 name: name.to_string(),
27049 args: f.args,
27050 distinct: f.distinct,
27051 order_by: f.order_by,
27052 filter: f.filter,
27053 limit: f.limit,
27054 ignore_nulls: f.ignore_nulls,
27055 inferred_type: None,
27056 },
27057 )))
27058 } else {
27059 Ok(e)
27060 }
27061 }
27062
27063 Action::CorrIsnanWrap => {
27064 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
27065 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
27066 let corr_clone = e.clone();
27067 let isnan = Expression::Function(Box::new(Function::new(
27068 "ISNAN".to_string(),
27069 vec![corr_clone.clone()],
27070 )));
27071 let case_expr = Expression::Case(Box::new(Case {
27072 operand: None,
27073 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
27074 else_: Some(corr_clone),
27075 comments: Vec::new(),
27076 inferred_type: None,
27077 }));
27078 Ok(case_expr)
27079 }
27080
27081 Action::TruncToDateTrunc => {
27082 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
27083 if let Expression::Function(f) = e {
27084 if f.args.len() == 2 {
27085 let timestamp = f.args[0].clone();
27086 let unit_expr = f.args[1].clone();
27087
27088 if matches!(target, DialectType::ClickHouse) {
27089 // For ClickHouse, produce Expression::DateTrunc which the generator
27090 // outputs as DATE_TRUNC(...) without going through the ClickHouse
27091 // target transform that would convert it to dateTrunc
27092 let unit_str = Self::get_unit_str_static(&unit_expr);
27093 let dt_field = match unit_str.as_str() {
27094 "YEAR" => DateTimeField::Year,
27095 "MONTH" => DateTimeField::Month,
27096 "DAY" => DateTimeField::Day,
27097 "HOUR" => DateTimeField::Hour,
27098 "MINUTE" => DateTimeField::Minute,
27099 "SECOND" => DateTimeField::Second,
27100 "WEEK" => DateTimeField::Week,
27101 "QUARTER" => DateTimeField::Quarter,
27102 _ => DateTimeField::Custom(unit_str),
27103 };
27104 Ok(Expression::DateTrunc(Box::new(
27105 crate::expressions::DateTruncFunc {
27106 this: timestamp,
27107 unit: dt_field,
27108 },
27109 )))
27110 } else {
27111 let new_args = vec![unit_expr, timestamp];
27112 Ok(Expression::Function(Box::new(Function::new(
27113 "DATE_TRUNC".to_string(),
27114 new_args,
27115 ))))
27116 }
27117 } else {
27118 Ok(Expression::Function(f))
27119 }
27120 } else {
27121 Ok(e)
27122 }
27123 }
27124
27125 Action::ArrayContainsConvert => {
27126 if let Expression::ArrayContains(f) = e {
27127 match target {
27128 DialectType::Presto | DialectType::Trino => {
27129 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
27130 Ok(Expression::Function(Box::new(Function::new(
27131 "CONTAINS".to_string(),
27132 vec![f.this, f.expression],
27133 ))))
27134 }
27135 DialectType::Snowflake => {
27136 // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
27137 let cast_val =
27138 Expression::Cast(Box::new(crate::expressions::Cast {
27139 this: f.expression,
27140 to: crate::expressions::DataType::Custom {
27141 name: "VARIANT".to_string(),
27142 },
27143 trailing_comments: Vec::new(),
27144 double_colon_syntax: false,
27145 format: None,
27146 default: None,
27147 inferred_type: None,
27148 }));
27149 Ok(Expression::Function(Box::new(Function::new(
27150 "ARRAY_CONTAINS".to_string(),
27151 vec![cast_val, f.this],
27152 ))))
27153 }
27154 _ => Ok(Expression::ArrayContains(f)),
27155 }
27156 } else {
27157 Ok(e)
27158 }
27159 }
27160
27161 Action::ArrayExceptConvert => {
27162 if let Expression::ArrayExcept(f) = e {
27163 let source_arr = f.this;
27164 let exclude_arr = f.expression;
27165 match target {
27166 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
27167 // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
27168 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
27169 // ELSE LIST_TRANSFORM(LIST_FILTER(
27170 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
27171 // pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
27172 // > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
27173 // pair -> pair[1])
27174 // END
27175
27176 // Build null check
27177 let source_is_null =
27178 Expression::IsNull(Box::new(crate::expressions::IsNull {
27179 this: source_arr.clone(),
27180 not: false,
27181 postfix_form: false,
27182 }));
27183 let exclude_is_null =
27184 Expression::IsNull(Box::new(crate::expressions::IsNull {
27185 this: exclude_arr.clone(),
27186 not: false,
27187 postfix_form: false,
27188 }));
27189 let null_check =
27190 Expression::Or(Box::new(crate::expressions::BinaryOp {
27191 left: source_is_null,
27192 right: exclude_is_null,
27193 left_comments: vec![],
27194 operator_comments: vec![],
27195 trailing_comments: vec![],
27196 inferred_type: None,
27197 }));
27198
27199 // GENERATE_SERIES(1, LENGTH(source))
27200 let gen_series = Expression::Function(Box::new(Function::new(
27201 "GENERATE_SERIES".to_string(),
27202 vec![
27203 Expression::number(1),
27204 Expression::Function(Box::new(Function::new(
27205 "LENGTH".to_string(),
27206 vec![source_arr.clone()],
27207 ))),
27208 ],
27209 )));
27210
27211 // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
27212 let list_zip = Expression::Function(Box::new(Function::new(
27213 "LIST_ZIP".to_string(),
27214 vec![source_arr.clone(), gen_series],
27215 )));
27216
27217 // pair[1] and pair[2]
27218 let pair_col = Expression::column("pair");
27219 let pair_1 = Expression::Subscript(Box::new(
27220 crate::expressions::Subscript {
27221 this: pair_col.clone(),
27222 index: Expression::number(1),
27223 },
27224 ));
27225 let pair_2 = Expression::Subscript(Box::new(
27226 crate::expressions::Subscript {
27227 this: pair_col.clone(),
27228 index: Expression::number(2),
27229 },
27230 ));
27231
27232 // source[1:pair[2]]
27233 let source_slice = Expression::ArraySlice(Box::new(
27234 crate::expressions::ArraySlice {
27235 this: source_arr.clone(),
27236 start: Some(Expression::number(1)),
27237 end: Some(pair_2),
27238 },
27239 ));
27240
27241 let e_col = Expression::column("e");
27242
27243 // e -> e IS NOT DISTINCT FROM pair[1]
27244 let inner_lambda1 =
27245 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27246 parameters: vec![crate::expressions::Identifier::new("e")],
27247 body: Expression::NullSafeEq(Box::new(
27248 crate::expressions::BinaryOp {
27249 left: e_col.clone(),
27250 right: pair_1.clone(),
27251 left_comments: vec![],
27252 operator_comments: vec![],
27253 trailing_comments: vec![],
27254 inferred_type: None,
27255 },
27256 )),
27257 colon: false,
27258 parameter_types: vec![],
27259 }));
27260
27261 // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
27262 let inner_filter1 = Expression::Function(Box::new(Function::new(
27263 "LIST_FILTER".to_string(),
27264 vec![source_slice, inner_lambda1],
27265 )));
27266
27267 // LENGTH(LIST_FILTER(source[1:pair[2]], ...))
27268 let len1 = Expression::Function(Box::new(Function::new(
27269 "LENGTH".to_string(),
27270 vec![inner_filter1],
27271 )));
27272
27273 // e -> e IS NOT DISTINCT FROM pair[1]
27274 let inner_lambda2 =
27275 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27276 parameters: vec![crate::expressions::Identifier::new("e")],
27277 body: Expression::NullSafeEq(Box::new(
27278 crate::expressions::BinaryOp {
27279 left: e_col,
27280 right: pair_1.clone(),
27281 left_comments: vec![],
27282 operator_comments: vec![],
27283 trailing_comments: vec![],
27284 inferred_type: None,
27285 },
27286 )),
27287 colon: false,
27288 parameter_types: vec![],
27289 }));
27290
27291 // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
27292 let inner_filter2 = Expression::Function(Box::new(Function::new(
27293 "LIST_FILTER".to_string(),
27294 vec![exclude_arr.clone(), inner_lambda2],
27295 )));
27296
27297 // LENGTH(LIST_FILTER(exclude, ...))
27298 let len2 = Expression::Function(Box::new(Function::new(
27299 "LENGTH".to_string(),
27300 vec![inner_filter2],
27301 )));
27302
27303 // (LENGTH(...) > LENGTH(...))
27304 let cond = Expression::Paren(Box::new(Paren {
27305 this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
27306 left: len1,
27307 right: len2,
27308 left_comments: vec![],
27309 operator_comments: vec![],
27310 trailing_comments: vec![],
27311 inferred_type: None,
27312 })),
27313 trailing_comments: vec![],
27314 }));
27315
27316 // pair -> (condition)
27317 let filter_lambda =
27318 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27319 parameters: vec![crate::expressions::Identifier::new(
27320 "pair",
27321 )],
27322 body: cond,
27323 colon: false,
27324 parameter_types: vec![],
27325 }));
27326
27327 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
27328 let outer_filter = Expression::Function(Box::new(Function::new(
27329 "LIST_FILTER".to_string(),
27330 vec![list_zip, filter_lambda],
27331 )));
27332
27333 // pair -> pair[1]
27334 let transform_lambda =
27335 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27336 parameters: vec![crate::expressions::Identifier::new(
27337 "pair",
27338 )],
27339 body: pair_1,
27340 colon: false,
27341 parameter_types: vec![],
27342 }));
27343
27344 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
27345 let list_transform = Expression::Function(Box::new(Function::new(
27346 "LIST_TRANSFORM".to_string(),
27347 vec![outer_filter, transform_lambda],
27348 )));
27349
27350 Ok(Expression::Case(Box::new(Case {
27351 operand: None,
27352 whens: vec![(null_check, Expression::Null(Null))],
27353 else_: Some(list_transform),
27354 comments: Vec::new(),
27355 inferred_type: None,
27356 })))
27357 }
27358 DialectType::DuckDB => {
27359 // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
27360 // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
27361 // ELSE LIST_FILTER(LIST_DISTINCT(source),
27362 // e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
27363 // END
27364
27365 // Build: source IS NULL
27366 let source_is_null =
27367 Expression::IsNull(Box::new(crate::expressions::IsNull {
27368 this: source_arr.clone(),
27369 not: false,
27370 postfix_form: false,
27371 }));
27372 // Build: exclude IS NULL
27373 let exclude_is_null =
27374 Expression::IsNull(Box::new(crate::expressions::IsNull {
27375 this: exclude_arr.clone(),
27376 not: false,
27377 postfix_form: false,
27378 }));
27379 // source IS NULL OR exclude IS NULL
27380 let null_check =
27381 Expression::Or(Box::new(crate::expressions::BinaryOp {
27382 left: source_is_null,
27383 right: exclude_is_null,
27384 left_comments: vec![],
27385 operator_comments: vec![],
27386 trailing_comments: vec![],
27387 inferred_type: None,
27388 }));
27389
27390 // LIST_DISTINCT(source)
27391 let list_distinct = Expression::Function(Box::new(Function::new(
27392 "LIST_DISTINCT".to_string(),
27393 vec![source_arr.clone()],
27394 )));
27395
27396 // x IS NOT DISTINCT FROM e
27397 let x_col = Expression::column("x");
27398 let e_col = Expression::column("e");
27399 let is_not_distinct = Expression::NullSafeEq(Box::new(
27400 crate::expressions::BinaryOp {
27401 left: x_col,
27402 right: e_col.clone(),
27403 left_comments: vec![],
27404 operator_comments: vec![],
27405 trailing_comments: vec![],
27406 inferred_type: None,
27407 },
27408 ));
27409
27410 // x -> x IS NOT DISTINCT FROM e
27411 let inner_lambda =
27412 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27413 parameters: vec![crate::expressions::Identifier::new("x")],
27414 body: is_not_distinct,
27415 colon: false,
27416 parameter_types: vec![],
27417 }));
27418
27419 // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
27420 let inner_list_filter =
27421 Expression::Function(Box::new(Function::new(
27422 "LIST_FILTER".to_string(),
27423 vec![exclude_arr.clone(), inner_lambda],
27424 )));
27425
27426 // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
27427 let len_inner = Expression::Function(Box::new(Function::new(
27428 "LENGTH".to_string(),
27429 vec![inner_list_filter],
27430 )));
27431
27432 // LENGTH(...) = 0
27433 let eq_zero =
27434 Expression::Eq(Box::new(crate::expressions::BinaryOp {
27435 left: len_inner,
27436 right: Expression::number(0),
27437 left_comments: vec![],
27438 operator_comments: vec![],
27439 trailing_comments: vec![],
27440 inferred_type: None,
27441 }));
27442
27443 // e -> LENGTH(LIST_FILTER(...)) = 0
27444 let outer_lambda =
27445 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27446 parameters: vec![crate::expressions::Identifier::new("e")],
27447 body: eq_zero,
27448 colon: false,
27449 parameter_types: vec![],
27450 }));
27451
27452 // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
27453 let outer_list_filter =
27454 Expression::Function(Box::new(Function::new(
27455 "LIST_FILTER".to_string(),
27456 vec![list_distinct, outer_lambda],
27457 )));
27458
27459 // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
27460 Ok(Expression::Case(Box::new(Case {
27461 operand: None,
27462 whens: vec![(null_check, Expression::Null(Null))],
27463 else_: Some(outer_list_filter),
27464 comments: Vec::new(),
27465 inferred_type: None,
27466 })))
27467 }
27468 DialectType::Snowflake => {
27469 // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
27470 Ok(Expression::ArrayExcept(Box::new(
27471 crate::expressions::BinaryFunc {
27472 this: source_arr,
27473 expression: exclude_arr,
27474 original_name: None,
27475 inferred_type: None,
27476 },
27477 )))
27478 }
27479 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27480 // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
27481 Ok(Expression::Function(Box::new(Function::new(
27482 "ARRAY_EXCEPT".to_string(),
27483 vec![source_arr, exclude_arr],
27484 ))))
27485 }
27486 _ => Ok(Expression::ArrayExcept(Box::new(
27487 crate::expressions::BinaryFunc {
27488 this: source_arr,
27489 expression: exclude_arr,
27490 original_name: None,
27491 inferred_type: None,
27492 },
27493 ))),
27494 }
27495 } else {
27496 Ok(e)
27497 }
27498 }
27499
27500 Action::RegexpLikeExasolAnchor => {
27501 // RegexpLike -> Exasol: wrap pattern with .*...*
27502 // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
27503 // dialects does partial match, so we need to anchor with .* on both sides
27504 if let Expression::RegexpLike(mut f) = e {
27505 match &f.pattern {
27506 Expression::Literal(lit)
27507 if matches!(lit.as_ref(), Literal::String(_)) =>
27508 {
27509 let Literal::String(s) = lit.as_ref() else {
27510 unreachable!()
27511 };
27512 // String literal: wrap with .*...*
27513 f.pattern = Expression::Literal(Box::new(Literal::String(
27514 format!(".*{}.*", s),
27515 )));
27516 }
27517 _ => {
27518 // Non-literal: wrap with CONCAT('.*', pattern, '.*')
27519 f.pattern =
27520 Expression::Paren(Box::new(crate::expressions::Paren {
27521 this: Expression::Concat(Box::new(
27522 crate::expressions::BinaryOp {
27523 left: Expression::Concat(Box::new(
27524 crate::expressions::BinaryOp {
27525 left: Expression::Literal(Box::new(
27526 Literal::String(".*".to_string()),
27527 )),
27528 right: f.pattern,
27529 left_comments: vec![],
27530 operator_comments: vec![],
27531 trailing_comments: vec![],
27532 inferred_type: None,
27533 },
27534 )),
27535 right: Expression::Literal(Box::new(
27536 Literal::String(".*".to_string()),
27537 )),
27538 left_comments: vec![],
27539 operator_comments: vec![],
27540 trailing_comments: vec![],
27541 inferred_type: None,
27542 },
27543 )),
27544 trailing_comments: vec![],
27545 }));
27546 }
27547 }
27548 Ok(Expression::RegexpLike(f))
27549 } else {
27550 Ok(e)
27551 }
27552 }
27553
27554 Action::ArrayPositionSnowflakeSwap => {
27555 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
27556 if let Expression::ArrayPosition(f) = e {
27557 Ok(Expression::ArrayPosition(Box::new(
27558 crate::expressions::BinaryFunc {
27559 this: f.expression,
27560 expression: f.this,
27561 original_name: f.original_name,
27562 inferred_type: f.inferred_type,
27563 },
27564 )))
27565 } else {
27566 Ok(e)
27567 }
27568 }
27569
27570 Action::SnowflakeArrayPositionToDuckDB => {
27571 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
27572 // Snowflake uses 0-based indexing, DuckDB uses 1-based
27573 // The parser has this=value, expression=array (Snowflake order)
27574 if let Expression::ArrayPosition(f) = e {
27575 // Create ARRAY_POSITION(array, value) in standard order
27576 let standard_pos =
27577 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
27578 this: f.expression, // array
27579 expression: f.this, // value
27580 original_name: f.original_name,
27581 inferred_type: f.inferred_type,
27582 }));
27583 // Subtract 1 for zero-based indexing
27584 Ok(Expression::Sub(Box::new(BinaryOp {
27585 left: standard_pos,
27586 right: Expression::number(1),
27587 left_comments: vec![],
27588 operator_comments: vec![],
27589 trailing_comments: vec![],
27590 inferred_type: None,
27591 })))
27592 } else {
27593 Ok(e)
27594 }
27595 }
27596
27597 Action::ArrayDistinctConvert => {
27598 // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
27599 // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
27600 // THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
27601 // ELSE LIST_DISTINCT(arr)
27602 // END
27603 if let Expression::ArrayDistinct(f) = e {
27604 let arr = f.this;
27605
27606 // ARRAY_LENGTH(arr)
27607 let array_length = Expression::Function(Box::new(Function::new(
27608 "ARRAY_LENGTH".to_string(),
27609 vec![arr.clone()],
27610 )));
27611 // LIST_COUNT(arr)
27612 let list_count = Expression::Function(Box::new(Function::new(
27613 "LIST_COUNT".to_string(),
27614 vec![arr.clone()],
27615 )));
27616 // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
27617 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
27618 left: array_length,
27619 right: list_count,
27620 left_comments: vec![],
27621 operator_comments: vec![],
27622 trailing_comments: vec![],
27623 inferred_type: None,
27624 }));
27625
27626 // _u column
27627 let u_col = Expression::column("_u");
27628 // NOT _u IS NULL
27629 let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
27630 this: u_col.clone(),
27631 not: false,
27632 postfix_form: false,
27633 }));
27634 let not_u_is_null =
27635 Expression::Not(Box::new(crate::expressions::UnaryOp {
27636 this: u_is_null,
27637 inferred_type: None,
27638 }));
27639 // _u -> NOT _u IS NULL
27640 let filter_lambda =
27641 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27642 parameters: vec![crate::expressions::Identifier::new("_u")],
27643 body: not_u_is_null,
27644 colon: false,
27645 parameter_types: vec![],
27646 }));
27647 // LIST_FILTER(arr, _u -> NOT _u IS NULL)
27648 let list_filter = Expression::Function(Box::new(Function::new(
27649 "LIST_FILTER".to_string(),
27650 vec![arr.clone(), filter_lambda],
27651 )));
27652 // LIST_DISTINCT(LIST_FILTER(arr, ...))
27653 let list_distinct_filtered = Expression::Function(Box::new(Function::new(
27654 "LIST_DISTINCT".to_string(),
27655 vec![list_filter],
27656 )));
27657 // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL)
27658 let list_append = Expression::Function(Box::new(Function::new(
27659 "LIST_APPEND".to_string(),
27660 vec![list_distinct_filtered, Expression::Null(Null)],
27661 )));
27662
27663 // LIST_DISTINCT(arr)
27664 let list_distinct = Expression::Function(Box::new(Function::new(
27665 "LIST_DISTINCT".to_string(),
27666 vec![arr],
27667 )));
27668
27669 // CASE WHEN neq THEN list_append ELSE list_distinct END
27670 Ok(Expression::Case(Box::new(Case {
27671 operand: None,
27672 whens: vec![(neq, list_append)],
27673 else_: Some(list_distinct),
27674 comments: Vec::new(),
27675 inferred_type: None,
27676 })))
27677 } else {
27678 Ok(e)
27679 }
27680 }
27681
27682 Action::ArrayDistinctClickHouse => {
27683 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
27684 if let Expression::ArrayDistinct(f) = e {
27685 Ok(Expression::Function(Box::new(Function::new(
27686 "arrayDistinct".to_string(),
27687 vec![f.this],
27688 ))))
27689 } else {
27690 Ok(e)
27691 }
27692 }
27693
27694 Action::ArrayContainsDuckDBConvert => {
27695 // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
27696 // CASE WHEN value IS NULL
27697 // THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
27698 // ELSE ARRAY_CONTAINS(array, value)
27699 // END
27700 // Note: In Rust AST from Snowflake parse, this=value (first arg), expression=array (second arg)
27701 if let Expression::ArrayContains(f) = e {
27702 let value = f.this;
27703 let array = f.expression;
27704
27705 // value IS NULL
27706 let value_is_null =
27707 Expression::IsNull(Box::new(crate::expressions::IsNull {
27708 this: value.clone(),
27709 not: false,
27710 postfix_form: false,
27711 }));
27712
27713 // ARRAY_LENGTH(array)
27714 let array_length = Expression::Function(Box::new(Function::new(
27715 "ARRAY_LENGTH".to_string(),
27716 vec![array.clone()],
27717 )));
27718 // LIST_COUNT(array)
27719 let list_count = Expression::Function(Box::new(Function::new(
27720 "LIST_COUNT".to_string(),
27721 vec![array.clone()],
27722 )));
27723 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
27724 let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
27725 left: array_length,
27726 right: list_count,
27727 left_comments: vec![],
27728 operator_comments: vec![],
27729 trailing_comments: vec![],
27730 inferred_type: None,
27731 }));
27732 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
27733 let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
27734 this: Box::new(neq),
27735 expression: Box::new(Expression::Boolean(
27736 crate::expressions::BooleanLiteral { value: false },
27737 )),
27738 }));
27739
27740 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
27741 let array_contains = Expression::Function(Box::new(Function::new(
27742 "ARRAY_CONTAINS".to_string(),
27743 vec![array, value],
27744 )));
27745
27746 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
27747 Ok(Expression::Case(Box::new(Case {
27748 operand: None,
27749 whens: vec![(value_is_null, nullif)],
27750 else_: Some(array_contains),
27751 comments: Vec::new(),
27752 inferred_type: None,
27753 })))
27754 } else {
27755 Ok(e)
27756 }
27757 }
27758
27759 Action::StrPositionExpand => {
27760 // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
27761 // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
27762 // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
27763 if let Expression::StrPosition(sp) = e {
27764 let crate::expressions::StrPosition {
27765 this,
27766 substr,
27767 position,
27768 occurrence,
27769 } = *sp;
27770 let string = *this;
27771 let substr_expr = match substr {
27772 Some(s) => *s,
27773 None => Expression::Null(Null),
27774 };
27775 let pos = match position {
27776 Some(p) => *p,
27777 None => Expression::number(1),
27778 };
27779
27780 // SUBSTRING(string, pos)
27781 let substring_call = Expression::Function(Box::new(Function::new(
27782 "SUBSTRING".to_string(),
27783 vec![string.clone(), pos.clone()],
27784 )));
27785 // STRPOS(SUBSTRING(string, pos), substr)
27786 let strpos_call = Expression::Function(Box::new(Function::new(
27787 "STRPOS".to_string(),
27788 vec![substring_call, substr_expr.clone()],
27789 )));
27790 // STRPOS(...) + pos - 1
27791 let pos_adjusted =
27792 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
27793 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
27794 strpos_call.clone(),
27795 pos.clone(),
27796 ))),
27797 Expression::number(1),
27798 )));
27799 // STRPOS(...) = 0
27800 let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
27801 strpos_call.clone(),
27802 Expression::number(0),
27803 )));
27804
27805 match target {
27806 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27807 // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
27808 Ok(Expression::Function(Box::new(Function::new(
27809 "IF".to_string(),
27810 vec![is_zero, Expression::number(0), pos_adjusted],
27811 ))))
27812 }
27813 DialectType::DuckDB => {
27814 // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
27815 Ok(Expression::Case(Box::new(Case {
27816 operand: None,
27817 whens: vec![(is_zero, Expression::number(0))],
27818 else_: Some(pos_adjusted),
27819 comments: Vec::new(),
27820 inferred_type: None,
27821 })))
27822 }
27823 _ => {
27824 // Reconstruct StrPosition
27825 Ok(Expression::StrPosition(Box::new(
27826 crate::expressions::StrPosition {
27827 this: Box::new(string),
27828 substr: Some(Box::new(substr_expr)),
27829 position: Some(Box::new(pos)),
27830 occurrence,
27831 },
27832 )))
27833 }
27834 }
27835 } else {
27836 Ok(e)
27837 }
27838 }
27839
27840 Action::MonthsBetweenConvert => {
27841 if let Expression::MonthsBetween(mb) = e {
27842 let crate::expressions::BinaryFunc {
27843 this: end_date,
27844 expression: start_date,
27845 ..
27846 } = *mb;
27847 match target {
27848 DialectType::DuckDB => {
27849 let cast_end = Self::ensure_cast_date(end_date);
27850 let cast_start = Self::ensure_cast_date(start_date);
27851 let dd = Expression::Function(Box::new(Function::new(
27852 "DATE_DIFF".to_string(),
27853 vec![
27854 Expression::string("MONTH"),
27855 cast_start.clone(),
27856 cast_end.clone(),
27857 ],
27858 )));
27859 let day_end = Expression::Function(Box::new(Function::new(
27860 "DAY".to_string(),
27861 vec![cast_end.clone()],
27862 )));
27863 let day_start = Expression::Function(Box::new(Function::new(
27864 "DAY".to_string(),
27865 vec![cast_start.clone()],
27866 )));
27867 let last_day_end = Expression::Function(Box::new(Function::new(
27868 "LAST_DAY".to_string(),
27869 vec![cast_end.clone()],
27870 )));
27871 let last_day_start = Expression::Function(Box::new(Function::new(
27872 "LAST_DAY".to_string(),
27873 vec![cast_start.clone()],
27874 )));
27875 let day_last_end = Expression::Function(Box::new(Function::new(
27876 "DAY".to_string(),
27877 vec![last_day_end],
27878 )));
27879 let day_last_start = Expression::Function(Box::new(Function::new(
27880 "DAY".to_string(),
27881 vec![last_day_start],
27882 )));
27883 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
27884 day_end.clone(),
27885 day_last_end,
27886 )));
27887 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
27888 day_start.clone(),
27889 day_last_start,
27890 )));
27891 let both_cond =
27892 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
27893 let day_diff =
27894 Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
27895 let day_diff_paren =
27896 Expression::Paren(Box::new(crate::expressions::Paren {
27897 this: day_diff,
27898 trailing_comments: Vec::new(),
27899 }));
27900 let frac = Expression::Div(Box::new(BinaryOp::new(
27901 day_diff_paren,
27902 Expression::Literal(Box::new(Literal::Number(
27903 "31.0".to_string(),
27904 ))),
27905 )));
27906 let case_expr = Expression::Case(Box::new(Case {
27907 operand: None,
27908 whens: vec![(both_cond, Expression::number(0))],
27909 else_: Some(frac),
27910 comments: Vec::new(),
27911 inferred_type: None,
27912 }));
27913 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
27914 }
27915 DialectType::Snowflake | DialectType::Redshift => {
27916 let unit = Expression::Identifier(Identifier::new("MONTH"));
27917 Ok(Expression::Function(Box::new(Function::new(
27918 "DATEDIFF".to_string(),
27919 vec![unit, start_date, end_date],
27920 ))))
27921 }
27922 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
27923 Ok(Expression::Function(Box::new(Function::new(
27924 "DATE_DIFF".to_string(),
27925 vec![Expression::string("MONTH"), start_date, end_date],
27926 ))))
27927 }
27928 _ => Ok(Expression::MonthsBetween(Box::new(
27929 crate::expressions::BinaryFunc {
27930 this: end_date,
27931 expression: start_date,
27932 original_name: None,
27933 inferred_type: None,
27934 },
27935 ))),
27936 }
27937 } else {
27938 Ok(e)
27939 }
27940 }
27941
27942 Action::AddMonthsConvert => {
27943 if let Expression::AddMonths(am) = e {
27944 let date = am.this;
27945 let val = am.expression;
27946 match target {
27947 DialectType::TSQL | DialectType::Fabric => {
27948 let cast_date = Self::ensure_cast_datetime2(date);
27949 Ok(Expression::Function(Box::new(Function::new(
27950 "DATEADD".to_string(),
27951 vec![
27952 Expression::Identifier(Identifier::new("MONTH")),
27953 val,
27954 cast_date,
27955 ],
27956 ))))
27957 }
27958 DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
27959 // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
27960 // Optionally wrapped in CAST(... AS type) if the input had a specific type
27961
27962 // Determine the cast type from the date expression
27963 let (cast_date, return_type) = match &date {
27964 Expression::Literal(lit)
27965 if matches!(lit.as_ref(), Literal::String(_)) =>
27966 {
27967 // String literal: CAST(str AS TIMESTAMP), no outer CAST
27968 (
27969 Expression::Cast(Box::new(Cast {
27970 this: date.clone(),
27971 to: DataType::Timestamp {
27972 precision: None,
27973 timezone: false,
27974 },
27975 trailing_comments: Vec::new(),
27976 double_colon_syntax: false,
27977 format: None,
27978 default: None,
27979 inferred_type: None,
27980 })),
27981 None,
27982 )
27983 }
27984 Expression::Cast(c) => {
27985 // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
27986 (date.clone(), Some(c.to.clone()))
27987 }
27988 _ => {
27989 // Expression or NULL::TYPE - keep as-is, check for cast type
27990 if let Expression::Cast(c) = &date {
27991 (date.clone(), Some(c.to.clone()))
27992 } else {
27993 (date.clone(), None)
27994 }
27995 }
27996 };
27997
27998 // Build the interval expression
27999 // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
28000 // For integer values, use INTERVAL val MONTH
28001 let is_non_integer_val = match &val {
28002 Expression::Literal(lit)
28003 if matches!(lit.as_ref(), Literal::Number(_)) =>
28004 {
28005 let Literal::Number(n) = lit.as_ref() else {
28006 unreachable!()
28007 };
28008 n.contains('.')
28009 }
28010 Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
28011 Expression::Neg(n) => {
28012 if let Expression::Literal(lit) = &n.this {
28013 if let Literal::Number(s) = lit.as_ref() {
28014 s.contains('.')
28015 } else {
28016 false
28017 }
28018 } else {
28019 false
28020 }
28021 }
28022 _ => false,
28023 };
28024
28025 let add_interval = if is_non_integer_val {
28026 // TO_MONTHS(CAST(ROUND(val) AS INT))
28027 let round_val = Expression::Function(Box::new(Function::new(
28028 "ROUND".to_string(),
28029 vec![val.clone()],
28030 )));
28031 let cast_int = Expression::Cast(Box::new(Cast {
28032 this: round_val,
28033 to: DataType::Int {
28034 length: None,
28035 integer_spelling: false,
28036 },
28037 trailing_comments: Vec::new(),
28038 double_colon_syntax: false,
28039 format: None,
28040 default: None,
28041 inferred_type: None,
28042 }));
28043 Expression::Function(Box::new(Function::new(
28044 "TO_MONTHS".to_string(),
28045 vec![cast_int],
28046 )))
28047 } else {
28048 // INTERVAL val MONTH
28049 // For negative numbers, wrap in parens
28050 let interval_val = match &val {
28051 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
28052 {
28053 let Literal::Number(_) = lit.as_ref() else {
28054 unreachable!()
28055 };
28056 Expression::Paren(Box::new(Paren {
28057 this: val.clone(),
28058 trailing_comments: Vec::new(),
28059 }))
28060 }
28061 Expression::Neg(_) => Expression::Paren(Box::new(Paren {
28062 this: val.clone(),
28063 trailing_comments: Vec::new(),
28064 })),
28065 Expression::Null(_) => Expression::Paren(Box::new(Paren {
28066 this: val.clone(),
28067 trailing_comments: Vec::new(),
28068 })),
28069 _ => val.clone(),
28070 };
28071 Expression::Interval(Box::new(crate::expressions::Interval {
28072 this: Some(interval_val),
28073 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28074 unit: crate::expressions::IntervalUnit::Month,
28075 use_plural: false,
28076 }),
28077 }))
28078 };
28079
28080 // Build: date + interval
28081 let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
28082 cast_date.clone(),
28083 add_interval.clone(),
28084 )));
28085
28086 // Build LAST_DAY(date)
28087 let last_day_date = Expression::Function(Box::new(Function::new(
28088 "LAST_DAY".to_string(),
28089 vec![cast_date.clone()],
28090 )));
28091
28092 // Build LAST_DAY(date + interval)
28093 let last_day_date_plus =
28094 Expression::Function(Box::new(Function::new(
28095 "LAST_DAY".to_string(),
28096 vec![date_plus_interval.clone()],
28097 )));
28098
28099 // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
28100 let case_expr = Expression::Case(Box::new(Case {
28101 operand: None,
28102 whens: vec![(
28103 Expression::Eq(Box::new(BinaryOp::new(
28104 last_day_date,
28105 cast_date.clone(),
28106 ))),
28107 last_day_date_plus,
28108 )],
28109 else_: Some(date_plus_interval),
28110 comments: Vec::new(),
28111 inferred_type: None,
28112 }));
28113
28114 // Wrap in CAST(... AS type) if needed
28115 if let Some(dt) = return_type {
28116 Ok(Expression::Cast(Box::new(Cast {
28117 this: case_expr,
28118 to: dt,
28119 trailing_comments: Vec::new(),
28120 double_colon_syntax: false,
28121 format: None,
28122 default: None,
28123 inferred_type: None,
28124 })))
28125 } else {
28126 Ok(case_expr)
28127 }
28128 }
28129 DialectType::DuckDB => {
28130 // Non-Snowflake source: simple date + INTERVAL
28131 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28132 {
28133 Expression::Cast(Box::new(Cast {
28134 this: date,
28135 to: DataType::Timestamp {
28136 precision: None,
28137 timezone: false,
28138 },
28139 trailing_comments: Vec::new(),
28140 double_colon_syntax: false,
28141 format: None,
28142 default: None,
28143 inferred_type: None,
28144 }))
28145 } else {
28146 date
28147 };
28148 let interval =
28149 Expression::Interval(Box::new(crate::expressions::Interval {
28150 this: Some(val),
28151 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28152 unit: crate::expressions::IntervalUnit::Month,
28153 use_plural: false,
28154 }),
28155 }));
28156 Ok(Expression::Add(Box::new(BinaryOp::new(
28157 cast_date, interval,
28158 ))))
28159 }
28160 DialectType::Snowflake => {
28161 // Keep ADD_MONTHS when source is also Snowflake
28162 if matches!(source, DialectType::Snowflake) {
28163 Ok(Expression::Function(Box::new(Function::new(
28164 "ADD_MONTHS".to_string(),
28165 vec![date, val],
28166 ))))
28167 } else {
28168 Ok(Expression::Function(Box::new(Function::new(
28169 "DATEADD".to_string(),
28170 vec![
28171 Expression::Identifier(Identifier::new("MONTH")),
28172 val,
28173 date,
28174 ],
28175 ))))
28176 }
28177 }
28178 DialectType::Redshift => {
28179 Ok(Expression::Function(Box::new(Function::new(
28180 "DATEADD".to_string(),
28181 vec![
28182 Expression::Identifier(Identifier::new("MONTH")),
28183 val,
28184 date,
28185 ],
28186 ))))
28187 }
28188 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28189 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28190 {
28191 Expression::Cast(Box::new(Cast {
28192 this: date,
28193 to: DataType::Timestamp {
28194 precision: None,
28195 timezone: false,
28196 },
28197 trailing_comments: Vec::new(),
28198 double_colon_syntax: false,
28199 format: None,
28200 default: None,
28201 inferred_type: None,
28202 }))
28203 } else {
28204 date
28205 };
28206 Ok(Expression::Function(Box::new(Function::new(
28207 "DATE_ADD".to_string(),
28208 vec![Expression::string("MONTH"), val, cast_date],
28209 ))))
28210 }
28211 DialectType::BigQuery => {
28212 let interval =
28213 Expression::Interval(Box::new(crate::expressions::Interval {
28214 this: Some(val),
28215 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
28216 unit: crate::expressions::IntervalUnit::Month,
28217 use_plural: false,
28218 }),
28219 }));
28220 let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
28221 {
28222 Expression::Cast(Box::new(Cast {
28223 this: date,
28224 to: DataType::Custom {
28225 name: "DATETIME".to_string(),
28226 },
28227 trailing_comments: Vec::new(),
28228 double_colon_syntax: false,
28229 format: None,
28230 default: None,
28231 inferred_type: None,
28232 }))
28233 } else {
28234 date
28235 };
28236 Ok(Expression::Function(Box::new(Function::new(
28237 "DATE_ADD".to_string(),
28238 vec![cast_date, interval],
28239 ))))
28240 }
28241 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
28242 Ok(Expression::Function(Box::new(Function::new(
28243 "ADD_MONTHS".to_string(),
28244 vec![date, val],
28245 ))))
28246 }
28247 _ => {
28248 // Default: keep as AddMonths expression
28249 Ok(Expression::AddMonths(Box::new(
28250 crate::expressions::BinaryFunc {
28251 this: date,
28252 expression: val,
28253 original_name: None,
28254 inferred_type: None,
28255 },
28256 )))
28257 }
28258 }
28259 } else {
28260 Ok(e)
28261 }
28262 }
28263
28264 Action::PercentileContConvert => {
28265 // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
28266 // Presto/Trino: APPROX_PERCENTILE(col, p)
28267 // Spark/Databricks: PERCENTILE_APPROX(col, p)
28268 if let Expression::WithinGroup(wg) = e {
28269 // Extract percentile value and order by column
28270 let (percentile, _is_disc) = match &wg.this {
28271 Expression::Function(f) => {
28272 let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
28273 let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
28274 Box::new(Literal::Number("0.5".to_string())),
28275 ));
28276 (pct, is_disc)
28277 }
28278 Expression::AggregateFunction(af) => {
28279 let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
28280 let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
28281 Box::new(Literal::Number("0.5".to_string())),
28282 ));
28283 (pct, is_disc)
28284 }
28285 Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
28286 _ => return Ok(Expression::WithinGroup(wg)),
28287 };
28288 let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
28289 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
28290 );
28291
28292 let func_name = match target {
28293 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28294 "APPROX_PERCENTILE"
28295 }
28296 _ => "PERCENTILE_APPROX", // Spark, Databricks
28297 };
28298 Ok(Expression::Function(Box::new(Function::new(
28299 func_name.to_string(),
28300 vec![col, percentile],
28301 ))))
28302 } else {
28303 Ok(e)
28304 }
28305 }
28306
28307 Action::CurrentUserSparkParens => {
28308 // CURRENT_USER -> CURRENT_USER() for Spark
28309 if let Expression::CurrentUser(_) = e {
28310 Ok(Expression::Function(Box::new(Function::new(
28311 "CURRENT_USER".to_string(),
28312 vec![],
28313 ))))
28314 } else {
28315 Ok(e)
28316 }
28317 }
28318
28319 Action::SparkDateFuncCast => {
28320 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
28321 let cast_arg = |arg: Expression| -> Expression {
28322 match target {
28323 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28324 Self::double_cast_timestamp_date(arg)
28325 }
28326 _ => {
28327 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
28328 Self::ensure_cast_date(arg)
28329 }
28330 }
28331 };
28332 match e {
28333 Expression::Month(f) => Ok(Expression::Month(Box::new(
28334 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28335 ))),
28336 Expression::Year(f) => Ok(Expression::Year(Box::new(
28337 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28338 ))),
28339 Expression::Day(f) => Ok(Expression::Day(Box::new(
28340 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
28341 ))),
28342 other => Ok(other),
28343 }
28344 }
28345
28346 Action::MapFromArraysConvert => {
28347 // Expression::MapFromArrays -> target-specific
28348 if let Expression::MapFromArrays(mfa) = e {
28349 let keys = mfa.this;
28350 let values = mfa.expression;
28351 match target {
28352 DialectType::Snowflake => Ok(Expression::Function(Box::new(
28353 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
28354 ))),
28355 _ => {
28356 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
28357 Ok(Expression::Function(Box::new(Function::new(
28358 "MAP".to_string(),
28359 vec![keys, values],
28360 ))))
28361 }
28362 }
28363 } else {
28364 Ok(e)
28365 }
28366 }
28367
28368 Action::AnyToExists => {
28369 if let Expression::Any(q) = e {
28370 if let Some(op) = q.op.clone() {
28371 let lambda_param = crate::expressions::Identifier::new("x");
28372 let rhs = Expression::Identifier(lambda_param.clone());
28373 let body = match op {
28374 crate::expressions::QuantifiedOp::Eq => {
28375 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
28376 }
28377 crate::expressions::QuantifiedOp::Neq => {
28378 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
28379 }
28380 crate::expressions::QuantifiedOp::Lt => {
28381 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
28382 }
28383 crate::expressions::QuantifiedOp::Lte => {
28384 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
28385 }
28386 crate::expressions::QuantifiedOp::Gt => {
28387 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
28388 }
28389 crate::expressions::QuantifiedOp::Gte => {
28390 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
28391 }
28392 };
28393 let lambda =
28394 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
28395 parameters: vec![lambda_param],
28396 body,
28397 colon: false,
28398 parameter_types: Vec::new(),
28399 }));
28400 Ok(Expression::Function(Box::new(Function::new(
28401 "EXISTS".to_string(),
28402 vec![q.subquery, lambda],
28403 ))))
28404 } else {
28405 Ok(Expression::Any(q))
28406 }
28407 } else {
28408 Ok(e)
28409 }
28410 }
28411
28412 Action::GenerateSeriesConvert => {
28413 // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
28414 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
28415 // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
28416 if let Expression::Function(f) = e {
28417 if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
28418 let start = f.args[0].clone();
28419 let end = f.args[1].clone();
28420 let step = f.args.get(2).cloned();
28421
28422 // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
28423 let step = step.map(|s| Self::normalize_interval_string(s, target));
28424
28425 // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
28426 let maybe_cast_timestamp = |arg: Expression| -> Expression {
28427 if matches!(
28428 target,
28429 DialectType::Presto
28430 | DialectType::Trino
28431 | DialectType::Athena
28432 | DialectType::Spark
28433 | DialectType::Databricks
28434 | DialectType::Hive
28435 ) {
28436 match &arg {
28437 Expression::CurrentTimestamp(_) => {
28438 Expression::Cast(Box::new(Cast {
28439 this: arg,
28440 to: DataType::Timestamp {
28441 precision: None,
28442 timezone: false,
28443 },
28444 trailing_comments: Vec::new(),
28445 double_colon_syntax: false,
28446 format: None,
28447 default: None,
28448 inferred_type: None,
28449 }))
28450 }
28451 _ => arg,
28452 }
28453 } else {
28454 arg
28455 }
28456 };
28457
28458 let start = maybe_cast_timestamp(start);
28459 let end = maybe_cast_timestamp(end);
28460
28461 // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
28462 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
28463 let mut gs_args = vec![start, end];
28464 if let Some(step) = step {
28465 gs_args.push(step);
28466 }
28467 return Ok(Expression::Function(Box::new(Function::new(
28468 "GENERATE_SERIES".to_string(),
28469 gs_args,
28470 ))));
28471 }
28472
28473 // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
28474 if matches!(target, DialectType::DuckDB) {
28475 let mut gs_args = vec![start, end];
28476 if let Some(step) = step {
28477 gs_args.push(step);
28478 }
28479 let gs = Expression::Function(Box::new(Function::new(
28480 "GENERATE_SERIES".to_string(),
28481 gs_args,
28482 )));
28483 return Ok(Expression::Function(Box::new(Function::new(
28484 "UNNEST".to_string(),
28485 vec![gs],
28486 ))));
28487 }
28488
28489 let mut seq_args = vec![start, end];
28490 if let Some(step) = step {
28491 seq_args.push(step);
28492 }
28493
28494 let seq = Expression::Function(Box::new(Function::new(
28495 "SEQUENCE".to_string(),
28496 seq_args,
28497 )));
28498
28499 match target {
28500 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28501 // Wrap in UNNEST
28502 Ok(Expression::Function(Box::new(Function::new(
28503 "UNNEST".to_string(),
28504 vec![seq],
28505 ))))
28506 }
28507 DialectType::Spark
28508 | DialectType::Databricks
28509 | DialectType::Hive => {
28510 // Wrap in EXPLODE
28511 Ok(Expression::Function(Box::new(Function::new(
28512 "EXPLODE".to_string(),
28513 vec![seq],
28514 ))))
28515 }
28516 _ => {
28517 // Just SEQUENCE for others
28518 Ok(seq)
28519 }
28520 }
28521 } else {
28522 Ok(Expression::Function(f))
28523 }
28524 } else {
28525 Ok(e)
28526 }
28527 }
28528
28529 Action::ConcatCoalesceWrap => {
28530 // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
28531 // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
28532 if let Expression::Function(f) = e {
28533 if f.name.eq_ignore_ascii_case("CONCAT") {
28534 let new_args: Vec<Expression> = f
28535 .args
28536 .into_iter()
28537 .map(|arg| {
28538 let cast_arg = if matches!(
28539 target,
28540 DialectType::Presto
28541 | DialectType::Trino
28542 | DialectType::Athena
28543 ) {
28544 Expression::Cast(Box::new(Cast {
28545 this: arg,
28546 to: DataType::VarChar {
28547 length: None,
28548 parenthesized_length: false,
28549 },
28550 trailing_comments: Vec::new(),
28551 double_colon_syntax: false,
28552 format: None,
28553 default: None,
28554 inferred_type: None,
28555 }))
28556 } else {
28557 arg
28558 };
28559 Expression::Function(Box::new(Function::new(
28560 "COALESCE".to_string(),
28561 vec![cast_arg, Expression::string("")],
28562 )))
28563 })
28564 .collect();
28565 Ok(Expression::Function(Box::new(Function::new(
28566 "CONCAT".to_string(),
28567 new_args,
28568 ))))
28569 } else {
28570 Ok(Expression::Function(f))
28571 }
28572 } else {
28573 Ok(e)
28574 }
28575 }
28576
28577 Action::PipeConcatToConcat => {
28578 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
28579 if let Expression::Concat(op) = e {
28580 let cast_left = Expression::Cast(Box::new(Cast {
28581 this: op.left,
28582 to: DataType::VarChar {
28583 length: None,
28584 parenthesized_length: false,
28585 },
28586 trailing_comments: Vec::new(),
28587 double_colon_syntax: false,
28588 format: None,
28589 default: None,
28590 inferred_type: None,
28591 }));
28592 let cast_right = Expression::Cast(Box::new(Cast {
28593 this: op.right,
28594 to: DataType::VarChar {
28595 length: None,
28596 parenthesized_length: false,
28597 },
28598 trailing_comments: Vec::new(),
28599 double_colon_syntax: false,
28600 format: None,
28601 default: None,
28602 inferred_type: None,
28603 }));
28604 Ok(Expression::Function(Box::new(Function::new(
28605 "CONCAT".to_string(),
28606 vec![cast_left, cast_right],
28607 ))))
28608 } else {
28609 Ok(e)
28610 }
28611 }
28612
28613 Action::DivFuncConvert => {
28614 // DIV(a, b) -> target-specific integer division
28615 if let Expression::Function(f) = e {
28616 if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
28617 let a = f.args[0].clone();
28618 let b = f.args[1].clone();
28619 match target {
28620 DialectType::DuckDB => {
28621 // DIV(a, b) -> CAST(a // b AS DECIMAL)
28622 let int_div = Expression::IntDiv(Box::new(
28623 crate::expressions::BinaryFunc {
28624 this: a,
28625 expression: b,
28626 original_name: None,
28627 inferred_type: None,
28628 },
28629 ));
28630 Ok(Expression::Cast(Box::new(Cast {
28631 this: int_div,
28632 to: DataType::Decimal {
28633 precision: None,
28634 scale: None,
28635 },
28636 trailing_comments: Vec::new(),
28637 double_colon_syntax: false,
28638 format: None,
28639 default: None,
28640 inferred_type: None,
28641 })))
28642 }
28643 DialectType::BigQuery => {
28644 // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
28645 let div_func = Expression::Function(Box::new(Function::new(
28646 "DIV".to_string(),
28647 vec![a, b],
28648 )));
28649 Ok(Expression::Cast(Box::new(Cast {
28650 this: div_func,
28651 to: DataType::Custom {
28652 name: "NUMERIC".to_string(),
28653 },
28654 trailing_comments: Vec::new(),
28655 double_colon_syntax: false,
28656 format: None,
28657 default: None,
28658 inferred_type: None,
28659 })))
28660 }
28661 DialectType::SQLite => {
28662 // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
28663 let cast_a = Expression::Cast(Box::new(Cast {
28664 this: a,
28665 to: DataType::Custom {
28666 name: "REAL".to_string(),
28667 },
28668 trailing_comments: Vec::new(),
28669 double_colon_syntax: false,
28670 format: None,
28671 default: None,
28672 inferred_type: None,
28673 }));
28674 let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
28675 let cast_int = Expression::Cast(Box::new(Cast {
28676 this: div,
28677 to: DataType::Int {
28678 length: None,
28679 integer_spelling: true,
28680 },
28681 trailing_comments: Vec::new(),
28682 double_colon_syntax: false,
28683 format: None,
28684 default: None,
28685 inferred_type: None,
28686 }));
28687 Ok(Expression::Cast(Box::new(Cast {
28688 this: cast_int,
28689 to: DataType::Custom {
28690 name: "REAL".to_string(),
28691 },
28692 trailing_comments: Vec::new(),
28693 double_colon_syntax: false,
28694 format: None,
28695 default: None,
28696 inferred_type: None,
28697 })))
28698 }
28699 _ => Ok(Expression::Function(f)),
28700 }
28701 } else {
28702 Ok(Expression::Function(f))
28703 }
28704 } else {
28705 Ok(e)
28706 }
28707 }
28708
28709 Action::JsonObjectAggConvert => {
28710 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
28711 match e {
28712 Expression::Function(f) => Ok(Expression::Function(Box::new(
28713 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
28714 ))),
28715 Expression::AggregateFunction(af) => {
28716 // AggregateFunction stores all args in the `args` vec
28717 Ok(Expression::Function(Box::new(Function::new(
28718 "JSON_GROUP_OBJECT".to_string(),
28719 af.args,
28720 ))))
28721 }
28722 other => Ok(other),
28723 }
28724 }
28725
28726 Action::JsonbExistsConvert => {
28727 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
28728 if let Expression::Function(f) = e {
28729 if f.args.len() == 2 {
28730 let json_expr = f.args[0].clone();
28731 let key = match &f.args[1] {
28732 Expression::Literal(lit)
28733 if matches!(
28734 lit.as_ref(),
28735 crate::expressions::Literal::String(_)
28736 ) =>
28737 {
28738 let crate::expressions::Literal::String(s) = lit.as_ref()
28739 else {
28740 unreachable!()
28741 };
28742 format!("$.{}", s)
28743 }
28744 _ => return Ok(Expression::Function(f)),
28745 };
28746 Ok(Expression::Function(Box::new(Function::new(
28747 "JSON_EXISTS".to_string(),
28748 vec![json_expr, Expression::string(&key)],
28749 ))))
28750 } else {
28751 Ok(Expression::Function(f))
28752 }
28753 } else {
28754 Ok(e)
28755 }
28756 }
28757
28758 Action::DateBinConvert => {
28759 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
28760 if let Expression::Function(f) = e {
28761 Ok(Expression::Function(Box::new(Function::new(
28762 "TIME_BUCKET".to_string(),
28763 f.args,
28764 ))))
28765 } else {
28766 Ok(e)
28767 }
28768 }
28769
28770 Action::MysqlCastCharToText => {
28771 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
28772 if let Expression::Cast(mut c) = e {
28773 c.to = DataType::Text;
28774 Ok(Expression::Cast(c))
28775 } else {
28776 Ok(e)
28777 }
28778 }
28779
28780 Action::SparkCastVarcharToString => {
28781 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
28782 match e {
28783 Expression::Cast(mut c) => {
28784 c.to = Self::normalize_varchar_to_string(c.to);
28785 Ok(Expression::Cast(c))
28786 }
28787 Expression::TryCast(mut c) => {
28788 c.to = Self::normalize_varchar_to_string(c.to);
28789 Ok(Expression::TryCast(c))
28790 }
28791 _ => Ok(e),
28792 }
28793 }
28794
28795 Action::MinMaxToLeastGreatest => {
28796 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
28797 if let Expression::Function(f) = e {
28798 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
28799 "LEAST"
28800 } else if f.name.eq_ignore_ascii_case("MAX") {
28801 "GREATEST"
28802 } else {
28803 return Ok(Expression::Function(f));
28804 };
28805 Ok(Expression::Function(Box::new(Function::new(
28806 new_name.to_string(),
28807 f.args,
28808 ))))
28809 } else {
28810 Ok(e)
28811 }
28812 }
28813
28814 Action::ClickHouseUniqToApproxCountDistinct => {
28815 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
28816 if let Expression::Function(f) = e {
28817 Ok(Expression::Function(Box::new(Function::new(
28818 "APPROX_COUNT_DISTINCT".to_string(),
28819 f.args,
28820 ))))
28821 } else {
28822 Ok(e)
28823 }
28824 }
28825
28826 Action::ClickHouseAnyToAnyValue => {
28827 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
28828 if let Expression::Function(f) = e {
28829 Ok(Expression::Function(Box::new(Function::new(
28830 "ANY_VALUE".to_string(),
28831 f.args,
28832 ))))
28833 } else {
28834 Ok(e)
28835 }
28836 }
28837
28838 Action::OracleVarchar2ToVarchar => {
28839 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
28840 if let Expression::DataType(DataType::Custom { ref name }) = e {
28841 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
28842 let starts_varchar2 =
28843 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
28844 let starts_nvarchar2 =
28845 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
28846 let inner = if starts_varchar2 || starts_nvarchar2 {
28847 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
28848 let end = name.len() - 1; // skip trailing ")"
28849 Some(&name[start..end])
28850 } else {
28851 Option::None
28852 };
28853 if let Some(inner_str) = inner {
28854 // Parse the number part, ignoring BYTE/CHAR qualifier
28855 let num_str = inner_str.split_whitespace().next().unwrap_or("");
28856 if let Ok(n) = num_str.parse::<u32>() {
28857 Ok(Expression::DataType(DataType::VarChar {
28858 length: Some(n),
28859 parenthesized_length: false,
28860 }))
28861 } else {
28862 Ok(e)
28863 }
28864 } else {
28865 // Plain VARCHAR2 / NVARCHAR2 without parens
28866 Ok(Expression::DataType(DataType::VarChar {
28867 length: Option::None,
28868 parenthesized_length: false,
28869 }))
28870 }
28871 } else {
28872 Ok(e)
28873 }
28874 }
28875
28876 Action::Nvl2Expand => {
28877 // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
28878 // But keep as NVL2 for dialects that support it natively
28879 let nvl2_native = matches!(
28880 target,
28881 DialectType::Oracle
28882 | DialectType::Snowflake
28883 | DialectType::Redshift
28884 | DialectType::Teradata
28885 | DialectType::Spark
28886 | DialectType::Databricks
28887 );
28888 let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
28889 if nvl2_native {
28890 return Ok(Expression::Nvl2(nvl2));
28891 }
28892 (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
28893 } else if let Expression::Function(f) = e {
28894 if nvl2_native {
28895 return Ok(Expression::Function(Box::new(Function::new(
28896 "NVL2".to_string(),
28897 f.args,
28898 ))));
28899 }
28900 if f.args.len() < 2 {
28901 return Ok(Expression::Function(f));
28902 }
28903 let mut args = f.args;
28904 let a = args.remove(0);
28905 let b = args.remove(0);
28906 let c = if !args.is_empty() {
28907 Some(args.remove(0))
28908 } else {
28909 Option::None
28910 };
28911 (a, b, c)
28912 } else {
28913 return Ok(e);
28914 };
28915 // Build: NOT (a IS NULL)
28916 let is_null = Expression::IsNull(Box::new(IsNull {
28917 this: a,
28918 not: false,
28919 postfix_form: false,
28920 }));
28921 let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
28922 this: is_null,
28923 inferred_type: None,
28924 }));
28925 Ok(Expression::Case(Box::new(Case {
28926 operand: Option::None,
28927 whens: vec![(not_null, b)],
28928 else_: c,
28929 comments: Vec::new(),
28930 inferred_type: None,
28931 })))
28932 }
28933
28934 Action::IfnullToCoalesce => {
28935 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
28936 if let Expression::Coalesce(mut cf) = e {
28937 cf.original_name = Option::None;
28938 Ok(Expression::Coalesce(cf))
28939 } else if let Expression::Function(f) = e {
28940 Ok(Expression::Function(Box::new(Function::new(
28941 "COALESCE".to_string(),
28942 f.args,
28943 ))))
28944 } else {
28945 Ok(e)
28946 }
28947 }
28948
28949 Action::IsAsciiConvert => {
28950 // IS_ASCII(x) -> dialect-specific ASCII check
28951 if let Expression::Function(f) = e {
28952 let arg = f.args.into_iter().next().unwrap();
28953 match target {
28954 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
28955 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
28956 Ok(Expression::Function(Box::new(Function::new(
28957 "REGEXP_LIKE".to_string(),
28958 vec![
28959 arg,
28960 Expression::Literal(Box::new(Literal::String(
28961 "^[[:ascii:]]*$".to_string(),
28962 ))),
28963 ],
28964 ))))
28965 }
28966 DialectType::PostgreSQL
28967 | DialectType::Redshift
28968 | DialectType::Materialize
28969 | DialectType::RisingWave => {
28970 // (x ~ '^[[:ascii:]]*$')
28971 Ok(Expression::Paren(Box::new(Paren {
28972 this: Expression::RegexpLike(Box::new(
28973 crate::expressions::RegexpFunc {
28974 this: arg,
28975 pattern: Expression::Literal(Box::new(
28976 Literal::String("^[[:ascii:]]*$".to_string()),
28977 )),
28978 flags: Option::None,
28979 },
28980 )),
28981 trailing_comments: Vec::new(),
28982 })))
28983 }
28984 DialectType::SQLite => {
28985 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
28986 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
28987 "2a5b5e012d7f5d2a".to_string(),
28988 )));
28989 let cast_expr = Expression::Cast(Box::new(Cast {
28990 this: hex_lit,
28991 to: DataType::Text,
28992 trailing_comments: Vec::new(),
28993 double_colon_syntax: false,
28994 format: Option::None,
28995 default: Option::None,
28996 inferred_type: None,
28997 }));
28998 let glob = Expression::Glob(Box::new(BinaryOp {
28999 left: arg,
29000 right: cast_expr,
29001 left_comments: Vec::new(),
29002 operator_comments: Vec::new(),
29003 trailing_comments: Vec::new(),
29004 inferred_type: None,
29005 }));
29006 Ok(Expression::Paren(Box::new(Paren {
29007 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
29008 this: glob,
29009 inferred_type: None,
29010 })),
29011 trailing_comments: Vec::new(),
29012 })))
29013 }
29014 DialectType::TSQL | DialectType::Fabric => {
29015 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
29016 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
29017 "255b5e002d7f5d25".to_string(),
29018 )));
29019 let convert_expr = Expression::Convert(Box::new(
29020 crate::expressions::ConvertFunc {
29021 this: hex_lit,
29022 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
29023 style: None,
29024 },
29025 ));
29026 let collated = Expression::Collation(Box::new(
29027 crate::expressions::CollationExpr {
29028 this: convert_expr,
29029 collation: "Latin1_General_BIN".to_string(),
29030 quoted: false,
29031 double_quoted: false,
29032 },
29033 ));
29034 let patindex = Expression::Function(Box::new(Function::new(
29035 "PATINDEX".to_string(),
29036 vec![collated, arg],
29037 )));
29038 let zero =
29039 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29040 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29041 left: patindex,
29042 right: zero,
29043 left_comments: Vec::new(),
29044 operator_comments: Vec::new(),
29045 trailing_comments: Vec::new(),
29046 inferred_type: None,
29047 }));
29048 Ok(Expression::Paren(Box::new(Paren {
29049 this: eq_zero,
29050 trailing_comments: Vec::new(),
29051 })))
29052 }
29053 DialectType::Oracle => {
29054 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
29055 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
29056 let s1 = Expression::Literal(Box::new(Literal::String(
29057 "^[".to_string(),
29058 )));
29059 let chr1 = Expression::Function(Box::new(Function::new(
29060 "CHR".to_string(),
29061 vec![Expression::Literal(Box::new(Literal::Number(
29062 "1".to_string(),
29063 )))],
29064 )));
29065 let dash =
29066 Expression::Literal(Box::new(Literal::String("-".to_string())));
29067 let chr127 = Expression::Function(Box::new(Function::new(
29068 "CHR".to_string(),
29069 vec![Expression::Literal(Box::new(Literal::Number(
29070 "127".to_string(),
29071 )))],
29072 )));
29073 let s2 = Expression::Literal(Box::new(Literal::String(
29074 "]*$".to_string(),
29075 )));
29076 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
29077 let concat1 =
29078 Expression::DPipe(Box::new(crate::expressions::DPipe {
29079 this: Box::new(s1),
29080 expression: Box::new(chr1),
29081 safe: None,
29082 }));
29083 let concat2 =
29084 Expression::DPipe(Box::new(crate::expressions::DPipe {
29085 this: Box::new(concat1),
29086 expression: Box::new(dash),
29087 safe: None,
29088 }));
29089 let concat3 =
29090 Expression::DPipe(Box::new(crate::expressions::DPipe {
29091 this: Box::new(concat2),
29092 expression: Box::new(chr127),
29093 safe: None,
29094 }));
29095 let concat4 =
29096 Expression::DPipe(Box::new(crate::expressions::DPipe {
29097 this: Box::new(concat3),
29098 expression: Box::new(s2),
29099 safe: None,
29100 }));
29101 let regexp_like = Expression::Function(Box::new(Function::new(
29102 "REGEXP_LIKE".to_string(),
29103 vec![arg, concat4],
29104 )));
29105 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
29106 let true_expr =
29107 Expression::Column(Box::new(crate::expressions::Column {
29108 name: Identifier {
29109 name: "TRUE".to_string(),
29110 quoted: false,
29111 trailing_comments: Vec::new(),
29112 span: None,
29113 },
29114 table: None,
29115 join_mark: false,
29116 trailing_comments: Vec::new(),
29117 span: None,
29118 inferred_type: None,
29119 }));
29120 let nvl = Expression::Function(Box::new(Function::new(
29121 "NVL".to_string(),
29122 vec![regexp_like, true_expr],
29123 )));
29124 Ok(nvl)
29125 }
29126 _ => Ok(Expression::Function(Box::new(Function::new(
29127 "IS_ASCII".to_string(),
29128 vec![arg],
29129 )))),
29130 }
29131 } else {
29132 Ok(e)
29133 }
29134 }
29135
29136 Action::StrPositionConvert => {
29137 // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
29138 if let Expression::Function(f) = e {
29139 if f.args.len() < 2 {
29140 return Ok(Expression::Function(f));
29141 }
29142 let mut args = f.args;
29143
29144 let haystack = args.remove(0);
29145 let needle = args.remove(0);
29146 let position = if !args.is_empty() {
29147 Some(args.remove(0))
29148 } else {
29149 Option::None
29150 };
29151 let occurrence = if !args.is_empty() {
29152 Some(args.remove(0))
29153 } else {
29154 Option::None
29155 };
29156
29157 // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
29158 // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
29159 fn build_position_expansion(
29160 haystack: Expression,
29161 needle: Expression,
29162 pos: Expression,
29163 occurrence: Option<Expression>,
29164 inner_func: &str,
29165 wrapper: &str, // "CASE", "IF", "IIF"
29166 ) -> Expression {
29167 let substr = Expression::Function(Box::new(Function::new(
29168 "SUBSTRING".to_string(),
29169 vec![haystack, pos.clone()],
29170 )));
29171 let mut inner_args = vec![substr, needle];
29172 if let Some(occ) = occurrence {
29173 inner_args.push(occ);
29174 }
29175 let inner_call = Expression::Function(Box::new(Function::new(
29176 inner_func.to_string(),
29177 inner_args,
29178 )));
29179 let zero =
29180 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29181 let one =
29182 Expression::Literal(Box::new(Literal::Number("1".to_string())));
29183 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29184 left: inner_call.clone(),
29185 right: zero.clone(),
29186 left_comments: Vec::new(),
29187 operator_comments: Vec::new(),
29188 trailing_comments: Vec::new(),
29189 inferred_type: None,
29190 }));
29191 let add_pos = Expression::Add(Box::new(BinaryOp {
29192 left: inner_call,
29193 right: pos,
29194 left_comments: Vec::new(),
29195 operator_comments: Vec::new(),
29196 trailing_comments: Vec::new(),
29197 inferred_type: None,
29198 }));
29199 let sub_one = Expression::Sub(Box::new(BinaryOp {
29200 left: add_pos,
29201 right: one,
29202 left_comments: Vec::new(),
29203 operator_comments: Vec::new(),
29204 trailing_comments: Vec::new(),
29205 inferred_type: None,
29206 }));
29207
29208 match wrapper {
29209 "CASE" => Expression::Case(Box::new(Case {
29210 operand: Option::None,
29211 whens: vec![(eq_zero, zero)],
29212 else_: Some(sub_one),
29213 comments: Vec::new(),
29214 inferred_type: None,
29215 })),
29216 "IIF" => Expression::Function(Box::new(Function::new(
29217 "IIF".to_string(),
29218 vec![eq_zero, zero, sub_one],
29219 ))),
29220 _ => Expression::Function(Box::new(Function::new(
29221 "IF".to_string(),
29222 vec![eq_zero, zero, sub_one],
29223 ))),
29224 }
29225 }
29226
29227 match target {
29228 // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
29229 DialectType::Athena
29230 | DialectType::DuckDB
29231 | DialectType::Presto
29232 | DialectType::Trino
29233 | DialectType::Drill => {
29234 if let Some(pos) = position {
29235 let wrapper = if matches!(target, DialectType::DuckDB) {
29236 "CASE"
29237 } else {
29238 "IF"
29239 };
29240 let result = build_position_expansion(
29241 haystack, needle, pos, occurrence, "STRPOS", wrapper,
29242 );
29243 if matches!(target, DialectType::Drill) {
29244 // Drill uses backtick-quoted `IF`
29245 if let Expression::Function(mut f) = result {
29246 f.name = "`IF`".to_string();
29247 Ok(Expression::Function(f))
29248 } else {
29249 Ok(result)
29250 }
29251 } else {
29252 Ok(result)
29253 }
29254 } else {
29255 Ok(Expression::Function(Box::new(Function::new(
29256 "STRPOS".to_string(),
29257 vec![haystack, needle],
29258 ))))
29259 }
29260 }
29261 // SQLite: IIF wrapper
29262 DialectType::SQLite => {
29263 if let Some(pos) = position {
29264 Ok(build_position_expansion(
29265 haystack, needle, pos, occurrence, "INSTR", "IIF",
29266 ))
29267 } else {
29268 Ok(Expression::Function(Box::new(Function::new(
29269 "INSTR".to_string(),
29270 vec![haystack, needle],
29271 ))))
29272 }
29273 }
29274 // INSTR group: Teradata, BigQuery, Oracle
29275 DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
29276 let mut a = vec![haystack, needle];
29277 if let Some(pos) = position {
29278 a.push(pos);
29279 }
29280 if let Some(occ) = occurrence {
29281 a.push(occ);
29282 }
29283 Ok(Expression::Function(Box::new(Function::new(
29284 "INSTR".to_string(),
29285 a,
29286 ))))
29287 }
29288 // CHARINDEX group: Snowflake, TSQL
29289 DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
29290 let mut a = vec![needle, haystack];
29291 if let Some(pos) = position {
29292 a.push(pos);
29293 }
29294 Ok(Expression::Function(Box::new(Function::new(
29295 "CHARINDEX".to_string(),
29296 a,
29297 ))))
29298 }
29299 // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
29300 DialectType::PostgreSQL
29301 | DialectType::Materialize
29302 | DialectType::RisingWave
29303 | DialectType::Redshift => {
29304 if let Some(pos) = position {
29305 // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
29306 // ELSE POSITION(...) + pos - 1 END
29307 let substr = Expression::Substring(Box::new(
29308 crate::expressions::SubstringFunc {
29309 this: haystack,
29310 start: pos.clone(),
29311 length: Option::None,
29312 from_for_syntax: true,
29313 },
29314 ));
29315 let pos_in = Expression::StrPosition(Box::new(
29316 crate::expressions::StrPosition {
29317 this: Box::new(substr),
29318 substr: Some(Box::new(needle)),
29319 position: Option::None,
29320 occurrence: Option::None,
29321 },
29322 ));
29323 let zero = Expression::Literal(Box::new(Literal::Number(
29324 "0".to_string(),
29325 )));
29326 let one = Expression::Literal(Box::new(Literal::Number(
29327 "1".to_string(),
29328 )));
29329 let eq_zero = Expression::Eq(Box::new(BinaryOp {
29330 left: pos_in.clone(),
29331 right: zero.clone(),
29332 left_comments: Vec::new(),
29333 operator_comments: Vec::new(),
29334 trailing_comments: Vec::new(),
29335 inferred_type: None,
29336 }));
29337 let add_pos = Expression::Add(Box::new(BinaryOp {
29338 left: pos_in,
29339 right: pos,
29340 left_comments: Vec::new(),
29341 operator_comments: Vec::new(),
29342 trailing_comments: Vec::new(),
29343 inferred_type: None,
29344 }));
29345 let sub_one = Expression::Sub(Box::new(BinaryOp {
29346 left: add_pos,
29347 right: one,
29348 left_comments: Vec::new(),
29349 operator_comments: Vec::new(),
29350 trailing_comments: Vec::new(),
29351 inferred_type: None,
29352 }));
29353 Ok(Expression::Case(Box::new(Case {
29354 operand: Option::None,
29355 whens: vec![(eq_zero, zero)],
29356 else_: Some(sub_one),
29357 comments: Vec::new(),
29358 inferred_type: None,
29359 })))
29360 } else {
29361 Ok(Expression::StrPosition(Box::new(
29362 crate::expressions::StrPosition {
29363 this: Box::new(haystack),
29364 substr: Some(Box::new(needle)),
29365 position: Option::None,
29366 occurrence: Option::None,
29367 },
29368 )))
29369 }
29370 }
29371 // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
29372 DialectType::MySQL
29373 | DialectType::SingleStore
29374 | DialectType::TiDB
29375 | DialectType::Hive
29376 | DialectType::Spark
29377 | DialectType::Databricks
29378 | DialectType::Doris
29379 | DialectType::StarRocks => {
29380 let mut a = vec![needle, haystack];
29381 if let Some(pos) = position {
29382 a.push(pos);
29383 }
29384 Ok(Expression::Function(Box::new(Function::new(
29385 "LOCATE".to_string(),
29386 a,
29387 ))))
29388 }
29389 // ClickHouse: POSITION(haystack, needle[, position])
29390 DialectType::ClickHouse => {
29391 let mut a = vec![haystack, needle];
29392 if let Some(pos) = position {
29393 a.push(pos);
29394 }
29395 Ok(Expression::Function(Box::new(Function::new(
29396 "POSITION".to_string(),
29397 a,
29398 ))))
29399 }
29400 _ => {
29401 let mut a = vec![haystack, needle];
29402 if let Some(pos) = position {
29403 a.push(pos);
29404 }
29405 if let Some(occ) = occurrence {
29406 a.push(occ);
29407 }
29408 Ok(Expression::Function(Box::new(Function::new(
29409 "STR_POSITION".to_string(),
29410 a,
29411 ))))
29412 }
29413 }
29414 } else {
29415 Ok(e)
29416 }
29417 }
29418
29419 Action::ArraySumConvert => {
29420 // ARRAY_SUM(arr) -> dialect-specific
29421 if let Expression::Function(f) = e {
29422 let args = f.args;
29423 match target {
29424 DialectType::DuckDB => Ok(Expression::Function(Box::new(
29425 Function::new("LIST_SUM".to_string(), args),
29426 ))),
29427 DialectType::Spark | DialectType::Databricks => {
29428 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
29429 let arr = args.into_iter().next().unwrap();
29430 let zero =
29431 Expression::Literal(Box::new(Literal::Number("0".to_string())));
29432 let acc_id = Identifier::new("acc");
29433 let x_id = Identifier::new("x");
29434 let acc = Expression::Identifier(acc_id.clone());
29435 let x = Expression::Identifier(x_id.clone());
29436 let add = Expression::Add(Box::new(BinaryOp {
29437 left: acc.clone(),
29438 right: x,
29439 left_comments: Vec::new(),
29440 operator_comments: Vec::new(),
29441 trailing_comments: Vec::new(),
29442 inferred_type: None,
29443 }));
29444 let lambda1 =
29445 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29446 parameters: vec![acc_id.clone(), x_id],
29447 body: add,
29448 colon: false,
29449 parameter_types: Vec::new(),
29450 }));
29451 let lambda2 =
29452 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
29453 parameters: vec![acc_id],
29454 body: acc,
29455 colon: false,
29456 parameter_types: Vec::new(),
29457 }));
29458 Ok(Expression::Function(Box::new(Function::new(
29459 "AGGREGATE".to_string(),
29460 vec![arr, zero, lambda1, lambda2],
29461 ))))
29462 }
29463 DialectType::Presto | DialectType::Athena => {
29464 // Presto/Athena keep ARRAY_SUM natively
29465 Ok(Expression::Function(Box::new(Function::new(
29466 "ARRAY_SUM".to_string(),
29467 args,
29468 ))))
29469 }
29470 DialectType::Trino => {
29471 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
29472 if args.len() == 1 {
29473 let arr = args.into_iter().next().unwrap();
29474 let zero = Expression::Literal(Box::new(Literal::Number(
29475 "0".to_string(),
29476 )));
29477 let acc_id = Identifier::new("acc");
29478 let x_id = Identifier::new("x");
29479 let acc = Expression::Identifier(acc_id.clone());
29480 let x = Expression::Identifier(x_id.clone());
29481 let add = Expression::Add(Box::new(BinaryOp {
29482 left: acc.clone(),
29483 right: x,
29484 left_comments: Vec::new(),
29485 operator_comments: Vec::new(),
29486 trailing_comments: Vec::new(),
29487 inferred_type: None,
29488 }));
29489 let lambda1 = Expression::Lambda(Box::new(
29490 crate::expressions::LambdaExpr {
29491 parameters: vec![acc_id.clone(), x_id],
29492 body: add,
29493 colon: false,
29494 parameter_types: Vec::new(),
29495 },
29496 ));
29497 let lambda2 = Expression::Lambda(Box::new(
29498 crate::expressions::LambdaExpr {
29499 parameters: vec![acc_id],
29500 body: acc,
29501 colon: false,
29502 parameter_types: Vec::new(),
29503 },
29504 ));
29505 Ok(Expression::Function(Box::new(Function::new(
29506 "REDUCE".to_string(),
29507 vec![arr, zero, lambda1, lambda2],
29508 ))))
29509 } else {
29510 Ok(Expression::Function(Box::new(Function::new(
29511 "ARRAY_SUM".to_string(),
29512 args,
29513 ))))
29514 }
29515 }
29516 DialectType::ClickHouse => {
29517 // arraySum(lambda, arr) or arraySum(arr)
29518 Ok(Expression::Function(Box::new(Function::new(
29519 "arraySum".to_string(),
29520 args,
29521 ))))
29522 }
29523 _ => Ok(Expression::Function(Box::new(Function::new(
29524 "ARRAY_SUM".to_string(),
29525 args,
29526 )))),
29527 }
29528 } else {
29529 Ok(e)
29530 }
29531 }
29532
29533 Action::ArraySizeConvert => {
29534 if let Expression::Function(f) = e {
29535 Ok(Expression::Function(Box::new(Function::new(
29536 "REPEATED_COUNT".to_string(),
29537 f.args,
29538 ))))
29539 } else {
29540 Ok(e)
29541 }
29542 }
29543
29544 Action::ArrayAnyConvert => {
29545 if let Expression::Function(f) = e {
29546 let mut args = f.args;
29547 if args.len() == 2 {
29548 let arr = args.remove(0);
29549 let lambda = args.remove(0);
29550
29551 // Extract lambda parameter name and body
29552 let (param_name, pred_body) =
29553 if let Expression::Lambda(ref lam) = lambda {
29554 let name = if let Some(p) = lam.parameters.first() {
29555 p.name.clone()
29556 } else {
29557 "x".to_string()
29558 };
29559 (name, lam.body.clone())
29560 } else {
29561 ("x".to_string(), lambda.clone())
29562 };
29563
29564 // Helper: build a function call Expression
29565 let make_func = |name: &str, args: Vec<Expression>| -> Expression {
29566 Expression::Function(Box::new(Function::new(
29567 name.to_string(),
29568 args,
29569 )))
29570 };
29571
29572 // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
29573 let build_filter_pattern = |len_func: &str,
29574 len_args_extra: Vec<Expression>,
29575 filter_expr: Expression|
29576 -> Expression {
29577 // len_func(arr, ...extra) = 0
29578 let mut len_arr_args = vec![arr.clone()];
29579 len_arr_args.extend(len_args_extra.clone());
29580 let len_arr = make_func(len_func, len_arr_args);
29581 let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
29582 len_arr,
29583 Expression::number(0),
29584 )));
29585
29586 // len_func(filter_expr, ...extra) <> 0
29587 let mut len_filter_args = vec![filter_expr];
29588 len_filter_args.extend(len_args_extra);
29589 let len_filter = make_func(len_func, len_filter_args);
29590 let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
29591 len_filter,
29592 Expression::number(0),
29593 )));
29594
29595 // (eq_zero OR neq_zero)
29596 let or_expr =
29597 Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
29598 Expression::Paren(Box::new(Paren {
29599 this: or_expr,
29600 trailing_comments: Vec::new(),
29601 }))
29602 };
29603
29604 match target {
29605 DialectType::Trino | DialectType::Presto | DialectType::Athena => {
29606 Ok(make_func("ANY_MATCH", vec![arr, lambda]))
29607 }
29608 DialectType::ClickHouse => {
29609 // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
29610 // ClickHouse arrayFilter takes lambda first, then array
29611 let filter_expr =
29612 make_func("arrayFilter", vec![lambda, arr.clone()]);
29613 Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
29614 }
29615 DialectType::Databricks | DialectType::Spark => {
29616 // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
29617 let filter_expr =
29618 make_func("FILTER", vec![arr.clone(), lambda]);
29619 Ok(build_filter_pattern("SIZE", vec![], filter_expr))
29620 }
29621 DialectType::DuckDB => {
29622 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
29623 let filter_expr =
29624 make_func("LIST_FILTER", vec![arr.clone(), lambda]);
29625 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
29626 }
29627 DialectType::Teradata => {
29628 // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
29629 let filter_expr =
29630 make_func("FILTER", vec![arr.clone(), lambda]);
29631 Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
29632 }
29633 DialectType::BigQuery => {
29634 // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
29635 // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
29636 let param_col = Expression::column(¶m_name);
29637 let unnest_expr = Expression::Unnest(Box::new(
29638 crate::expressions::UnnestFunc {
29639 this: arr.clone(),
29640 expressions: vec![],
29641 with_ordinality: false,
29642 alias: Some(Identifier::new(¶m_name)),
29643 offset_alias: None,
29644 },
29645 ));
29646 let mut sel = crate::expressions::Select::default();
29647 sel.expressions = vec![param_col];
29648 sel.from = Some(crate::expressions::From {
29649 expressions: vec![unnest_expr],
29650 });
29651 sel.where_clause =
29652 Some(crate::expressions::Where { this: pred_body });
29653 let array_subquery =
29654 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
29655 Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
29656 }
29657 DialectType::PostgreSQL => {
29658 // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
29659 // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
29660 let param_col = Expression::column(¶m_name);
29661 // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
29662 let unnest_with_alias =
29663 Expression::Alias(Box::new(crate::expressions::Alias {
29664 this: Expression::Unnest(Box::new(
29665 crate::expressions::UnnestFunc {
29666 this: arr.clone(),
29667 expressions: vec![],
29668 with_ordinality: false,
29669 alias: None,
29670 offset_alias: None,
29671 },
29672 )),
29673 alias: Identifier::new("_t0"),
29674 column_aliases: vec![Identifier::new(¶m_name)],
29675 alias_explicit_as: false,
29676 alias_keyword: None,
29677 pre_alias_comments: Vec::new(),
29678 trailing_comments: Vec::new(),
29679 inferred_type: None,
29680 }));
29681 let mut sel = crate::expressions::Select::default();
29682 sel.expressions = vec![param_col];
29683 sel.from = Some(crate::expressions::From {
29684 expressions: vec![unnest_with_alias],
29685 });
29686 sel.where_clause =
29687 Some(crate::expressions::Where { this: pred_body });
29688 let array_subquery =
29689 make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
29690 Ok(build_filter_pattern(
29691 "ARRAY_LENGTH",
29692 vec![Expression::number(1)],
29693 array_subquery,
29694 ))
29695 }
29696 _ => Ok(Expression::Function(Box::new(Function::new(
29697 "ARRAY_ANY".to_string(),
29698 vec![arr, lambda],
29699 )))),
29700 }
29701 } else {
29702 Ok(Expression::Function(Box::new(Function::new(
29703 "ARRAY_ANY".to_string(),
29704 args,
29705 ))))
29706 }
29707 } else {
29708 Ok(e)
29709 }
29710 }
29711
29712 Action::DecodeSimplify => {
29713 // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
29714 // For literal search values: CASE WHEN x = search THEN result
29715 // For NULL search: CASE WHEN x IS NULL THEN result
29716 // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
29717 fn is_decode_literal(e: &Expression) -> bool {
29718 matches!(
29719 e,
29720 Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
29721 )
29722 }
29723
29724 let build_decode_case =
29725 |this_expr: Expression,
29726 pairs: Vec<(Expression, Expression)>,
29727 default: Option<Expression>| {
29728 let whens: Vec<(Expression, Expression)> = pairs
29729 .into_iter()
29730 .map(|(search, result)| {
29731 if matches!(&search, Expression::Null(_)) {
29732 // NULL search -> IS NULL
29733 let condition = Expression::Is(Box::new(BinaryOp {
29734 left: this_expr.clone(),
29735 right: Expression::Null(crate::expressions::Null),
29736 left_comments: Vec::new(),
29737 operator_comments: Vec::new(),
29738 trailing_comments: Vec::new(),
29739 inferred_type: None,
29740 }));
29741 (condition, result)
29742 } else if is_decode_literal(&search)
29743 || is_decode_literal(&this_expr)
29744 {
29745 // At least one side is a literal -> simple equality (no NULL check needed)
29746 let eq = Expression::Eq(Box::new(BinaryOp {
29747 left: this_expr.clone(),
29748 right: search,
29749 left_comments: Vec::new(),
29750 operator_comments: Vec::new(),
29751 trailing_comments: Vec::new(),
29752 inferred_type: None,
29753 }));
29754 (eq, result)
29755 } else {
29756 // Non-literal -> null-safe comparison
29757 let needs_paren = matches!(
29758 &search,
29759 Expression::Eq(_)
29760 | Expression::Neq(_)
29761 | Expression::Gt(_)
29762 | Expression::Gte(_)
29763 | Expression::Lt(_)
29764 | Expression::Lte(_)
29765 );
29766 let search_ref = if needs_paren {
29767 Expression::Paren(Box::new(crate::expressions::Paren {
29768 this: search.clone(),
29769 trailing_comments: Vec::new(),
29770 }))
29771 } else {
29772 search.clone()
29773 };
29774 // Build: x = search OR (x IS NULL AND search IS NULL)
29775 let eq = Expression::Eq(Box::new(BinaryOp {
29776 left: this_expr.clone(),
29777 right: search_ref,
29778 left_comments: Vec::new(),
29779 operator_comments: Vec::new(),
29780 trailing_comments: Vec::new(),
29781 inferred_type: None,
29782 }));
29783 let search_in_null = if needs_paren {
29784 Expression::Paren(Box::new(crate::expressions::Paren {
29785 this: search.clone(),
29786 trailing_comments: Vec::new(),
29787 }))
29788 } else {
29789 search.clone()
29790 };
29791 let x_is_null = Expression::Is(Box::new(BinaryOp {
29792 left: this_expr.clone(),
29793 right: Expression::Null(crate::expressions::Null),
29794 left_comments: Vec::new(),
29795 operator_comments: Vec::new(),
29796 trailing_comments: Vec::new(),
29797 inferred_type: None,
29798 }));
29799 let search_is_null = Expression::Is(Box::new(BinaryOp {
29800 left: search_in_null,
29801 right: Expression::Null(crate::expressions::Null),
29802 left_comments: Vec::new(),
29803 operator_comments: Vec::new(),
29804 trailing_comments: Vec::new(),
29805 inferred_type: None,
29806 }));
29807 let both_null = Expression::And(Box::new(BinaryOp {
29808 left: x_is_null,
29809 right: search_is_null,
29810 left_comments: Vec::new(),
29811 operator_comments: Vec::new(),
29812 trailing_comments: Vec::new(),
29813 inferred_type: None,
29814 }));
29815 let condition = Expression::Or(Box::new(BinaryOp {
29816 left: eq,
29817 right: Expression::Paren(Box::new(
29818 crate::expressions::Paren {
29819 this: both_null,
29820 trailing_comments: Vec::new(),
29821 },
29822 )),
29823 left_comments: Vec::new(),
29824 operator_comments: Vec::new(),
29825 trailing_comments: Vec::new(),
29826 inferred_type: None,
29827 }));
29828 (condition, result)
29829 }
29830 })
29831 .collect();
29832 Expression::Case(Box::new(Case {
29833 operand: None,
29834 whens,
29835 else_: default,
29836 comments: Vec::new(),
29837 inferred_type: None,
29838 }))
29839 };
29840
29841 if let Expression::Decode(decode) = e {
29842 Ok(build_decode_case(
29843 decode.this,
29844 decode.search_results,
29845 decode.default,
29846 ))
29847 } else if let Expression::DecodeCase(dc) = e {
29848 // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
29849 let mut exprs = dc.expressions;
29850 if exprs.len() < 3 {
29851 return Ok(Expression::DecodeCase(Box::new(
29852 crate::expressions::DecodeCase { expressions: exprs },
29853 )));
29854 }
29855 let this_expr = exprs.remove(0);
29856 let mut pairs = Vec::new();
29857 let mut default = None;
29858 let mut i = 0;
29859 while i + 1 < exprs.len() {
29860 pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
29861 i += 2;
29862 }
29863 if i < exprs.len() {
29864 // Odd remaining element is the default
29865 default = Some(exprs[i].clone());
29866 }
29867 Ok(build_decode_case(this_expr, pairs, default))
29868 } else {
29869 Ok(e)
29870 }
29871 }
29872
29873 Action::CreateTableLikeToCtas => {
29874 // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
29875 if let Expression::CreateTable(ct) = e {
29876 let like_source = ct.constraints.iter().find_map(|c| {
29877 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29878 Some(source.clone())
29879 } else {
29880 None
29881 }
29882 });
29883 if let Some(source_table) = like_source {
29884 let mut new_ct = *ct;
29885 new_ct.constraints.clear();
29886 // Build: SELECT * FROM b LIMIT 0
29887 let select = Expression::Select(Box::new(crate::expressions::Select {
29888 expressions: vec![Expression::Star(crate::expressions::Star {
29889 table: None,
29890 except: None,
29891 replace: None,
29892 rename: None,
29893 trailing_comments: Vec::new(),
29894 span: None,
29895 })],
29896 from: Some(crate::expressions::From {
29897 expressions: vec![Expression::Table(Box::new(source_table))],
29898 }),
29899 limit: Some(crate::expressions::Limit {
29900 this: Expression::Literal(Box::new(Literal::Number(
29901 "0".to_string(),
29902 ))),
29903 percent: false,
29904 comments: Vec::new(),
29905 }),
29906 ..Default::default()
29907 }));
29908 new_ct.as_select = Some(select);
29909 Ok(Expression::CreateTable(Box::new(new_ct)))
29910 } else {
29911 Ok(Expression::CreateTable(ct))
29912 }
29913 } else {
29914 Ok(e)
29915 }
29916 }
29917
29918 Action::CreateTableLikeToSelectInto => {
29919 // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
29920 if let Expression::CreateTable(ct) = e {
29921 let like_source = ct.constraints.iter().find_map(|c| {
29922 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29923 Some(source.clone())
29924 } else {
29925 None
29926 }
29927 });
29928 if let Some(source_table) = like_source {
29929 let mut aliased_source = source_table;
29930 aliased_source.alias = Some(Identifier::new("temp"));
29931 // Build: SELECT TOP 0 * INTO a FROM b AS temp
29932 let select = Expression::Select(Box::new(crate::expressions::Select {
29933 expressions: vec![Expression::Star(crate::expressions::Star {
29934 table: None,
29935 except: None,
29936 replace: None,
29937 rename: None,
29938 trailing_comments: Vec::new(),
29939 span: None,
29940 })],
29941 from: Some(crate::expressions::From {
29942 expressions: vec![Expression::Table(Box::new(aliased_source))],
29943 }),
29944 into: Some(crate::expressions::SelectInto {
29945 this: Expression::Table(Box::new(ct.name.clone())),
29946 temporary: false,
29947 unlogged: false,
29948 bulk_collect: false,
29949 expressions: Vec::new(),
29950 }),
29951 top: Some(crate::expressions::Top {
29952 this: Expression::Literal(Box::new(Literal::Number(
29953 "0".to_string(),
29954 ))),
29955 percent: false,
29956 with_ties: false,
29957 parenthesized: false,
29958 }),
29959 ..Default::default()
29960 }));
29961 Ok(select)
29962 } else {
29963 Ok(Expression::CreateTable(ct))
29964 }
29965 } else {
29966 Ok(e)
29967 }
29968 }
29969
29970 Action::CreateTableLikeToAs => {
29971 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
29972 if let Expression::CreateTable(ct) = e {
29973 let like_source = ct.constraints.iter().find_map(|c| {
29974 if let crate::expressions::TableConstraint::Like { source, .. } = c {
29975 Some(source.clone())
29976 } else {
29977 None
29978 }
29979 });
29980 if let Some(source_table) = like_source {
29981 let mut new_ct = *ct;
29982 new_ct.constraints.clear();
29983 // AS b (just a table reference, not a SELECT)
29984 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
29985 Ok(Expression::CreateTable(Box::new(new_ct)))
29986 } else {
29987 Ok(Expression::CreateTable(ct))
29988 }
29989 } else {
29990 Ok(e)
29991 }
29992 }
29993
29994 Action::TsOrDsToDateConvert => {
29995 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
29996 if let Expression::Function(f) = e {
29997 let mut args = f.args;
29998 let this = args.remove(0);
29999 let fmt = if !args.is_empty() {
30000 match &args[0] {
30001 Expression::Literal(lit)
30002 if matches!(lit.as_ref(), Literal::String(_)) =>
30003 {
30004 let Literal::String(s) = lit.as_ref() else {
30005 unreachable!()
30006 };
30007 Some(s.clone())
30008 }
30009 _ => None,
30010 }
30011 } else {
30012 None
30013 };
30014 Ok(Expression::TsOrDsToDate(Box::new(
30015 crate::expressions::TsOrDsToDate {
30016 this: Box::new(this),
30017 format: fmt,
30018 safe: None,
30019 },
30020 )))
30021 } else {
30022 Ok(e)
30023 }
30024 }
30025
30026 Action::TsOrDsToDateStrConvert => {
30027 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
30028 if let Expression::Function(f) = e {
30029 let arg = f.args.into_iter().next().unwrap();
30030 let str_type = match target {
30031 DialectType::DuckDB
30032 | DialectType::PostgreSQL
30033 | DialectType::Materialize => DataType::Text,
30034 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30035 DataType::Custom {
30036 name: "STRING".to_string(),
30037 }
30038 }
30039 DialectType::Presto
30040 | DialectType::Trino
30041 | DialectType::Athena
30042 | DialectType::Drill => DataType::VarChar {
30043 length: None,
30044 parenthesized_length: false,
30045 },
30046 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
30047 DataType::Custom {
30048 name: "STRING".to_string(),
30049 }
30050 }
30051 _ => DataType::VarChar {
30052 length: None,
30053 parenthesized_length: false,
30054 },
30055 };
30056 let cast_expr = Expression::Cast(Box::new(Cast {
30057 this: arg,
30058 to: str_type,
30059 double_colon_syntax: false,
30060 trailing_comments: Vec::new(),
30061 format: None,
30062 default: None,
30063 inferred_type: None,
30064 }));
30065 Ok(Expression::Substring(Box::new(
30066 crate::expressions::SubstringFunc {
30067 this: cast_expr,
30068 start: Expression::number(1),
30069 length: Some(Expression::number(10)),
30070 from_for_syntax: false,
30071 },
30072 )))
30073 } else {
30074 Ok(e)
30075 }
30076 }
30077
30078 Action::DateStrToDateConvert => {
30079 // DATE_STR_TO_DATE(x) -> dialect-specific
30080 if let Expression::Function(f) = e {
30081 let arg = f.args.into_iter().next().unwrap();
30082 match target {
30083 DialectType::SQLite => {
30084 // SQLite: just the bare expression (dates are strings)
30085 Ok(arg)
30086 }
30087 _ => Ok(Expression::Cast(Box::new(Cast {
30088 this: arg,
30089 to: DataType::Date,
30090 double_colon_syntax: false,
30091 trailing_comments: Vec::new(),
30092 format: None,
30093 default: None,
30094 inferred_type: None,
30095 }))),
30096 }
30097 } else {
30098 Ok(e)
30099 }
30100 }
30101
30102 Action::TimeStrToDateConvert => {
30103 // TIME_STR_TO_DATE(x) -> dialect-specific
30104 if let Expression::Function(f) = e {
30105 let arg = f.args.into_iter().next().unwrap();
30106 match target {
30107 DialectType::Hive
30108 | DialectType::Doris
30109 | DialectType::StarRocks
30110 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
30111 Function::new("TO_DATE".to_string(), vec![arg]),
30112 ))),
30113 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30114 // Presto: CAST(x AS TIMESTAMP)
30115 Ok(Expression::Cast(Box::new(Cast {
30116 this: arg,
30117 to: DataType::Timestamp {
30118 timezone: false,
30119 precision: None,
30120 },
30121 double_colon_syntax: false,
30122 trailing_comments: Vec::new(),
30123 format: None,
30124 default: None,
30125 inferred_type: None,
30126 })))
30127 }
30128 _ => {
30129 // Default: CAST(x AS DATE)
30130 Ok(Expression::Cast(Box::new(Cast {
30131 this: arg,
30132 to: DataType::Date,
30133 double_colon_syntax: false,
30134 trailing_comments: Vec::new(),
30135 format: None,
30136 default: None,
30137 inferred_type: None,
30138 })))
30139 }
30140 }
30141 } else {
30142 Ok(e)
30143 }
30144 }
30145
30146 Action::TimeStrToTimeConvert => {
30147 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
30148 if let Expression::Function(f) = e {
30149 let mut args = f.args;
30150 let this = args.remove(0);
30151 let zone = if !args.is_empty() {
30152 match &args[0] {
30153 Expression::Literal(lit)
30154 if matches!(lit.as_ref(), Literal::String(_)) =>
30155 {
30156 let Literal::String(s) = lit.as_ref() else {
30157 unreachable!()
30158 };
30159 Some(s.clone())
30160 }
30161 _ => None,
30162 }
30163 } else {
30164 None
30165 };
30166 let has_zone = zone.is_some();
30167
30168 match target {
30169 DialectType::SQLite => {
30170 // SQLite: just the bare expression
30171 Ok(this)
30172 }
30173 DialectType::MySQL => {
30174 if has_zone {
30175 // MySQL with zone: TIMESTAMP(x)
30176 Ok(Expression::Function(Box::new(Function::new(
30177 "TIMESTAMP".to_string(),
30178 vec![this],
30179 ))))
30180 } else {
30181 // MySQL: CAST(x AS DATETIME) or with precision
30182 // Use DataType::Custom to avoid MySQL's transform_cast converting
30183 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
30184 let precision = if let Expression::Literal(ref lit) = this {
30185 if let Literal::String(ref s) = lit.as_ref() {
30186 if let Some(dot_pos) = s.rfind('.') {
30187 let frac = &s[dot_pos + 1..];
30188 let digit_count = frac
30189 .chars()
30190 .take_while(|c| c.is_ascii_digit())
30191 .count();
30192 if digit_count > 0 {
30193 Some(digit_count)
30194 } else {
30195 None
30196 }
30197 } else {
30198 None
30199 }
30200 } else {
30201 None
30202 }
30203 } else {
30204 None
30205 };
30206 let type_name = match precision {
30207 Some(p) => format!("DATETIME({})", p),
30208 None => "DATETIME".to_string(),
30209 };
30210 Ok(Expression::Cast(Box::new(Cast {
30211 this,
30212 to: DataType::Custom { name: type_name },
30213 double_colon_syntax: false,
30214 trailing_comments: Vec::new(),
30215 format: None,
30216 default: None,
30217 inferred_type: None,
30218 })))
30219 }
30220 }
30221 DialectType::ClickHouse => {
30222 if has_zone {
30223 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
30224 // We need to strip the timezone offset from the literal if present
30225 let clean_this = if let Expression::Literal(ref lit) = this {
30226 if let Literal::String(ref s) = lit.as_ref() {
30227 // Strip timezone offset like "-08:00" or "+00:00"
30228 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
30229 if let Some(offset_pos) = re_offset {
30230 if offset_pos > 10 {
30231 // After the date part
30232 let trimmed = s[..offset_pos].to_string();
30233 Expression::Literal(Box::new(Literal::String(
30234 trimmed,
30235 )))
30236 } else {
30237 this.clone()
30238 }
30239 } else {
30240 this.clone()
30241 }
30242 } else {
30243 this.clone()
30244 }
30245 } else {
30246 this.clone()
30247 };
30248 let zone_str = zone.unwrap();
30249 // Build: CAST(x AS DateTime64(6, 'zone'))
30250 let type_name = format!("DateTime64(6, '{}')", zone_str);
30251 Ok(Expression::Cast(Box::new(Cast {
30252 this: clean_this,
30253 to: DataType::Custom { name: type_name },
30254 double_colon_syntax: false,
30255 trailing_comments: Vec::new(),
30256 format: None,
30257 default: None,
30258 inferred_type: None,
30259 })))
30260 } else {
30261 Ok(Expression::Cast(Box::new(Cast {
30262 this,
30263 to: DataType::Custom {
30264 name: "DateTime64(6)".to_string(),
30265 },
30266 double_colon_syntax: false,
30267 trailing_comments: Vec::new(),
30268 format: None,
30269 default: None,
30270 inferred_type: None,
30271 })))
30272 }
30273 }
30274 DialectType::BigQuery => {
30275 if has_zone {
30276 // BigQuery with zone: CAST(x AS TIMESTAMP)
30277 Ok(Expression::Cast(Box::new(Cast {
30278 this,
30279 to: DataType::Timestamp {
30280 timezone: false,
30281 precision: None,
30282 },
30283 double_colon_syntax: false,
30284 trailing_comments: Vec::new(),
30285 format: None,
30286 default: None,
30287 inferred_type: None,
30288 })))
30289 } else {
30290 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
30291 Ok(Expression::Cast(Box::new(Cast {
30292 this,
30293 to: DataType::Custom {
30294 name: "DATETIME".to_string(),
30295 },
30296 double_colon_syntax: false,
30297 trailing_comments: Vec::new(),
30298 format: None,
30299 default: None,
30300 inferred_type: None,
30301 })))
30302 }
30303 }
30304 DialectType::Doris => {
30305 // Doris: CAST(x AS DATETIME)
30306 Ok(Expression::Cast(Box::new(Cast {
30307 this,
30308 to: DataType::Custom {
30309 name: "DATETIME".to_string(),
30310 },
30311 double_colon_syntax: false,
30312 trailing_comments: Vec::new(),
30313 format: None,
30314 default: None,
30315 inferred_type: None,
30316 })))
30317 }
30318 DialectType::TSQL | DialectType::Fabric => {
30319 if has_zone {
30320 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
30321 let cast_expr = Expression::Cast(Box::new(Cast {
30322 this,
30323 to: DataType::Custom {
30324 name: "DATETIMEOFFSET".to_string(),
30325 },
30326 double_colon_syntax: false,
30327 trailing_comments: Vec::new(),
30328 format: None,
30329 default: None,
30330 inferred_type: None,
30331 }));
30332 Ok(Expression::AtTimeZone(Box::new(
30333 crate::expressions::AtTimeZone {
30334 this: cast_expr,
30335 zone: Expression::Literal(Box::new(Literal::String(
30336 "UTC".to_string(),
30337 ))),
30338 },
30339 )))
30340 } else {
30341 // TSQL: CAST(x AS DATETIME2)
30342 Ok(Expression::Cast(Box::new(Cast {
30343 this,
30344 to: DataType::Custom {
30345 name: "DATETIME2".to_string(),
30346 },
30347 double_colon_syntax: false,
30348 trailing_comments: Vec::new(),
30349 format: None,
30350 default: None,
30351 inferred_type: None,
30352 })))
30353 }
30354 }
30355 DialectType::DuckDB => {
30356 if has_zone {
30357 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
30358 Ok(Expression::Cast(Box::new(Cast {
30359 this,
30360 to: DataType::Timestamp {
30361 timezone: true,
30362 precision: None,
30363 },
30364 double_colon_syntax: false,
30365 trailing_comments: Vec::new(),
30366 format: None,
30367 default: None,
30368 inferred_type: None,
30369 })))
30370 } else {
30371 // DuckDB: CAST(x AS TIMESTAMP)
30372 Ok(Expression::Cast(Box::new(Cast {
30373 this,
30374 to: DataType::Timestamp {
30375 timezone: false,
30376 precision: None,
30377 },
30378 double_colon_syntax: false,
30379 trailing_comments: Vec::new(),
30380 format: None,
30381 default: None,
30382 inferred_type: None,
30383 })))
30384 }
30385 }
30386 DialectType::PostgreSQL
30387 | DialectType::Materialize
30388 | DialectType::RisingWave => {
30389 if has_zone {
30390 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
30391 Ok(Expression::Cast(Box::new(Cast {
30392 this,
30393 to: DataType::Timestamp {
30394 timezone: true,
30395 precision: None,
30396 },
30397 double_colon_syntax: false,
30398 trailing_comments: Vec::new(),
30399 format: None,
30400 default: None,
30401 inferred_type: None,
30402 })))
30403 } else {
30404 // PostgreSQL: CAST(x AS TIMESTAMP)
30405 Ok(Expression::Cast(Box::new(Cast {
30406 this,
30407 to: DataType::Timestamp {
30408 timezone: false,
30409 precision: None,
30410 },
30411 double_colon_syntax: false,
30412 trailing_comments: Vec::new(),
30413 format: None,
30414 default: None,
30415 inferred_type: None,
30416 })))
30417 }
30418 }
30419 DialectType::Snowflake => {
30420 if has_zone {
30421 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
30422 Ok(Expression::Cast(Box::new(Cast {
30423 this,
30424 to: DataType::Timestamp {
30425 timezone: true,
30426 precision: None,
30427 },
30428 double_colon_syntax: false,
30429 trailing_comments: Vec::new(),
30430 format: None,
30431 default: None,
30432 inferred_type: None,
30433 })))
30434 } else {
30435 // Snowflake: CAST(x AS TIMESTAMP)
30436 Ok(Expression::Cast(Box::new(Cast {
30437 this,
30438 to: DataType::Timestamp {
30439 timezone: false,
30440 precision: None,
30441 },
30442 double_colon_syntax: false,
30443 trailing_comments: Vec::new(),
30444 format: None,
30445 default: None,
30446 inferred_type: None,
30447 })))
30448 }
30449 }
30450 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30451 if has_zone {
30452 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
30453 // Check for precision from sub-second digits
30454 let precision = if let Expression::Literal(ref lit) = this {
30455 if let Literal::String(ref s) = lit.as_ref() {
30456 if let Some(dot_pos) = s.rfind('.') {
30457 let frac = &s[dot_pos + 1..];
30458 let digit_count = frac
30459 .chars()
30460 .take_while(|c| c.is_ascii_digit())
30461 .count();
30462 if digit_count > 0
30463 && matches!(target, DialectType::Trino)
30464 {
30465 Some(digit_count as u32)
30466 } else {
30467 None
30468 }
30469 } else {
30470 None
30471 }
30472 } else {
30473 None
30474 }
30475 } else {
30476 None
30477 };
30478 let dt = if let Some(prec) = precision {
30479 DataType::Timestamp {
30480 timezone: true,
30481 precision: Some(prec),
30482 }
30483 } else {
30484 DataType::Timestamp {
30485 timezone: true,
30486 precision: None,
30487 }
30488 };
30489 Ok(Expression::Cast(Box::new(Cast {
30490 this,
30491 to: dt,
30492 double_colon_syntax: false,
30493 trailing_comments: Vec::new(),
30494 format: None,
30495 default: None,
30496 inferred_type: None,
30497 })))
30498 } else {
30499 // Check for sub-second precision for Trino
30500 let precision = if let Expression::Literal(ref lit) = this {
30501 if let Literal::String(ref s) = lit.as_ref() {
30502 if let Some(dot_pos) = s.rfind('.') {
30503 let frac = &s[dot_pos + 1..];
30504 let digit_count = frac
30505 .chars()
30506 .take_while(|c| c.is_ascii_digit())
30507 .count();
30508 if digit_count > 0
30509 && matches!(target, DialectType::Trino)
30510 {
30511 Some(digit_count as u32)
30512 } else {
30513 None
30514 }
30515 } else {
30516 None
30517 }
30518 } else {
30519 None
30520 }
30521 } else {
30522 None
30523 };
30524 let dt = DataType::Timestamp {
30525 timezone: false,
30526 precision,
30527 };
30528 Ok(Expression::Cast(Box::new(Cast {
30529 this,
30530 to: dt,
30531 double_colon_syntax: false,
30532 trailing_comments: Vec::new(),
30533 format: None,
30534 default: None,
30535 inferred_type: None,
30536 })))
30537 }
30538 }
30539 DialectType::Redshift => {
30540 if has_zone {
30541 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
30542 Ok(Expression::Cast(Box::new(Cast {
30543 this,
30544 to: DataType::Timestamp {
30545 timezone: true,
30546 precision: None,
30547 },
30548 double_colon_syntax: false,
30549 trailing_comments: Vec::new(),
30550 format: None,
30551 default: None,
30552 inferred_type: None,
30553 })))
30554 } else {
30555 // Redshift: CAST(x AS TIMESTAMP)
30556 Ok(Expression::Cast(Box::new(Cast {
30557 this,
30558 to: DataType::Timestamp {
30559 timezone: false,
30560 precision: None,
30561 },
30562 double_colon_syntax: false,
30563 trailing_comments: Vec::new(),
30564 format: None,
30565 default: None,
30566 inferred_type: None,
30567 })))
30568 }
30569 }
30570 _ => {
30571 // Default: CAST(x AS TIMESTAMP)
30572 Ok(Expression::Cast(Box::new(Cast {
30573 this,
30574 to: DataType::Timestamp {
30575 timezone: false,
30576 precision: None,
30577 },
30578 double_colon_syntax: false,
30579 trailing_comments: Vec::new(),
30580 format: None,
30581 default: None,
30582 inferred_type: None,
30583 })))
30584 }
30585 }
30586 } else {
30587 Ok(e)
30588 }
30589 }
30590
30591 Action::DateToDateStrConvert => {
30592 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
30593 if let Expression::Function(f) = e {
30594 let arg = f.args.into_iter().next().unwrap();
30595 let str_type = match target {
30596 DialectType::DuckDB => DataType::Text,
30597 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30598 DataType::Custom {
30599 name: "STRING".to_string(),
30600 }
30601 }
30602 DialectType::Presto
30603 | DialectType::Trino
30604 | DialectType::Athena
30605 | DialectType::Drill => DataType::VarChar {
30606 length: None,
30607 parenthesized_length: false,
30608 },
30609 _ => DataType::VarChar {
30610 length: None,
30611 parenthesized_length: false,
30612 },
30613 };
30614 Ok(Expression::Cast(Box::new(Cast {
30615 this: arg,
30616 to: str_type,
30617 double_colon_syntax: false,
30618 trailing_comments: Vec::new(),
30619 format: None,
30620 default: None,
30621 inferred_type: None,
30622 })))
30623 } else {
30624 Ok(e)
30625 }
30626 }
30627
30628 Action::DateToDiConvert => {
30629 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
30630 if let Expression::Function(f) = e {
30631 let arg = f.args.into_iter().next().unwrap();
30632 let inner = match target {
30633 DialectType::DuckDB => {
30634 // STRFTIME(x, '%Y%m%d')
30635 Expression::Function(Box::new(Function::new(
30636 "STRFTIME".to_string(),
30637 vec![arg, Expression::string("%Y%m%d")],
30638 )))
30639 }
30640 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30641 // DATE_FORMAT(x, 'yyyyMMdd')
30642 Expression::Function(Box::new(Function::new(
30643 "DATE_FORMAT".to_string(),
30644 vec![arg, Expression::string("yyyyMMdd")],
30645 )))
30646 }
30647 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30648 // DATE_FORMAT(x, '%Y%m%d')
30649 Expression::Function(Box::new(Function::new(
30650 "DATE_FORMAT".to_string(),
30651 vec![arg, Expression::string("%Y%m%d")],
30652 )))
30653 }
30654 DialectType::Drill => {
30655 // TO_DATE(x, 'yyyyMMdd')
30656 Expression::Function(Box::new(Function::new(
30657 "TO_DATE".to_string(),
30658 vec![arg, Expression::string("yyyyMMdd")],
30659 )))
30660 }
30661 _ => {
30662 // Default: STRFTIME(x, '%Y%m%d')
30663 Expression::Function(Box::new(Function::new(
30664 "STRFTIME".to_string(),
30665 vec![arg, Expression::string("%Y%m%d")],
30666 )))
30667 }
30668 };
30669 // Use INT (not INTEGER) for Presto/Trino
30670 let int_type = match target {
30671 DialectType::Presto
30672 | DialectType::Trino
30673 | DialectType::Athena
30674 | DialectType::TSQL
30675 | DialectType::Fabric
30676 | DialectType::SQLite
30677 | DialectType::Redshift => DataType::Custom {
30678 name: "INT".to_string(),
30679 },
30680 _ => DataType::Int {
30681 length: None,
30682 integer_spelling: false,
30683 },
30684 };
30685 Ok(Expression::Cast(Box::new(Cast {
30686 this: inner,
30687 to: int_type,
30688 double_colon_syntax: false,
30689 trailing_comments: Vec::new(),
30690 format: None,
30691 default: None,
30692 inferred_type: None,
30693 })))
30694 } else {
30695 Ok(e)
30696 }
30697 }
30698
30699 Action::DiToDateConvert => {
30700 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
30701 if let Expression::Function(f) = e {
30702 let arg = f.args.into_iter().next().unwrap();
30703 match target {
30704 DialectType::DuckDB => {
30705 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
30706 let cast_text = Expression::Cast(Box::new(Cast {
30707 this: arg,
30708 to: DataType::Text,
30709 double_colon_syntax: false,
30710 trailing_comments: Vec::new(),
30711 format: None,
30712 default: None,
30713 inferred_type: None,
30714 }));
30715 let strptime = Expression::Function(Box::new(Function::new(
30716 "STRPTIME".to_string(),
30717 vec![cast_text, Expression::string("%Y%m%d")],
30718 )));
30719 Ok(Expression::Cast(Box::new(Cast {
30720 this: strptime,
30721 to: DataType::Date,
30722 double_colon_syntax: false,
30723 trailing_comments: Vec::new(),
30724 format: None,
30725 default: None,
30726 inferred_type: None,
30727 })))
30728 }
30729 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30730 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
30731 let cast_str = Expression::Cast(Box::new(Cast {
30732 this: arg,
30733 to: DataType::Custom {
30734 name: "STRING".to_string(),
30735 },
30736 double_colon_syntax: false,
30737 trailing_comments: Vec::new(),
30738 format: None,
30739 default: None,
30740 inferred_type: None,
30741 }));
30742 Ok(Expression::Function(Box::new(Function::new(
30743 "TO_DATE".to_string(),
30744 vec![cast_str, Expression::string("yyyyMMdd")],
30745 ))))
30746 }
30747 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30748 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
30749 let cast_varchar = Expression::Cast(Box::new(Cast {
30750 this: arg,
30751 to: DataType::VarChar {
30752 length: None,
30753 parenthesized_length: false,
30754 },
30755 double_colon_syntax: false,
30756 trailing_comments: Vec::new(),
30757 format: None,
30758 default: None,
30759 inferred_type: None,
30760 }));
30761 let date_parse = Expression::Function(Box::new(Function::new(
30762 "DATE_PARSE".to_string(),
30763 vec![cast_varchar, Expression::string("%Y%m%d")],
30764 )));
30765 Ok(Expression::Cast(Box::new(Cast {
30766 this: date_parse,
30767 to: DataType::Date,
30768 double_colon_syntax: false,
30769 trailing_comments: Vec::new(),
30770 format: None,
30771 default: None,
30772 inferred_type: None,
30773 })))
30774 }
30775 DialectType::Drill => {
30776 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
30777 let cast_varchar = Expression::Cast(Box::new(Cast {
30778 this: arg,
30779 to: DataType::VarChar {
30780 length: None,
30781 parenthesized_length: false,
30782 },
30783 double_colon_syntax: false,
30784 trailing_comments: Vec::new(),
30785 format: None,
30786 default: None,
30787 inferred_type: None,
30788 }));
30789 Ok(Expression::Function(Box::new(Function::new(
30790 "TO_DATE".to_string(),
30791 vec![cast_varchar, Expression::string("yyyyMMdd")],
30792 ))))
30793 }
30794 _ => Ok(Expression::Function(Box::new(Function::new(
30795 "DI_TO_DATE".to_string(),
30796 vec![arg],
30797 )))),
30798 }
30799 } else {
30800 Ok(e)
30801 }
30802 }
30803
30804 Action::TsOrDiToDiConvert => {
30805 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
30806 if let Expression::Function(f) = e {
30807 let arg = f.args.into_iter().next().unwrap();
30808 let str_type = match target {
30809 DialectType::DuckDB => DataType::Text,
30810 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30811 DataType::Custom {
30812 name: "STRING".to_string(),
30813 }
30814 }
30815 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30816 DataType::VarChar {
30817 length: None,
30818 parenthesized_length: false,
30819 }
30820 }
30821 _ => DataType::VarChar {
30822 length: None,
30823 parenthesized_length: false,
30824 },
30825 };
30826 let cast_str = Expression::Cast(Box::new(Cast {
30827 this: arg,
30828 to: str_type,
30829 double_colon_syntax: false,
30830 trailing_comments: Vec::new(),
30831 format: None,
30832 default: None,
30833 inferred_type: None,
30834 }));
30835 let replace_expr = Expression::Function(Box::new(Function::new(
30836 "REPLACE".to_string(),
30837 vec![cast_str, Expression::string("-"), Expression::string("")],
30838 )));
30839 let substr_name = match target {
30840 DialectType::DuckDB
30841 | DialectType::Hive
30842 | DialectType::Spark
30843 | DialectType::Databricks => "SUBSTR",
30844 _ => "SUBSTR",
30845 };
30846 let substr = Expression::Function(Box::new(Function::new(
30847 substr_name.to_string(),
30848 vec![replace_expr, Expression::number(1), Expression::number(8)],
30849 )));
30850 // Use INT (not INTEGER) for Presto/Trino etc.
30851 let int_type = match target {
30852 DialectType::Presto
30853 | DialectType::Trino
30854 | DialectType::Athena
30855 | DialectType::TSQL
30856 | DialectType::Fabric
30857 | DialectType::SQLite
30858 | DialectType::Redshift => DataType::Custom {
30859 name: "INT".to_string(),
30860 },
30861 _ => DataType::Int {
30862 length: None,
30863 integer_spelling: false,
30864 },
30865 };
30866 Ok(Expression::Cast(Box::new(Cast {
30867 this: substr,
30868 to: int_type,
30869 double_colon_syntax: false,
30870 trailing_comments: Vec::new(),
30871 format: None,
30872 default: None,
30873 inferred_type: None,
30874 })))
30875 } else {
30876 Ok(e)
30877 }
30878 }
30879
30880 Action::UnixToStrConvert => {
30881 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
30882 if let Expression::Function(f) = e {
30883 let mut args = f.args;
30884 let this = args.remove(0);
30885 let fmt_expr = if !args.is_empty() {
30886 Some(args.remove(0))
30887 } else {
30888 None
30889 };
30890
30891 // Check if format is a string literal
30892 let fmt_str = fmt_expr.as_ref().and_then(|f| {
30893 if let Expression::Literal(lit) = f {
30894 if let Literal::String(s) = lit.as_ref() {
30895 Some(s.clone())
30896 } else {
30897 None
30898 }
30899 } else {
30900 None
30901 }
30902 });
30903
30904 if let Some(fmt_string) = fmt_str {
30905 // String literal format -> use UnixToStr expression (generator handles it)
30906 Ok(Expression::UnixToStr(Box::new(
30907 crate::expressions::UnixToStr {
30908 this: Box::new(this),
30909 format: Some(fmt_string),
30910 },
30911 )))
30912 } else if let Some(fmt_e) = fmt_expr {
30913 // Non-literal format (e.g., identifier `y`) -> build target expression directly
30914 match target {
30915 DialectType::DuckDB => {
30916 // STRFTIME(TO_TIMESTAMP(x), y)
30917 let to_ts = Expression::Function(Box::new(Function::new(
30918 "TO_TIMESTAMP".to_string(),
30919 vec![this],
30920 )));
30921 Ok(Expression::Function(Box::new(Function::new(
30922 "STRFTIME".to_string(),
30923 vec![to_ts, fmt_e],
30924 ))))
30925 }
30926 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30927 // DATE_FORMAT(FROM_UNIXTIME(x), y)
30928 let from_unix = Expression::Function(Box::new(Function::new(
30929 "FROM_UNIXTIME".to_string(),
30930 vec![this],
30931 )));
30932 Ok(Expression::Function(Box::new(Function::new(
30933 "DATE_FORMAT".to_string(),
30934 vec![from_unix, fmt_e],
30935 ))))
30936 }
30937 DialectType::Hive
30938 | DialectType::Spark
30939 | DialectType::Databricks
30940 | DialectType::Doris
30941 | DialectType::StarRocks => {
30942 // FROM_UNIXTIME(x, y)
30943 Ok(Expression::Function(Box::new(Function::new(
30944 "FROM_UNIXTIME".to_string(),
30945 vec![this, fmt_e],
30946 ))))
30947 }
30948 _ => {
30949 // Default: keep as UNIX_TO_STR(x, y)
30950 Ok(Expression::Function(Box::new(Function::new(
30951 "UNIX_TO_STR".to_string(),
30952 vec![this, fmt_e],
30953 ))))
30954 }
30955 }
30956 } else {
30957 Ok(Expression::UnixToStr(Box::new(
30958 crate::expressions::UnixToStr {
30959 this: Box::new(this),
30960 format: None,
30961 },
30962 )))
30963 }
30964 } else {
30965 Ok(e)
30966 }
30967 }
30968
30969 Action::UnixToTimeConvert => {
30970 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
30971 if let Expression::Function(f) = e {
30972 let arg = f.args.into_iter().next().unwrap();
30973 Ok(Expression::UnixToTime(Box::new(
30974 crate::expressions::UnixToTime {
30975 this: Box::new(arg),
30976 scale: None,
30977 zone: None,
30978 hours: None,
30979 minutes: None,
30980 format: None,
30981 target_type: None,
30982 },
30983 )))
30984 } else {
30985 Ok(e)
30986 }
30987 }
30988
30989 Action::UnixToTimeStrConvert => {
30990 // UNIX_TO_TIME_STR(x) -> dialect-specific
30991 if let Expression::Function(f) = e {
30992 let arg = f.args.into_iter().next().unwrap();
30993 match target {
30994 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30995 // FROM_UNIXTIME(x)
30996 Ok(Expression::Function(Box::new(Function::new(
30997 "FROM_UNIXTIME".to_string(),
30998 vec![arg],
30999 ))))
31000 }
31001 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31002 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
31003 let from_unix = Expression::Function(Box::new(Function::new(
31004 "FROM_UNIXTIME".to_string(),
31005 vec![arg],
31006 )));
31007 Ok(Expression::Cast(Box::new(Cast {
31008 this: from_unix,
31009 to: DataType::VarChar {
31010 length: None,
31011 parenthesized_length: false,
31012 },
31013 double_colon_syntax: false,
31014 trailing_comments: Vec::new(),
31015 format: None,
31016 default: None,
31017 inferred_type: None,
31018 })))
31019 }
31020 DialectType::DuckDB => {
31021 // CAST(TO_TIMESTAMP(x) AS TEXT)
31022 let to_ts = Expression::Function(Box::new(Function::new(
31023 "TO_TIMESTAMP".to_string(),
31024 vec![arg],
31025 )));
31026 Ok(Expression::Cast(Box::new(Cast {
31027 this: to_ts,
31028 to: DataType::Text,
31029 double_colon_syntax: false,
31030 trailing_comments: Vec::new(),
31031 format: None,
31032 default: None,
31033 inferred_type: None,
31034 })))
31035 }
31036 _ => Ok(Expression::Function(Box::new(Function::new(
31037 "UNIX_TO_TIME_STR".to_string(),
31038 vec![arg],
31039 )))),
31040 }
31041 } else {
31042 Ok(e)
31043 }
31044 }
31045
31046 Action::TimeToUnixConvert => {
31047 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
31048 if let Expression::Function(f) = e {
31049 let arg = f.args.into_iter().next().unwrap();
31050 Ok(Expression::TimeToUnix(Box::new(
31051 crate::expressions::UnaryFunc {
31052 this: arg,
31053 original_name: None,
31054 inferred_type: None,
31055 },
31056 )))
31057 } else {
31058 Ok(e)
31059 }
31060 }
31061
31062 Action::TimeToStrConvert => {
31063 // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
31064 if let Expression::Function(f) = e {
31065 let mut args = f.args;
31066 let this = args.remove(0);
31067 let fmt = match args.remove(0) {
31068 Expression::Literal(lit)
31069 if matches!(lit.as_ref(), Literal::String(_)) =>
31070 {
31071 let Literal::String(s) = lit.as_ref() else {
31072 unreachable!()
31073 };
31074 s.clone()
31075 }
31076 other => {
31077 return Ok(Expression::Function(Box::new(Function::new(
31078 "TIME_TO_STR".to_string(),
31079 vec![this, other],
31080 ))));
31081 }
31082 };
31083 Ok(Expression::TimeToStr(Box::new(
31084 crate::expressions::TimeToStr {
31085 this: Box::new(this),
31086 format: fmt,
31087 culture: None,
31088 zone: None,
31089 },
31090 )))
31091 } else {
31092 Ok(e)
31093 }
31094 }
31095
31096 Action::StrToUnixConvert => {
31097 // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
31098 if let Expression::Function(f) = e {
31099 let mut args = f.args;
31100 let this = args.remove(0);
31101 let fmt = match args.remove(0) {
31102 Expression::Literal(lit)
31103 if matches!(lit.as_ref(), Literal::String(_)) =>
31104 {
31105 let Literal::String(s) = lit.as_ref() else {
31106 unreachable!()
31107 };
31108 s.clone()
31109 }
31110 other => {
31111 return Ok(Expression::Function(Box::new(Function::new(
31112 "STR_TO_UNIX".to_string(),
31113 vec![this, other],
31114 ))));
31115 }
31116 };
31117 Ok(Expression::StrToUnix(Box::new(
31118 crate::expressions::StrToUnix {
31119 this: Some(Box::new(this)),
31120 format: Some(fmt),
31121 },
31122 )))
31123 } else {
31124 Ok(e)
31125 }
31126 }
31127
31128 Action::TimeStrToUnixConvert => {
31129 // TIME_STR_TO_UNIX(x) -> dialect-specific
31130 if let Expression::Function(f) = e {
31131 let arg = f.args.into_iter().next().unwrap();
31132 match target {
31133 DialectType::DuckDB => {
31134 // EPOCH(CAST(x AS TIMESTAMP))
31135 let cast_ts = Expression::Cast(Box::new(Cast {
31136 this: arg,
31137 to: DataType::Timestamp {
31138 timezone: false,
31139 precision: None,
31140 },
31141 double_colon_syntax: false,
31142 trailing_comments: Vec::new(),
31143 format: None,
31144 default: None,
31145 inferred_type: None,
31146 }));
31147 Ok(Expression::Function(Box::new(Function::new(
31148 "EPOCH".to_string(),
31149 vec![cast_ts],
31150 ))))
31151 }
31152 DialectType::Hive
31153 | DialectType::Doris
31154 | DialectType::StarRocks
31155 | DialectType::MySQL => {
31156 // UNIX_TIMESTAMP(x)
31157 Ok(Expression::Function(Box::new(Function::new(
31158 "UNIX_TIMESTAMP".to_string(),
31159 vec![arg],
31160 ))))
31161 }
31162 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31163 // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
31164 let date_parse = Expression::Function(Box::new(Function::new(
31165 "DATE_PARSE".to_string(),
31166 vec![arg, Expression::string("%Y-%m-%d %T")],
31167 )));
31168 Ok(Expression::Function(Box::new(Function::new(
31169 "TO_UNIXTIME".to_string(),
31170 vec![date_parse],
31171 ))))
31172 }
31173 _ => Ok(Expression::Function(Box::new(Function::new(
31174 "TIME_STR_TO_UNIX".to_string(),
31175 vec![arg],
31176 )))),
31177 }
31178 } else {
31179 Ok(e)
31180 }
31181 }
31182
31183 Action::TimeToTimeStrConvert => {
31184 // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
31185 if let Expression::Function(f) = e {
31186 let arg = f.args.into_iter().next().unwrap();
31187 let str_type = match target {
31188 DialectType::DuckDB => DataType::Text,
31189 DialectType::Hive
31190 | DialectType::Spark
31191 | DialectType::Databricks
31192 | DialectType::Doris
31193 | DialectType::StarRocks => DataType::Custom {
31194 name: "STRING".to_string(),
31195 },
31196 DialectType::Redshift => DataType::Custom {
31197 name: "VARCHAR(MAX)".to_string(),
31198 },
31199 _ => DataType::VarChar {
31200 length: None,
31201 parenthesized_length: false,
31202 },
31203 };
31204 Ok(Expression::Cast(Box::new(Cast {
31205 this: arg,
31206 to: str_type,
31207 double_colon_syntax: false,
31208 trailing_comments: Vec::new(),
31209 format: None,
31210 default: None,
31211 inferred_type: None,
31212 })))
31213 } else {
31214 Ok(e)
31215 }
31216 }
31217
31218 Action::DateTruncSwapArgs => {
31219 // DATE_TRUNC('unit', x) from Generic -> target-specific
31220 if let Expression::Function(f) = e {
31221 if f.args.len() == 2 {
31222 let unit_arg = f.args[0].clone();
31223 let expr_arg = f.args[1].clone();
31224 // Extract unit string from the first arg
31225 let unit_str = match &unit_arg {
31226 Expression::Literal(lit)
31227 if matches!(lit.as_ref(), Literal::String(_)) =>
31228 {
31229 let Literal::String(s) = lit.as_ref() else {
31230 unreachable!()
31231 };
31232 s.to_ascii_uppercase()
31233 }
31234 _ => return Ok(Expression::Function(f)),
31235 };
31236 match target {
31237 DialectType::BigQuery => {
31238 // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
31239 let unit_ident =
31240 Expression::Column(Box::new(crate::expressions::Column {
31241 name: crate::expressions::Identifier::new(unit_str),
31242 table: None,
31243 join_mark: false,
31244 trailing_comments: Vec::new(),
31245 span: None,
31246 inferred_type: None,
31247 }));
31248 Ok(Expression::Function(Box::new(Function::new(
31249 "DATE_TRUNC".to_string(),
31250 vec![expr_arg, unit_ident],
31251 ))))
31252 }
31253 DialectType::Doris => {
31254 // Doris: DATE_TRUNC(x, 'UNIT')
31255 Ok(Expression::Function(Box::new(Function::new(
31256 "DATE_TRUNC".to_string(),
31257 vec![expr_arg, Expression::string(&unit_str)],
31258 ))))
31259 }
31260 DialectType::StarRocks => {
31261 // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
31262 Ok(Expression::Function(Box::new(Function::new(
31263 "DATE_TRUNC".to_string(),
31264 vec![Expression::string(&unit_str), expr_arg],
31265 ))))
31266 }
31267 DialectType::Spark | DialectType::Databricks => {
31268 // Spark: TRUNC(x, 'UNIT')
31269 Ok(Expression::Function(Box::new(Function::new(
31270 "TRUNC".to_string(),
31271 vec![expr_arg, Expression::string(&unit_str)],
31272 ))))
31273 }
31274 DialectType::MySQL => {
31275 // MySQL: complex expansion based on unit
31276 Self::date_trunc_to_mysql(&unit_str, &expr_arg)
31277 }
31278 _ => Ok(Expression::Function(f)),
31279 }
31280 } else {
31281 Ok(Expression::Function(f))
31282 }
31283 } else {
31284 Ok(e)
31285 }
31286 }
31287
31288 Action::TimestampTruncConvert => {
31289 // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
31290 if let Expression::Function(f) = e {
31291 if f.args.len() >= 2 {
31292 let expr_arg = f.args[0].clone();
31293 let unit_arg = f.args[1].clone();
31294 let tz_arg = if f.args.len() >= 3 {
31295 Some(f.args[2].clone())
31296 } else {
31297 None
31298 };
31299 // Extract unit string
31300 let unit_str = match &unit_arg {
31301 Expression::Literal(lit)
31302 if matches!(lit.as_ref(), Literal::String(_)) =>
31303 {
31304 let Literal::String(s) = lit.as_ref() else {
31305 unreachable!()
31306 };
31307 s.to_ascii_uppercase()
31308 }
31309 Expression::Column(c) => c.name.name.to_ascii_uppercase(),
31310 _ => {
31311 return Ok(Expression::Function(f));
31312 }
31313 };
31314 match target {
31315 DialectType::Spark | DialectType::Databricks => {
31316 // Spark: DATE_TRUNC('UNIT', x)
31317 Ok(Expression::Function(Box::new(Function::new(
31318 "DATE_TRUNC".to_string(),
31319 vec![Expression::string(&unit_str), expr_arg],
31320 ))))
31321 }
31322 DialectType::Doris | DialectType::StarRocks => {
31323 // Doris: DATE_TRUNC(x, 'UNIT')
31324 Ok(Expression::Function(Box::new(Function::new(
31325 "DATE_TRUNC".to_string(),
31326 vec![expr_arg, Expression::string(&unit_str)],
31327 ))))
31328 }
31329 DialectType::BigQuery => {
31330 // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
31331 let unit_ident =
31332 Expression::Column(Box::new(crate::expressions::Column {
31333 name: crate::expressions::Identifier::new(unit_str),
31334 table: None,
31335 join_mark: false,
31336 trailing_comments: Vec::new(),
31337 span: None,
31338 inferred_type: None,
31339 }));
31340 let mut args = vec![expr_arg, unit_ident];
31341 if let Some(tz) = tz_arg {
31342 args.push(tz);
31343 }
31344 Ok(Expression::Function(Box::new(Function::new(
31345 "TIMESTAMP_TRUNC".to_string(),
31346 args,
31347 ))))
31348 }
31349 DialectType::DuckDB => {
31350 // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
31351 if let Some(tz) = tz_arg {
31352 let tz_str = match &tz {
31353 Expression::Literal(lit)
31354 if matches!(lit.as_ref(), Literal::String(_)) =>
31355 {
31356 let Literal::String(s) = lit.as_ref() else {
31357 unreachable!()
31358 };
31359 s.clone()
31360 }
31361 _ => "UTC".to_string(),
31362 };
31363 // x AT TIME ZONE 'tz'
31364 let at_tz = Expression::AtTimeZone(Box::new(
31365 crate::expressions::AtTimeZone {
31366 this: expr_arg,
31367 zone: Expression::string(&tz_str),
31368 },
31369 ));
31370 // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
31371 let trunc = Expression::Function(Box::new(Function::new(
31372 "DATE_TRUNC".to_string(),
31373 vec![Expression::string(&unit_str), at_tz],
31374 )));
31375 // DATE_TRUNC(...) AT TIME ZONE 'tz'
31376 Ok(Expression::AtTimeZone(Box::new(
31377 crate::expressions::AtTimeZone {
31378 this: trunc,
31379 zone: Expression::string(&tz_str),
31380 },
31381 )))
31382 } else {
31383 Ok(Expression::Function(Box::new(Function::new(
31384 "DATE_TRUNC".to_string(),
31385 vec![Expression::string(&unit_str), expr_arg],
31386 ))))
31387 }
31388 }
31389 DialectType::Presto
31390 | DialectType::Trino
31391 | DialectType::Athena
31392 | DialectType::Snowflake => {
31393 // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
31394 Ok(Expression::Function(Box::new(Function::new(
31395 "DATE_TRUNC".to_string(),
31396 vec![Expression::string(&unit_str), expr_arg],
31397 ))))
31398 }
31399 _ => {
31400 // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
31401 let mut args = vec![Expression::string(&unit_str), expr_arg];
31402 if let Some(tz) = tz_arg {
31403 args.push(tz);
31404 }
31405 Ok(Expression::Function(Box::new(Function::new(
31406 "DATE_TRUNC".to_string(),
31407 args,
31408 ))))
31409 }
31410 }
31411 } else {
31412 Ok(Expression::Function(f))
31413 }
31414 } else {
31415 Ok(e)
31416 }
31417 }
31418
31419 Action::StrToDateConvert => {
31420 // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
31421 if let Expression::Function(f) = e {
31422 if f.args.len() == 2 {
31423 let mut args = f.args;
31424 let this = args.remove(0);
31425 let fmt_expr = args.remove(0);
31426 let fmt_str = match &fmt_expr {
31427 Expression::Literal(lit)
31428 if matches!(lit.as_ref(), Literal::String(_)) =>
31429 {
31430 let Literal::String(s) = lit.as_ref() else {
31431 unreachable!()
31432 };
31433 Some(s.clone())
31434 }
31435 _ => None,
31436 };
31437 let default_date = "%Y-%m-%d";
31438 let default_time = "%Y-%m-%d %H:%M:%S";
31439 let is_default = fmt_str
31440 .as_ref()
31441 .map_or(false, |f| f == default_date || f == default_time);
31442
31443 if is_default {
31444 // Default format: handle per-dialect
31445 match target {
31446 DialectType::MySQL
31447 | DialectType::Doris
31448 | DialectType::StarRocks => {
31449 // Keep STR_TO_DATE(x, fmt) as-is
31450 Ok(Expression::Function(Box::new(Function::new(
31451 "STR_TO_DATE".to_string(),
31452 vec![this, fmt_expr],
31453 ))))
31454 }
31455 DialectType::Hive => {
31456 // Hive: CAST(x AS DATE)
31457 Ok(Expression::Cast(Box::new(Cast {
31458 this,
31459 to: DataType::Date,
31460 double_colon_syntax: false,
31461 trailing_comments: Vec::new(),
31462 format: None,
31463 default: None,
31464 inferred_type: None,
31465 })))
31466 }
31467 DialectType::Presto
31468 | DialectType::Trino
31469 | DialectType::Athena => {
31470 // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
31471 let date_parse =
31472 Expression::Function(Box::new(Function::new(
31473 "DATE_PARSE".to_string(),
31474 vec![this, fmt_expr],
31475 )));
31476 Ok(Expression::Cast(Box::new(Cast {
31477 this: date_parse,
31478 to: DataType::Date,
31479 double_colon_syntax: false,
31480 trailing_comments: Vec::new(),
31481 format: None,
31482 default: None,
31483 inferred_type: None,
31484 })))
31485 }
31486 _ => {
31487 // Others: TsOrDsToDate (delegates to generator)
31488 Ok(Expression::TsOrDsToDate(Box::new(
31489 crate::expressions::TsOrDsToDate {
31490 this: Box::new(this),
31491 format: None,
31492 safe: None,
31493 },
31494 )))
31495 }
31496 }
31497 } else if let Some(fmt) = fmt_str {
31498 match target {
31499 DialectType::Doris
31500 | DialectType::StarRocks
31501 | DialectType::MySQL => {
31502 // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
31503 let mut normalized = fmt.clone();
31504 normalized = normalized.replace("%-d", "%e");
31505 normalized = normalized.replace("%-m", "%c");
31506 normalized = normalized.replace("%H:%M:%S", "%T");
31507 Ok(Expression::Function(Box::new(Function::new(
31508 "STR_TO_DATE".to_string(),
31509 vec![this, Expression::string(&normalized)],
31510 ))))
31511 }
31512 DialectType::Hive => {
31513 // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
31514 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31515 let unix_ts =
31516 Expression::Function(Box::new(Function::new(
31517 "UNIX_TIMESTAMP".to_string(),
31518 vec![this, Expression::string(&java_fmt)],
31519 )));
31520 let from_unix =
31521 Expression::Function(Box::new(Function::new(
31522 "FROM_UNIXTIME".to_string(),
31523 vec![unix_ts],
31524 )));
31525 Ok(Expression::Cast(Box::new(Cast {
31526 this: from_unix,
31527 to: DataType::Date,
31528 double_colon_syntax: false,
31529 trailing_comments: Vec::new(),
31530 format: None,
31531 default: None,
31532 inferred_type: None,
31533 })))
31534 }
31535 DialectType::Spark | DialectType::Databricks => {
31536 // Spark: TO_DATE(x, java_fmt)
31537 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31538 Ok(Expression::Function(Box::new(Function::new(
31539 "TO_DATE".to_string(),
31540 vec![this, Expression::string(&java_fmt)],
31541 ))))
31542 }
31543 DialectType::Drill => {
31544 // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
31545 // The generator's string literal escaping will double the quotes: 'T' -> ''T''
31546 let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
31547 let java_fmt = java_fmt.replace('T', "'T'");
31548 Ok(Expression::Function(Box::new(Function::new(
31549 "TO_DATE".to_string(),
31550 vec![this, Expression::string(&java_fmt)],
31551 ))))
31552 }
31553 _ => {
31554 // For other dialects: use TsOrDsToDate which delegates to generator
31555 Ok(Expression::TsOrDsToDate(Box::new(
31556 crate::expressions::TsOrDsToDate {
31557 this: Box::new(this),
31558 format: Some(fmt),
31559 safe: None,
31560 },
31561 )))
31562 }
31563 }
31564 } else {
31565 // Non-string format - keep as-is
31566 let mut new_args = Vec::new();
31567 new_args.push(this);
31568 new_args.push(fmt_expr);
31569 Ok(Expression::Function(Box::new(Function::new(
31570 "STR_TO_DATE".to_string(),
31571 new_args,
31572 ))))
31573 }
31574 } else {
31575 Ok(Expression::Function(f))
31576 }
31577 } else {
31578 Ok(e)
31579 }
31580 }
31581
31582 Action::TsOrDsAddConvert => {
31583 // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
31584 if let Expression::Function(f) = e {
31585 if f.args.len() == 3 {
31586 let mut args = f.args;
31587 let x = args.remove(0);
31588 let n = args.remove(0);
31589 let unit_expr = args.remove(0);
31590 let unit_str = match &unit_expr {
31591 Expression::Literal(lit)
31592 if matches!(lit.as_ref(), Literal::String(_)) =>
31593 {
31594 let Literal::String(s) = lit.as_ref() else {
31595 unreachable!()
31596 };
31597 s.to_ascii_uppercase()
31598 }
31599 _ => "DAY".to_string(),
31600 };
31601
31602 match target {
31603 DialectType::Hive
31604 | DialectType::Spark
31605 | DialectType::Databricks => {
31606 // DATE_ADD(x, n) - only supports DAY unit
31607 Ok(Expression::Function(Box::new(Function::new(
31608 "DATE_ADD".to_string(),
31609 vec![x, n],
31610 ))))
31611 }
31612 DialectType::MySQL => {
31613 // DATE_ADD(x, INTERVAL n UNIT)
31614 let iu = match unit_str.as_str() {
31615 "YEAR" => crate::expressions::IntervalUnit::Year,
31616 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31617 "MONTH" => crate::expressions::IntervalUnit::Month,
31618 "WEEK" => crate::expressions::IntervalUnit::Week,
31619 "HOUR" => crate::expressions::IntervalUnit::Hour,
31620 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31621 "SECOND" => crate::expressions::IntervalUnit::Second,
31622 _ => crate::expressions::IntervalUnit::Day,
31623 };
31624 let interval = Expression::Interval(Box::new(
31625 crate::expressions::Interval {
31626 this: Some(n),
31627 unit: Some(
31628 crate::expressions::IntervalUnitSpec::Simple {
31629 unit: iu,
31630 use_plural: false,
31631 },
31632 ),
31633 },
31634 ));
31635 Ok(Expression::Function(Box::new(Function::new(
31636 "DATE_ADD".to_string(),
31637 vec![x, interval],
31638 ))))
31639 }
31640 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31641 // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
31642 let cast_ts = Expression::Cast(Box::new(Cast {
31643 this: x,
31644 to: DataType::Timestamp {
31645 precision: None,
31646 timezone: false,
31647 },
31648 double_colon_syntax: false,
31649 trailing_comments: Vec::new(),
31650 format: None,
31651 default: None,
31652 inferred_type: None,
31653 }));
31654 let cast_date = Expression::Cast(Box::new(Cast {
31655 this: cast_ts,
31656 to: DataType::Date,
31657 double_colon_syntax: false,
31658 trailing_comments: Vec::new(),
31659 format: None,
31660 default: None,
31661 inferred_type: None,
31662 }));
31663 Ok(Expression::Function(Box::new(Function::new(
31664 "DATE_ADD".to_string(),
31665 vec![Expression::string(&unit_str), n, cast_date],
31666 ))))
31667 }
31668 DialectType::DuckDB => {
31669 // CAST(x AS DATE) + INTERVAL n UNIT
31670 let cast_date = Expression::Cast(Box::new(Cast {
31671 this: x,
31672 to: DataType::Date,
31673 double_colon_syntax: false,
31674 trailing_comments: Vec::new(),
31675 format: None,
31676 default: None,
31677 inferred_type: None,
31678 }));
31679 let iu = match unit_str.as_str() {
31680 "YEAR" => crate::expressions::IntervalUnit::Year,
31681 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31682 "MONTH" => crate::expressions::IntervalUnit::Month,
31683 "WEEK" => crate::expressions::IntervalUnit::Week,
31684 "HOUR" => crate::expressions::IntervalUnit::Hour,
31685 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31686 "SECOND" => crate::expressions::IntervalUnit::Second,
31687 _ => crate::expressions::IntervalUnit::Day,
31688 };
31689 let interval = Expression::Interval(Box::new(
31690 crate::expressions::Interval {
31691 this: Some(n),
31692 unit: Some(
31693 crate::expressions::IntervalUnitSpec::Simple {
31694 unit: iu,
31695 use_plural: false,
31696 },
31697 ),
31698 },
31699 ));
31700 Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
31701 left: cast_date,
31702 right: interval,
31703 left_comments: Vec::new(),
31704 operator_comments: Vec::new(),
31705 trailing_comments: Vec::new(),
31706 inferred_type: None,
31707 })))
31708 }
31709 DialectType::Drill => {
31710 // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
31711 let cast_date = Expression::Cast(Box::new(Cast {
31712 this: x,
31713 to: DataType::Date,
31714 double_colon_syntax: false,
31715 trailing_comments: Vec::new(),
31716 format: None,
31717 default: None,
31718 inferred_type: None,
31719 }));
31720 let iu = match unit_str.as_str() {
31721 "YEAR" => crate::expressions::IntervalUnit::Year,
31722 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31723 "MONTH" => crate::expressions::IntervalUnit::Month,
31724 "WEEK" => crate::expressions::IntervalUnit::Week,
31725 "HOUR" => crate::expressions::IntervalUnit::Hour,
31726 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31727 "SECOND" => crate::expressions::IntervalUnit::Second,
31728 _ => crate::expressions::IntervalUnit::Day,
31729 };
31730 let interval = Expression::Interval(Box::new(
31731 crate::expressions::Interval {
31732 this: Some(n),
31733 unit: Some(
31734 crate::expressions::IntervalUnitSpec::Simple {
31735 unit: iu,
31736 use_plural: false,
31737 },
31738 ),
31739 },
31740 ));
31741 Ok(Expression::Function(Box::new(Function::new(
31742 "DATE_ADD".to_string(),
31743 vec![cast_date, interval],
31744 ))))
31745 }
31746 _ => {
31747 // Default: keep as TS_OR_DS_ADD
31748 Ok(Expression::Function(Box::new(Function::new(
31749 "TS_OR_DS_ADD".to_string(),
31750 vec![x, n, unit_expr],
31751 ))))
31752 }
31753 }
31754 } else {
31755 Ok(Expression::Function(f))
31756 }
31757 } else {
31758 Ok(e)
31759 }
31760 }
31761
31762 Action::DateFromUnixDateConvert => {
31763 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
31764 if let Expression::Function(f) = e {
31765 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
31766 if matches!(
31767 target,
31768 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
31769 ) {
31770 return Ok(Expression::Function(Box::new(Function::new(
31771 "DATE_FROM_UNIX_DATE".to_string(),
31772 f.args,
31773 ))));
31774 }
31775 let n = f.args.into_iter().next().unwrap();
31776 let epoch_date = Expression::Cast(Box::new(Cast {
31777 this: Expression::string("1970-01-01"),
31778 to: DataType::Date,
31779 double_colon_syntax: false,
31780 trailing_comments: Vec::new(),
31781 format: None,
31782 default: None,
31783 inferred_type: None,
31784 }));
31785 match target {
31786 DialectType::DuckDB => {
31787 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
31788 let interval =
31789 Expression::Interval(Box::new(crate::expressions::Interval {
31790 this: Some(n),
31791 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31792 unit: crate::expressions::IntervalUnit::Day,
31793 use_plural: false,
31794 }),
31795 }));
31796 Ok(Expression::Add(Box::new(
31797 crate::expressions::BinaryOp::new(epoch_date, interval),
31798 )))
31799 }
31800 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
31801 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
31802 Ok(Expression::Function(Box::new(Function::new(
31803 "DATE_ADD".to_string(),
31804 vec![Expression::string("DAY"), n, epoch_date],
31805 ))))
31806 }
31807 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
31808 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
31809 Ok(Expression::Function(Box::new(Function::new(
31810 "DATEADD".to_string(),
31811 vec![
31812 Expression::Identifier(Identifier::new("DAY")),
31813 n,
31814 epoch_date,
31815 ],
31816 ))))
31817 }
31818 DialectType::BigQuery => {
31819 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
31820 let interval =
31821 Expression::Interval(Box::new(crate::expressions::Interval {
31822 this: Some(n),
31823 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31824 unit: crate::expressions::IntervalUnit::Day,
31825 use_plural: false,
31826 }),
31827 }));
31828 Ok(Expression::Function(Box::new(Function::new(
31829 "DATE_ADD".to_string(),
31830 vec![epoch_date, interval],
31831 ))))
31832 }
31833 DialectType::MySQL
31834 | DialectType::Doris
31835 | DialectType::StarRocks
31836 | DialectType::Drill => {
31837 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
31838 let interval =
31839 Expression::Interval(Box::new(crate::expressions::Interval {
31840 this: Some(n),
31841 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
31842 unit: crate::expressions::IntervalUnit::Day,
31843 use_plural: false,
31844 }),
31845 }));
31846 Ok(Expression::Function(Box::new(Function::new(
31847 "DATE_ADD".to_string(),
31848 vec![epoch_date, interval],
31849 ))))
31850 }
31851 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
31852 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
31853 Ok(Expression::Function(Box::new(Function::new(
31854 "DATE_ADD".to_string(),
31855 vec![epoch_date, n],
31856 ))))
31857 }
31858 DialectType::PostgreSQL
31859 | DialectType::Materialize
31860 | DialectType::RisingWave => {
31861 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
31862 let n_str = match &n {
31863 Expression::Literal(lit)
31864 if matches!(lit.as_ref(), Literal::Number(_)) =>
31865 {
31866 let Literal::Number(s) = lit.as_ref() else {
31867 unreachable!()
31868 };
31869 s.clone()
31870 }
31871 _ => Self::expr_to_string_static(&n),
31872 };
31873 let interval =
31874 Expression::Interval(Box::new(crate::expressions::Interval {
31875 this: Some(Expression::string(&format!("{} DAY", n_str))),
31876 unit: None,
31877 }));
31878 Ok(Expression::Add(Box::new(
31879 crate::expressions::BinaryOp::new(epoch_date, interval),
31880 )))
31881 }
31882 _ => {
31883 // Default: keep as-is
31884 Ok(Expression::Function(Box::new(Function::new(
31885 "DATE_FROM_UNIX_DATE".to_string(),
31886 vec![n],
31887 ))))
31888 }
31889 }
31890 } else {
31891 Ok(e)
31892 }
31893 }
31894
31895 Action::ArrayRemoveConvert => {
31896 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
31897 if let Expression::ArrayRemove(bf) = e {
31898 let arr = bf.this;
31899 let target_val = bf.expression;
31900 match target {
31901 DialectType::DuckDB => {
31902 let u_id = crate::expressions::Identifier::new("_u");
31903 let lambda =
31904 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31905 parameters: vec![u_id.clone()],
31906 body: Expression::Neq(Box::new(BinaryOp {
31907 left: Expression::Identifier(u_id),
31908 right: target_val,
31909 left_comments: Vec::new(),
31910 operator_comments: Vec::new(),
31911 trailing_comments: Vec::new(),
31912 inferred_type: None,
31913 })),
31914 colon: false,
31915 parameter_types: Vec::new(),
31916 }));
31917 Ok(Expression::Function(Box::new(Function::new(
31918 "LIST_FILTER".to_string(),
31919 vec![arr, lambda],
31920 ))))
31921 }
31922 DialectType::ClickHouse => {
31923 let u_id = crate::expressions::Identifier::new("_u");
31924 let lambda =
31925 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
31926 parameters: vec![u_id.clone()],
31927 body: Expression::Neq(Box::new(BinaryOp {
31928 left: Expression::Identifier(u_id),
31929 right: target_val,
31930 left_comments: Vec::new(),
31931 operator_comments: Vec::new(),
31932 trailing_comments: Vec::new(),
31933 inferred_type: None,
31934 })),
31935 colon: false,
31936 parameter_types: Vec::new(),
31937 }));
31938 Ok(Expression::Function(Box::new(Function::new(
31939 "arrayFilter".to_string(),
31940 vec![lambda, arr],
31941 ))))
31942 }
31943 DialectType::BigQuery => {
31944 // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
31945 let u_id = crate::expressions::Identifier::new("_u");
31946 let u_col =
31947 Expression::Column(Box::new(crate::expressions::Column {
31948 name: u_id.clone(),
31949 table: None,
31950 join_mark: false,
31951 trailing_comments: Vec::new(),
31952 span: None,
31953 inferred_type: None,
31954 }));
31955 let unnest_expr =
31956 Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
31957 this: arr,
31958 expressions: Vec::new(),
31959 with_ordinality: false,
31960 alias: None,
31961 offset_alias: None,
31962 }));
31963 let aliased_unnest =
31964 Expression::Alias(Box::new(crate::expressions::Alias {
31965 this: unnest_expr,
31966 alias: u_id.clone(),
31967 column_aliases: Vec::new(),
31968 alias_explicit_as: false,
31969 alias_keyword: None,
31970 pre_alias_comments: Vec::new(),
31971 trailing_comments: Vec::new(),
31972 inferred_type: None,
31973 }));
31974 let where_cond = Expression::Neq(Box::new(BinaryOp {
31975 left: u_col.clone(),
31976 right: target_val,
31977 left_comments: Vec::new(),
31978 operator_comments: Vec::new(),
31979 trailing_comments: Vec::new(),
31980 inferred_type: None,
31981 }));
31982 let subquery = Expression::Select(Box::new(
31983 crate::expressions::Select::new()
31984 .column(u_col)
31985 .from(aliased_unnest)
31986 .where_(where_cond),
31987 ));
31988 Ok(Expression::ArrayFunc(Box::new(
31989 crate::expressions::ArrayConstructor {
31990 expressions: vec![subquery],
31991 bracket_notation: false,
31992 use_list_keyword: false,
31993 },
31994 )))
31995 }
31996 _ => Ok(Expression::ArrayRemove(Box::new(
31997 crate::expressions::BinaryFunc {
31998 original_name: None,
31999 this: arr,
32000 expression: target_val,
32001 inferred_type: None,
32002 },
32003 ))),
32004 }
32005 } else {
32006 Ok(e)
32007 }
32008 }
32009
32010 Action::ArrayReverseConvert => {
32011 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
32012 if let Expression::ArrayReverse(af) = e {
32013 Ok(Expression::Function(Box::new(Function::new(
32014 "arrayReverse".to_string(),
32015 vec![af.this],
32016 ))))
32017 } else {
32018 Ok(e)
32019 }
32020 }
32021
32022 Action::JsonKeysConvert => {
32023 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
32024 if let Expression::JsonKeys(uf) = e {
32025 match target {
32026 DialectType::Spark | DialectType::Databricks => {
32027 Ok(Expression::Function(Box::new(Function::new(
32028 "JSON_OBJECT_KEYS".to_string(),
32029 vec![uf.this],
32030 ))))
32031 }
32032 DialectType::Snowflake => Ok(Expression::Function(Box::new(
32033 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
32034 ))),
32035 _ => Ok(Expression::JsonKeys(uf)),
32036 }
32037 } else {
32038 Ok(e)
32039 }
32040 }
32041
32042 Action::ParseJsonStrip => {
32043 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
32044 if let Expression::ParseJson(uf) = e {
32045 Ok(uf.this)
32046 } else {
32047 Ok(e)
32048 }
32049 }
32050
32051 Action::ArraySizeDrill => {
32052 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
32053 if let Expression::ArraySize(uf) = e {
32054 Ok(Expression::Function(Box::new(Function::new(
32055 "REPEATED_COUNT".to_string(),
32056 vec![uf.this],
32057 ))))
32058 } else {
32059 Ok(e)
32060 }
32061 }
32062
32063 Action::WeekOfYearToWeekIso => {
32064 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
32065 if let Expression::WeekOfYear(uf) = e {
32066 Ok(Expression::Function(Box::new(Function::new(
32067 "WEEKISO".to_string(),
32068 vec![uf.this],
32069 ))))
32070 } else {
32071 Ok(e)
32072 }
32073 }
32074 }
32075 })
32076 }
32077
32078 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
32079 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
32080 use crate::expressions::Function;
32081 match unit {
32082 "DAY" => {
32083 // DATE(x)
32084 Ok(Expression::Function(Box::new(Function::new(
32085 "DATE".to_string(),
32086 vec![expr.clone()],
32087 ))))
32088 }
32089 "WEEK" => {
32090 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
32091 let year_x = Expression::Function(Box::new(Function::new(
32092 "YEAR".to_string(),
32093 vec![expr.clone()],
32094 )));
32095 let week_x = Expression::Function(Box::new(Function::new(
32096 "WEEK".to_string(),
32097 vec![expr.clone(), Expression::number(1)],
32098 )));
32099 let concat_args = vec![
32100 year_x,
32101 Expression::string(" "),
32102 week_x,
32103 Expression::string(" 1"),
32104 ];
32105 let concat = Expression::Function(Box::new(Function::new(
32106 "CONCAT".to_string(),
32107 concat_args,
32108 )));
32109 Ok(Expression::Function(Box::new(Function::new(
32110 "STR_TO_DATE".to_string(),
32111 vec![concat, Expression::string("%Y %u %w")],
32112 ))))
32113 }
32114 "MONTH" => {
32115 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
32116 let year_x = Expression::Function(Box::new(Function::new(
32117 "YEAR".to_string(),
32118 vec![expr.clone()],
32119 )));
32120 let month_x = Expression::Function(Box::new(Function::new(
32121 "MONTH".to_string(),
32122 vec![expr.clone()],
32123 )));
32124 let concat_args = vec![
32125 year_x,
32126 Expression::string(" "),
32127 month_x,
32128 Expression::string(" 1"),
32129 ];
32130 let concat = Expression::Function(Box::new(Function::new(
32131 "CONCAT".to_string(),
32132 concat_args,
32133 )));
32134 Ok(Expression::Function(Box::new(Function::new(
32135 "STR_TO_DATE".to_string(),
32136 vec![concat, Expression::string("%Y %c %e")],
32137 ))))
32138 }
32139 "QUARTER" => {
32140 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
32141 let year_x = Expression::Function(Box::new(Function::new(
32142 "YEAR".to_string(),
32143 vec![expr.clone()],
32144 )));
32145 let quarter_x = Expression::Function(Box::new(Function::new(
32146 "QUARTER".to_string(),
32147 vec![expr.clone()],
32148 )));
32149 // QUARTER(x) * 3 - 2
32150 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
32151 left: quarter_x,
32152 right: Expression::number(3),
32153 left_comments: Vec::new(),
32154 operator_comments: Vec::new(),
32155 trailing_comments: Vec::new(),
32156 inferred_type: None,
32157 }));
32158 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
32159 left: mul,
32160 right: Expression::number(2),
32161 left_comments: Vec::new(),
32162 operator_comments: Vec::new(),
32163 trailing_comments: Vec::new(),
32164 inferred_type: None,
32165 }));
32166 let concat_args = vec![
32167 year_x,
32168 Expression::string(" "),
32169 sub,
32170 Expression::string(" 1"),
32171 ];
32172 let concat = Expression::Function(Box::new(Function::new(
32173 "CONCAT".to_string(),
32174 concat_args,
32175 )));
32176 Ok(Expression::Function(Box::new(Function::new(
32177 "STR_TO_DATE".to_string(),
32178 vec![concat, Expression::string("%Y %c %e")],
32179 ))))
32180 }
32181 "YEAR" => {
32182 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
32183 let year_x = Expression::Function(Box::new(Function::new(
32184 "YEAR".to_string(),
32185 vec![expr.clone()],
32186 )));
32187 let concat_args = vec![year_x, Expression::string(" 1 1")];
32188 let concat = Expression::Function(Box::new(Function::new(
32189 "CONCAT".to_string(),
32190 concat_args,
32191 )));
32192 Ok(Expression::Function(Box::new(Function::new(
32193 "STR_TO_DATE".to_string(),
32194 vec![concat, Expression::string("%Y %c %e")],
32195 ))))
32196 }
32197 _ => {
32198 // Unsupported unit -> keep as DATE_TRUNC
32199 Ok(Expression::Function(Box::new(Function::new(
32200 "DATE_TRUNC".to_string(),
32201 vec![Expression::string(unit), expr.clone()],
32202 ))))
32203 }
32204 }
32205 }
32206
32207 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
32208 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
32209 use crate::expressions::DataType;
32210 match dt {
32211 DataType::VarChar { .. } | DataType::Char { .. } => true,
32212 DataType::Struct { fields, .. } => fields
32213 .iter()
32214 .any(|f| Self::has_varchar_char_type(&f.data_type)),
32215 _ => false,
32216 }
32217 }
32218
32219 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
32220 fn normalize_varchar_to_string(
32221 dt: crate::expressions::DataType,
32222 ) -> crate::expressions::DataType {
32223 use crate::expressions::DataType;
32224 match dt {
32225 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
32226 name: "STRING".to_string(),
32227 },
32228 DataType::Struct { fields, nested } => {
32229 let fields = fields
32230 .into_iter()
32231 .map(|mut f| {
32232 f.data_type = Self::normalize_varchar_to_string(f.data_type);
32233 f
32234 })
32235 .collect();
32236 DataType::Struct { fields, nested }
32237 }
32238 other => other,
32239 }
32240 }
32241
32242 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
32243 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
32244 if let Expression::Literal(ref lit) = expr {
32245 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
32246 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
32247 let trimmed = s.trim();
32248
32249 // Find where digits end and unit text begins
32250 let digit_end = trimmed
32251 .find(|c: char| !c.is_ascii_digit())
32252 .unwrap_or(trimmed.len());
32253 if digit_end == 0 || digit_end == trimmed.len() {
32254 return expr;
32255 }
32256 let num = &trimmed[..digit_end];
32257 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
32258 if unit_text.is_empty() {
32259 return expr;
32260 }
32261
32262 let known_units = [
32263 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
32264 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
32265 ];
32266 if !known_units.contains(&unit_text.as_str()) {
32267 return expr;
32268 }
32269
32270 let unit_str = unit_text.clone();
32271 // Singularize
32272 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
32273 &unit_str[..unit_str.len() - 1]
32274 } else {
32275 &unit_str
32276 };
32277 let unit = unit_singular;
32278
32279 match target {
32280 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
32281 // INTERVAL '2' DAY
32282 let iu = match unit {
32283 "DAY" => crate::expressions::IntervalUnit::Day,
32284 "HOUR" => crate::expressions::IntervalUnit::Hour,
32285 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32286 "SECOND" => crate::expressions::IntervalUnit::Second,
32287 "WEEK" => crate::expressions::IntervalUnit::Week,
32288 "MONTH" => crate::expressions::IntervalUnit::Month,
32289 "YEAR" => crate::expressions::IntervalUnit::Year,
32290 _ => return expr,
32291 };
32292 return Expression::Interval(Box::new(crate::expressions::Interval {
32293 this: Some(Expression::string(num)),
32294 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32295 unit: iu,
32296 use_plural: false,
32297 }),
32298 }));
32299 }
32300 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
32301 // INTERVAL '2 DAYS'
32302 let plural = if num != "1" && !unit_str.ends_with('S') {
32303 format!("{} {}S", num, unit)
32304 } else if unit_str.ends_with('S') {
32305 format!("{} {}", num, unit_str)
32306 } else {
32307 format!("{} {}", num, unit)
32308 };
32309 return Expression::Interval(Box::new(crate::expressions::Interval {
32310 this: Some(Expression::string(&plural)),
32311 unit: None,
32312 }));
32313 }
32314 _ => {
32315 // Spark/Databricks/Hive: INTERVAL '1' DAY
32316 let iu = match unit {
32317 "DAY" => crate::expressions::IntervalUnit::Day,
32318 "HOUR" => crate::expressions::IntervalUnit::Hour,
32319 "MINUTE" => crate::expressions::IntervalUnit::Minute,
32320 "SECOND" => crate::expressions::IntervalUnit::Second,
32321 "WEEK" => crate::expressions::IntervalUnit::Week,
32322 "MONTH" => crate::expressions::IntervalUnit::Month,
32323 "YEAR" => crate::expressions::IntervalUnit::Year,
32324 _ => return expr,
32325 };
32326 return Expression::Interval(Box::new(crate::expressions::Interval {
32327 this: Some(Expression::string(num)),
32328 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32329 unit: iu,
32330 use_plural: false,
32331 }),
32332 }));
32333 }
32334 }
32335 }
32336 }
32337 // If it's already an INTERVAL expression, pass through
32338 expr
32339 }
32340
32341 /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
32342 /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
32343 /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
32344 /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
32345 fn rewrite_unnest_expansion(
32346 select: &crate::expressions::Select,
32347 target: DialectType,
32348 ) -> Option<crate::expressions::Select> {
32349 use crate::expressions::{
32350 Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
32351 UnnestFunc,
32352 };
32353
32354 let index_offset: i64 = match target {
32355 DialectType::Presto | DialectType::Trino => 1,
32356 _ => 0, // BigQuery, Snowflake
32357 };
32358
32359 let if_func_name = match target {
32360 DialectType::Snowflake => "IFF",
32361 _ => "IF",
32362 };
32363
32364 let array_length_func = match target {
32365 DialectType::BigQuery => "ARRAY_LENGTH",
32366 DialectType::Presto | DialectType::Trino => "CARDINALITY",
32367 DialectType::Snowflake => "ARRAY_SIZE",
32368 _ => "ARRAY_LENGTH",
32369 };
32370
32371 let use_table_aliases = matches!(
32372 target,
32373 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
32374 );
32375 let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);
32376
32377 fn make_col(name: &str, table: Option<&str>) -> Expression {
32378 if let Some(tbl) = table {
32379 Expression::boxed_column(Column {
32380 name: Identifier::new(name.to_string()),
32381 table: Some(Identifier::new(tbl.to_string())),
32382 join_mark: false,
32383 trailing_comments: Vec::new(),
32384 span: None,
32385 inferred_type: None,
32386 })
32387 } else {
32388 Expression::Identifier(Identifier::new(name.to_string()))
32389 }
32390 }
32391
32392 fn make_join(this: Expression) -> Join {
32393 Join {
32394 this,
32395 on: None,
32396 using: Vec::new(),
32397 kind: JoinKind::Cross,
32398 use_inner_keyword: false,
32399 use_outer_keyword: false,
32400 deferred_condition: false,
32401 join_hint: None,
32402 match_condition: None,
32403 pivots: Vec::new(),
32404 comments: Vec::new(),
32405 nesting_group: 0,
32406 directed: false,
32407 }
32408 }
32409
32410 // Collect UNNEST info from SELECT expressions
32411 struct UnnestInfo {
32412 arr_expr: Expression,
32413 col_alias: String,
32414 pos_alias: String,
32415 source_alias: String,
32416 original_expr: Expression,
32417 has_outer_alias: Option<String>,
32418 }
32419
32420 let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
32421 let mut col_counter = 0usize;
32422 let mut pos_counter = 1usize;
32423 let mut source_counter = 1usize;
32424
32425 fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
32426 match expr {
32427 Expression::Unnest(u) => Some(u.this.clone()),
32428 Expression::Function(f)
32429 if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
32430 {
32431 Some(f.args[0].clone())
32432 }
32433 Expression::Alias(a) => extract_unnest_arg(&a.this),
32434 Expression::Add(op)
32435 | Expression::Sub(op)
32436 | Expression::Mul(op)
32437 | Expression::Div(op) => {
32438 extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
32439 }
32440 _ => None,
32441 }
32442 }
32443
32444 fn get_alias_name(expr: &Expression) -> Option<String> {
32445 if let Expression::Alias(a) = expr {
32446 Some(a.alias.name.clone())
32447 } else {
32448 None
32449 }
32450 }
32451
32452 for sel_expr in &select.expressions {
32453 if let Some(arr) = extract_unnest_arg(sel_expr) {
32454 col_counter += 1;
32455 pos_counter += 1;
32456 source_counter += 1;
32457
32458 let col_alias = if col_counter == 1 {
32459 "col".to_string()
32460 } else {
32461 format!("col_{}", col_counter)
32462 };
32463 let pos_alias = format!("pos_{}", pos_counter);
32464 let source_alias = format!("_u_{}", source_counter);
32465 let has_outer_alias = get_alias_name(sel_expr);
32466
32467 unnest_infos.push(UnnestInfo {
32468 arr_expr: arr,
32469 col_alias,
32470 pos_alias,
32471 source_alias,
32472 original_expr: sel_expr.clone(),
32473 has_outer_alias,
32474 });
32475 }
32476 }
32477
32478 if unnest_infos.is_empty() {
32479 return None;
32480 }
32481
32482 let series_alias = "pos".to_string();
32483 let series_source_alias = "_u".to_string();
32484 let tbl_ref = if use_table_aliases {
32485 Some(series_source_alias.as_str())
32486 } else {
32487 None
32488 };
32489
32490 // Build new SELECT expressions
32491 let mut new_select_exprs = Vec::new();
32492 for info in &unnest_infos {
32493 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
32494 let src_ref = if use_table_aliases {
32495 Some(info.source_alias.as_str())
32496 } else {
32497 None
32498 };
32499
32500 let pos_col = make_col(&series_alias, tbl_ref);
32501 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
32502 let col_ref = make_col(actual_col_name, src_ref);
32503
32504 let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
32505 pos_col.clone(),
32506 unnest_pos_col.clone(),
32507 )));
32508 let mut if_args = vec![eq_cond, col_ref];
32509 if null_third_arg {
32510 if_args.push(Expression::Null(crate::expressions::Null));
32511 }
32512
32513 let if_expr =
32514 Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
32515 let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);
32516
32517 new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
32518 final_expr,
32519 Identifier::new(actual_col_name.clone()),
32520 ))));
32521 }
32522
32523 // Build array size expressions for GREATEST
32524 let size_exprs: Vec<Expression> = unnest_infos
32525 .iter()
32526 .map(|info| {
32527 Expression::Function(Box::new(Function::new(
32528 array_length_func.to_string(),
32529 vec![info.arr_expr.clone()],
32530 )))
32531 })
32532 .collect();
32533
32534 let greatest =
32535 Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));
32536
32537 let series_end = if index_offset == 0 {
32538 Expression::Sub(Box::new(BinaryOp::new(
32539 greatest,
32540 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32541 )))
32542 } else {
32543 greatest
32544 };
32545
32546 // Build the position array source
32547 let series_unnest_expr = match target {
32548 DialectType::BigQuery => {
32549 let gen_array = Expression::Function(Box::new(Function::new(
32550 "GENERATE_ARRAY".to_string(),
32551 vec![
32552 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
32553 series_end,
32554 ],
32555 )));
32556 Expression::Unnest(Box::new(UnnestFunc {
32557 this: gen_array,
32558 expressions: Vec::new(),
32559 with_ordinality: false,
32560 alias: None,
32561 offset_alias: None,
32562 }))
32563 }
32564 DialectType::Presto | DialectType::Trino => {
32565 let sequence = Expression::Function(Box::new(Function::new(
32566 "SEQUENCE".to_string(),
32567 vec![
32568 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32569 series_end,
32570 ],
32571 )));
32572 Expression::Unnest(Box::new(UnnestFunc {
32573 this: sequence,
32574 expressions: Vec::new(),
32575 with_ordinality: false,
32576 alias: None,
32577 offset_alias: None,
32578 }))
32579 }
32580 DialectType::Snowflake => {
32581 let range_end = Expression::Add(Box::new(BinaryOp::new(
32582 Expression::Paren(Box::new(crate::expressions::Paren {
32583 this: series_end,
32584 trailing_comments: Vec::new(),
32585 })),
32586 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32587 )));
32588 let gen_range = Expression::Function(Box::new(Function::new(
32589 "ARRAY_GENERATE_RANGE".to_string(),
32590 vec![
32591 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
32592 range_end,
32593 ],
32594 )));
32595 let flatten_arg =
32596 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
32597 name: Identifier::new("INPUT".to_string()),
32598 value: gen_range,
32599 separator: crate::expressions::NamedArgSeparator::DArrow,
32600 }));
32601 let flatten = Expression::Function(Box::new(Function::new(
32602 "FLATTEN".to_string(),
32603 vec![flatten_arg],
32604 )));
32605 Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
32606 }
32607 _ => return None,
32608 };
32609
32610 // Build series alias expression
32611 let series_alias_expr = if use_table_aliases {
32612 let col_aliases = if matches!(target, DialectType::Snowflake) {
32613 vec![
32614 Identifier::new("seq".to_string()),
32615 Identifier::new("key".to_string()),
32616 Identifier::new("path".to_string()),
32617 Identifier::new("index".to_string()),
32618 Identifier::new(series_alias.clone()),
32619 Identifier::new("this".to_string()),
32620 ]
32621 } else {
32622 vec![Identifier::new(series_alias.clone())]
32623 };
32624 Expression::Alias(Box::new(Alias {
32625 this: series_unnest_expr,
32626 alias: Identifier::new(series_source_alias.clone()),
32627 column_aliases: col_aliases,
32628 alias_explicit_as: false,
32629 alias_keyword: None,
32630 pre_alias_comments: Vec::new(),
32631 trailing_comments: Vec::new(),
32632 inferred_type: None,
32633 }))
32634 } else {
32635 Expression::Alias(Box::new(Alias::new(
32636 series_unnest_expr,
32637 Identifier::new(series_alias.clone()),
32638 )))
32639 };
32640
32641 // Build CROSS JOINs for each UNNEST
32642 let mut joins = Vec::new();
32643 for info in &unnest_infos {
32644 let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
32645
32646 let unnest_join_expr = match target {
32647 DialectType::BigQuery => {
32648 // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
32649 let unnest = UnnestFunc {
32650 this: info.arr_expr.clone(),
32651 expressions: Vec::new(),
32652 with_ordinality: true,
32653 alias: Some(Identifier::new(actual_col_name.clone())),
32654 offset_alias: Some(Identifier::new(info.pos_alias.clone())),
32655 };
32656 Expression::Unnest(Box::new(unnest))
32657 }
32658 DialectType::Presto | DialectType::Trino => {
32659 let unnest = UnnestFunc {
32660 this: info.arr_expr.clone(),
32661 expressions: Vec::new(),
32662 with_ordinality: true,
32663 alias: None,
32664 offset_alias: None,
32665 };
32666 Expression::Alias(Box::new(Alias {
32667 this: Expression::Unnest(Box::new(unnest)),
32668 alias: Identifier::new(info.source_alias.clone()),
32669 column_aliases: vec![
32670 Identifier::new(actual_col_name.clone()),
32671 Identifier::new(info.pos_alias.clone()),
32672 ],
32673 alias_explicit_as: false,
32674 alias_keyword: None,
32675 pre_alias_comments: Vec::new(),
32676 trailing_comments: Vec::new(),
32677 inferred_type: None,
32678 }))
32679 }
32680 DialectType::Snowflake => {
32681 let flatten_arg =
32682 Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
32683 name: Identifier::new("INPUT".to_string()),
32684 value: info.arr_expr.clone(),
32685 separator: crate::expressions::NamedArgSeparator::DArrow,
32686 }));
32687 let flatten = Expression::Function(Box::new(Function::new(
32688 "FLATTEN".to_string(),
32689 vec![flatten_arg],
32690 )));
32691 let table_fn = Expression::Function(Box::new(Function::new(
32692 "TABLE".to_string(),
32693 vec![flatten],
32694 )));
32695 Expression::Alias(Box::new(Alias {
32696 this: table_fn,
32697 alias: Identifier::new(info.source_alias.clone()),
32698 column_aliases: vec![
32699 Identifier::new("seq".to_string()),
32700 Identifier::new("key".to_string()),
32701 Identifier::new("path".to_string()),
32702 Identifier::new(info.pos_alias.clone()),
32703 Identifier::new(actual_col_name.clone()),
32704 Identifier::new("this".to_string()),
32705 ],
32706 alias_explicit_as: false,
32707 alias_keyword: None,
32708 pre_alias_comments: Vec::new(),
32709 trailing_comments: Vec::new(),
32710 inferred_type: None,
32711 }))
32712 }
32713 _ => return None,
32714 };
32715
32716 joins.push(make_join(unnest_join_expr));
32717 }
32718
32719 // Build WHERE clause
32720 let mut where_conditions: Vec<Expression> = Vec::new();
32721 for info in &unnest_infos {
32722 let src_ref = if use_table_aliases {
32723 Some(info.source_alias.as_str())
32724 } else {
32725 None
32726 };
32727 let pos_col = make_col(&series_alias, tbl_ref);
32728 let unnest_pos_col = make_col(&info.pos_alias, src_ref);
32729
32730 let arr_size = Expression::Function(Box::new(Function::new(
32731 array_length_func.to_string(),
32732 vec![info.arr_expr.clone()],
32733 )));
32734
32735 let size_ref = if index_offset == 0 {
32736 Expression::Paren(Box::new(crate::expressions::Paren {
32737 this: Expression::Sub(Box::new(BinaryOp::new(
32738 arr_size,
32739 Expression::Literal(Box::new(Literal::Number("1".to_string()))),
32740 ))),
32741 trailing_comments: Vec::new(),
32742 }))
32743 } else {
32744 arr_size
32745 };
32746
32747 let eq = Expression::Eq(Box::new(BinaryOp::new(
32748 pos_col.clone(),
32749 unnest_pos_col.clone(),
32750 )));
32751 let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
32752 let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
32753 let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
32754 let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
32755 this: and_cond,
32756 trailing_comments: Vec::new(),
32757 }));
32758 let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));
32759
32760 where_conditions.push(or_cond);
32761 }
32762
32763 let where_expr = if where_conditions.len() == 1 {
32764 // Single condition: no parens needed
32765 where_conditions.into_iter().next().unwrap()
32766 } else {
32767 // Multiple conditions: wrap each OR in parens, then combine with AND
32768 let wrap = |e: Expression| {
32769 Expression::Paren(Box::new(crate::expressions::Paren {
32770 this: e,
32771 trailing_comments: Vec::new(),
32772 }))
32773 };
32774 let mut iter = where_conditions.into_iter();
32775 let first = wrap(iter.next().unwrap());
32776 let second = wrap(iter.next().unwrap());
32777 let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
32778 this: Expression::And(Box::new(BinaryOp::new(first, second))),
32779 trailing_comments: Vec::new(),
32780 }));
32781 for cond in iter {
32782 combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
32783 }
32784 combined
32785 };
32786
32787 // Build the new SELECT
32788 let mut new_select = select.clone();
32789 new_select.expressions = new_select_exprs;
32790
32791 if new_select.from.is_some() {
32792 let mut all_joins = vec![make_join(series_alias_expr)];
32793 all_joins.extend(joins);
32794 new_select.joins.extend(all_joins);
32795 } else {
32796 new_select.from = Some(From {
32797 expressions: vec![series_alias_expr],
32798 });
32799 new_select.joins.extend(joins);
32800 }
32801
32802 if let Some(ref existing_where) = new_select.where_clause {
32803 let combined = Expression::And(Box::new(BinaryOp::new(
32804 existing_where.this.clone(),
32805 where_expr,
32806 )));
32807 new_select.where_clause = Some(crate::expressions::Where { this: combined });
32808 } else {
32809 new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
32810 }
32811
32812 Some(new_select)
32813 }
32814
32815 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
32816 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
32817 match original {
32818 Expression::Unnest(_) => replacement.clone(),
32819 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
32820 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
32821 Expression::Add(op) => {
32822 let left = Self::replace_unnest_with_if(&op.left, replacement);
32823 let right = Self::replace_unnest_with_if(&op.right, replacement);
32824 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
32825 }
32826 Expression::Sub(op) => {
32827 let left = Self::replace_unnest_with_if(&op.left, replacement);
32828 let right = Self::replace_unnest_with_if(&op.right, replacement);
32829 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
32830 }
32831 Expression::Mul(op) => {
32832 let left = Self::replace_unnest_with_if(&op.left, replacement);
32833 let right = Self::replace_unnest_with_if(&op.right, replacement);
32834 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
32835 }
32836 Expression::Div(op) => {
32837 let left = Self::replace_unnest_with_if(&op.left, replacement);
32838 let right = Self::replace_unnest_with_if(&op.right, replacement);
32839 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
32840 }
32841 _ => original.clone(),
32842 }
32843 }
32844
/// Split a JSON path such as `$.y[0].z` into its segments: `["y", "0", "z"]`.
///
/// A leading `$.` or `$` is removed first. Segments are separated by `.` or introduced by
/// `[...]`; inside brackets, single- or double-quoted text is taken without its quotes.
/// A bracket whose content is exactly `*` contributes no segment. Empty dot segments are
/// skipped, whereas a bracket segment is pushed even when it is empty. An unterminated
/// bracket or quote simply runs to the end of the input.
fn decompose_json_path(path: &str) -> Vec<String> {
    let body = path
        .strip_prefix("$.")
        .or_else(|| path.strip_prefix('$'))
        .unwrap_or(path);

    let mut segments: Vec<String> = Vec::new();
    let mut pending = String::new();
    let mut stream = body.chars();

    while let Some(ch) = stream.next() {
        if ch != '.' && ch != '[' {
            pending.push(ch);
            continue;
        }

        // Both separators terminate the dot segment collected so far.
        if !pending.is_empty() {
            segments.push(std::mem::take(&mut pending));
        }
        if ch == '.' {
            continue;
        }

        // Bracket: gather everything up to the closing `]`, unwrapping quoted runs.
        let mut inside = String::new();
        while let Some(inner) = stream.next() {
            match inner {
                ']' => break,
                '"' | '\'' => {
                    // Copy up to the matching quote; the closing quote itself is consumed.
                    for quoted in stream.by_ref() {
                        if quoted == inner {
                            break;
                        }
                        inside.push(quoted);
                    }
                }
                other => inside.push(other),
            }
        }
        if inside != "*" {
            segments.push(inside);
        }
    }

    if !pending.is_empty() {
        segments.push(pending);
    }
    segments
}
32912
/// Drop the JSON path root marker and return the remainder as an owned string.
///
/// A leading `$` is removed, together with the `.` directly following it if there is one:
/// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`. Paths without a leading `$` are
/// returned as-is.
fn strip_json_dollar_prefix(path: &str) -> String {
    let remainder = match path.strip_prefix('$') {
        Some(after_root) => after_root.strip_prefix('.').unwrap_or(after_root),
        None => path,
    };
    remainder.to_owned()
}
32924
/// Remove `[*]` wildcards from a JSON path.
/// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
///
/// After the wildcards are removed, each `..` is replaced by a single `.` in one
/// left-to-right pass (so `$.y.[*].z` -> `$.y.z`; note that a `..` already present in the
/// input is collapsed as well), and any trailing `.` characters are trimmed.
fn strip_json_wildcards(path: &str) -> String {
    let without_wildcards = path.replace("[*]", "");
    let single_dotted = without_wildcards.replace("..", ".");
    let trimmed = single_dotted.trim_end_matches('.');
    trimmed.to_owned()
}
32933
/// Rewrite quoted bracket segments of a JSON path as quoted dot segments.
/// `$["a b"]` -> `$."a b"`, `$['key']` -> `$."key"`
///
/// Rules per `[...]` group (quotes inside the brackets are stripped while reading):
/// * content exactly `*` is emitted as `[*]`;
/// * content that contained a quoted part is emitted as `."<content>"` (always double quotes);
/// * anything else (e.g. a numeric index) is re-emitted as `[<content>]`.
///
/// Characters outside brackets are copied verbatim. An unterminated bracket is read to the
/// end of the input and still emitted in closed form.
fn bracket_to_dot_notation(path: &str) -> String {
    let mut out = String::new();
    let mut stream = path.chars();

    while let Some(ch) = stream.next() {
        if ch != '[' {
            out.push(ch);
            continue;
        }

        // Collect the bracket content up to `]`, remembering whether any quote was seen.
        let mut key = String::new();
        let mut saw_quote = false;
        while let Some(inner) = stream.next() {
            match inner {
                ']' => break,
                '"' | '\'' => {
                    saw_quote = true;
                    // `take_while` also consumes the matching closing quote.
                    key.extend(stream.by_ref().take_while(|&c| c != inner));
                }
                other => key.push(other),
            }
        }

        if key == "*" {
            // Keep wildcard as-is
            out.push_str("[*]");
        } else if saw_quote {
            // Quoted bracket -> dot notation with quotes
            out.push_str(".\"");
            out.push_str(&key);
            out.push('"');
        } else {
            // Unquoted content (e.g. numeric index) -> keep as bracket
            out.push('[');
            out.push_str(&key);
            out.push(']');
        }
    }
    out
}
32989
/// Switch double-quoted bracket keys in a JSON path to single quotes.
/// `$["a b"]` -> `$['a b']`
///
/// Only a `"` that immediately follows `[` starts a rewrite; the text up to the next `"`
/// is copied verbatim between single quotes. If that closing `"` is missing, the rest of
/// the input is copied and a closing `'` is still appended. Everything else is unchanged.
fn bracket_to_single_quotes(path: &str) -> String {
    let mut out = String::with_capacity(path.len());
    let mut rest = path;

    while let Some(open) = rest.find("[\"") {
        // Text before the opening `["` passes through untouched.
        out.push_str(&rest[..open]);
        out.push_str("['");

        let after_open = &rest[open + 2..];
        match after_open.find('"') {
            Some(close) => {
                out.push_str(&after_open[..close]);
                rest = &after_open[close + 1..]; // skip closing "
            }
            None => {
                out.push_str(after_open);
                rest = "";
            }
        }
        out.push('\'');
    }

    out.push_str(rest);
    out
}
33016
33017 /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
33018 /// or PostgreSQL #temp -> TEMPORARY.
33019 /// Also strips # from INSERT INTO #table for non-TSQL targets.
33020 fn transform_select_into(
33021 expr: Expression,
33022 _source: DialectType,
33023 target: DialectType,
33024 ) -> Expression {
33025 use crate::expressions::{CreateTable, Expression, TableRef};
33026
33027 // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
33028 if let Expression::Insert(ref insert) = expr {
33029 if insert.table.name.name.starts_with('#')
33030 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
33031 {
33032 let mut new_insert = insert.clone();
33033 new_insert.table.name.name =
33034 insert.table.name.name.trim_start_matches('#').to_string();
33035 return Expression::Insert(new_insert);
33036 }
33037 return expr;
33038 }
33039
33040 if let Expression::Select(ref select) = expr {
33041 if let Some(ref into) = select.into {
33042 let table_name_raw = match &into.this {
33043 Expression::Table(tr) => tr.name.name.clone(),
33044 Expression::Identifier(id) => id.name.clone(),
33045 _ => String::new(),
33046 };
33047 let is_temp = table_name_raw.starts_with('#') || into.temporary;
33048 let clean_name = table_name_raw.trim_start_matches('#').to_string();
33049
33050 match target {
33051 DialectType::DuckDB | DialectType::Snowflake => {
33052 // SELECT INTO -> CREATE TABLE AS SELECT
33053 let mut new_select = select.clone();
33054 new_select.into = None;
33055 let ct = CreateTable {
33056 name: TableRef::new(clean_name),
33057 on_cluster: None,
33058 columns: Vec::new(),
33059 constraints: Vec::new(),
33060 if_not_exists: false,
33061 temporary: is_temp,
33062 or_replace: false,
33063 table_modifier: None,
33064 as_select: Some(Expression::Select(new_select)),
33065 as_select_parenthesized: false,
33066 on_commit: None,
33067 clone_source: None,
33068 clone_at_clause: None,
33069 shallow_clone: false,
33070 deep_clone: false,
33071 is_copy: false,
33072 leading_comments: Vec::new(),
33073 with_properties: Vec::new(),
33074 teradata_post_name_options: Vec::new(),
33075 with_data: None,
33076 with_statistics: None,
33077 teradata_indexes: Vec::new(),
33078 with_cte: None,
33079 properties: Vec::new(),
33080 partition_of: None,
33081 post_table_properties: Vec::new(),
33082 mysql_table_options: Vec::new(),
33083 inherits: Vec::new(),
33084 on_property: None,
33085 copy_grants: false,
33086 using_template: None,
33087 rollup: None,
33088 uuid: None,
33089 with_partition_columns: Vec::new(),
33090 with_connection: None,
33091 };
33092 return Expression::CreateTable(Box::new(ct));
33093 }
33094 DialectType::PostgreSQL | DialectType::Redshift => {
33095 // PostgreSQL: #foo -> INTO TEMPORARY foo
33096 if is_temp && !into.temporary {
33097 let mut new_select = select.clone();
33098 let mut new_into = into.clone();
33099 new_into.temporary = true;
33100 new_into.unlogged = false;
33101 new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
33102 new_select.into = Some(new_into);
33103 Expression::Select(new_select)
33104 } else {
33105 expr
33106 }
33107 }
33108 _ => expr,
33109 }
33110 } else {
33111 expr
33112 }
33113 } else {
33114 expr
33115 }
33116 }
33117
33118 /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
33119 /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
33120 fn transform_create_table_properties(
33121 ct: &mut crate::expressions::CreateTable,
33122 _source: DialectType,
33123 target: DialectType,
33124 ) {
33125 use crate::expressions::{
33126 BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
33127 Properties,
33128 };
33129
33130 // Helper to convert a raw property value string to the correct Expression
33131 let value_to_expr = |v: &str| -> Expression {
33132 let trimmed = v.trim();
33133 // Check if it's a quoted string (starts and ends with ')
33134 if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
33135 Expression::Literal(Box::new(Literal::String(
33136 trimmed[1..trimmed.len() - 1].to_string(),
33137 )))
33138 }
33139 // Check if it's a number
33140 else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
33141 Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
33142 }
33143 // Check if it's ARRAY[...] or ARRAY(...)
33144 else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
33145 // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
33146 let inner = trimmed
33147 .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
33148 .trim_start_matches('[')
33149 .trim_start_matches('(')
33150 .trim_end_matches(']')
33151 .trim_end_matches(')');
33152 let elements: Vec<Expression> = inner
33153 .split(',')
33154 .map(|e| {
33155 let elem = e.trim().trim_matches('\'');
33156 Expression::Literal(Box::new(Literal::String(elem.to_string())))
33157 })
33158 .collect();
33159 Expression::Function(Box::new(crate::expressions::Function::new(
33160 "ARRAY".to_string(),
33161 elements,
33162 )))
33163 }
33164 // Otherwise, just output as identifier (unquoted)
33165 else {
33166 Expression::Identifier(Identifier::new(trimmed.to_string()))
33167 }
33168 };
33169
33170 if ct.with_properties.is_empty() && ct.properties.is_empty() {
33171 return;
33172 }
33173
33174 // Handle Presto-style WITH properties
33175 if !ct.with_properties.is_empty() {
33176 // Extract FORMAT property and remaining properties
33177 let mut format_value: Option<String> = None;
33178 let mut partitioned_by: Option<String> = None;
33179 let mut other_props: Vec<(String, String)> = Vec::new();
33180
33181 for (key, value) in ct.with_properties.drain(..) {
33182 if key.eq_ignore_ascii_case("FORMAT") {
33183 // Strip surrounding quotes from value if present
33184 format_value = Some(value.trim_matches('\'').to_string());
33185 } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
33186 partitioned_by = Some(value);
33187 } else {
33188 other_props.push((key, value));
33189 }
33190 }
33191
33192 match target {
33193 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
33194 // Presto: keep WITH properties but lowercase 'format' key
33195 if let Some(fmt) = format_value {
33196 ct.with_properties
33197 .push(("format".to_string(), format!("'{}'", fmt)));
33198 }
33199 if let Some(part) = partitioned_by {
33200 // Convert (col1, col2) to ARRAY['col1', 'col2'] format
33201 let trimmed = part.trim();
33202 let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
33203 // Also handle ARRAY['...'] format - keep as-is
33204 if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
33205 ct.with_properties
33206 .push(("PARTITIONED_BY".to_string(), part));
33207 } else {
33208 // Parse column names from the parenthesized list
33209 let cols: Vec<&str> = inner
33210 .split(',')
33211 .map(|c| c.trim().trim_matches('"').trim_matches('\''))
33212 .collect();
33213 let array_val = format!(
33214 "ARRAY[{}]",
33215 cols.iter()
33216 .map(|c| format!("'{}'", c))
33217 .collect::<Vec<_>>()
33218 .join(", ")
33219 );
33220 ct.with_properties
33221 .push(("PARTITIONED_BY".to_string(), array_val));
33222 }
33223 }
33224 ct.with_properties.extend(other_props);
33225 }
33226 DialectType::Hive => {
33227 // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
33228 if let Some(fmt) = format_value {
33229 ct.properties.push(Expression::FileFormatProperty(Box::new(
33230 FileFormatProperty {
33231 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
33232 expressions: vec![],
33233 hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
33234 value: true,
33235 }))),
33236 },
33237 )));
33238 }
33239 if let Some(_part) = partitioned_by {
33240 // PARTITIONED_BY handling is complex - move columns to partitioned by
33241 // For now, the partition columns are extracted from the column list
33242 Self::apply_partitioned_by(ct, &_part, target);
33243 }
33244 if !other_props.is_empty() {
33245 let eq_exprs: Vec<Expression> = other_props
33246 .into_iter()
33247 .map(|(k, v)| {
33248 Expression::Eq(Box::new(BinaryOp::new(
33249 Expression::Literal(Box::new(Literal::String(k))),
33250 value_to_expr(&v),
33251 )))
33252 })
33253 .collect();
33254 ct.properties
33255 .push(Expression::Properties(Box::new(Properties {
33256 expressions: eq_exprs,
33257 })));
33258 }
33259 }
33260 DialectType::Spark | DialectType::Databricks => {
33261 // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
33262 if let Some(fmt) = format_value {
33263 ct.properties.push(Expression::FileFormatProperty(Box::new(
33264 FileFormatProperty {
33265 this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
33266 expressions: vec![],
33267 hive_format: None, // None means USING syntax
33268 },
33269 )));
33270 }
33271 if let Some(_part) = partitioned_by {
33272 Self::apply_partitioned_by(ct, &_part, target);
33273 }
33274 if !other_props.is_empty() {
33275 let eq_exprs: Vec<Expression> = other_props
33276 .into_iter()
33277 .map(|(k, v)| {
33278 Expression::Eq(Box::new(BinaryOp::new(
33279 Expression::Literal(Box::new(Literal::String(k))),
33280 value_to_expr(&v),
33281 )))
33282 })
33283 .collect();
33284 ct.properties
33285 .push(Expression::Properties(Box::new(Properties {
33286 expressions: eq_exprs,
33287 })));
33288 }
33289 }
33290 DialectType::DuckDB => {
33291 // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
33292 // Keep nothing
33293 }
33294 _ => {
33295 // For other dialects, keep WITH properties as-is
33296 if let Some(fmt) = format_value {
33297 ct.with_properties
33298 .push(("FORMAT".to_string(), format!("'{}'", fmt)));
33299 }
33300 if let Some(part) = partitioned_by {
33301 ct.with_properties
33302 .push(("PARTITIONED_BY".to_string(), part));
33303 }
33304 ct.with_properties.extend(other_props);
33305 }
33306 }
33307 }
33308
33309 // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
33310 // and Hive STORED AS -> Presto WITH (format=...) conversion
33311 if !ct.properties.is_empty() {
33312 let is_presto_target = matches!(
33313 target,
33314 DialectType::Presto | DialectType::Trino | DialectType::Athena
33315 );
33316 let is_duckdb_target = matches!(target, DialectType::DuckDB);
33317
33318 if is_presto_target || is_duckdb_target {
33319 let mut new_properties = Vec::new();
33320 for prop in ct.properties.drain(..) {
33321 match &prop {
33322 Expression::FileFormatProperty(ffp) => {
33323 if is_presto_target {
33324 // Convert STORED AS/USING to WITH (format=...)
33325 if let Some(ref fmt_expr) = ffp.this {
33326 let fmt_str = match fmt_expr.as_ref() {
33327 Expression::Identifier(id) => id.name.clone(),
33328 Expression::Literal(lit)
33329 if matches!(lit.as_ref(), Literal::String(_)) =>
33330 {
33331 let Literal::String(s) = lit.as_ref() else {
33332 unreachable!()
33333 };
33334 s.clone()
33335 }
33336 _ => {
33337 new_properties.push(prop);
33338 continue;
33339 }
33340 };
33341 ct.with_properties
33342 .push(("format".to_string(), format!("'{}'", fmt_str)));
33343 }
33344 }
33345 // DuckDB: just strip file format properties
33346 }
33347 // Convert TBLPROPERTIES to WITH properties for Presto target
33348 Expression::Properties(props) if is_presto_target => {
33349 for expr in &props.expressions {
33350 if let Expression::Eq(eq) = expr {
33351 // Extract key and value from the Eq expression
33352 let key = match &eq.left {
33353 Expression::Literal(lit)
33354 if matches!(lit.as_ref(), Literal::String(_)) =>
33355 {
33356 let Literal::String(s) = lit.as_ref() else {
33357 unreachable!()
33358 };
33359 s.clone()
33360 }
33361 Expression::Identifier(id) => id.name.clone(),
33362 _ => continue,
33363 };
33364 let value = match &eq.right {
33365 Expression::Literal(lit)
33366 if matches!(lit.as_ref(), Literal::String(_)) =>
33367 {
33368 let Literal::String(s) = lit.as_ref() else {
33369 unreachable!()
33370 };
33371 format!("'{}'", s)
33372 }
33373 Expression::Literal(lit)
33374 if matches!(lit.as_ref(), Literal::Number(_)) =>
33375 {
33376 let Literal::Number(n) = lit.as_ref() else {
33377 unreachable!()
33378 };
33379 n.clone()
33380 }
33381 Expression::Identifier(id) => id.name.clone(),
33382 _ => continue,
33383 };
33384 ct.with_properties.push((key, value));
33385 }
33386 }
33387 }
33388 // Convert PartitionedByProperty for Presto target
33389 Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
33390 // Check if it contains ColumnDef expressions (Hive-style with types)
33391 if let Expression::Tuple(ref tuple) = *pbp.this {
33392 let mut col_names: Vec<String> = Vec::new();
33393 let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
33394 let mut has_col_defs = false;
33395 for expr in &tuple.expressions {
33396 if let Expression::ColumnDef(ref cd) = expr {
33397 has_col_defs = true;
33398 col_names.push(cd.name.name.clone());
33399 col_defs.push(*cd.clone());
33400 } else if let Expression::Column(ref col) = expr {
33401 col_names.push(col.name.name.clone());
33402 } else if let Expression::Identifier(ref id) = expr {
33403 col_names.push(id.name.clone());
33404 } else {
33405 // For function expressions like MONTHS(y), serialize to SQL
33406 let generic = Dialect::get(DialectType::Generic);
33407 if let Ok(sql) = generic.generate(expr) {
33408 col_names.push(sql);
33409 }
33410 }
33411 }
33412 if has_col_defs {
33413 // Merge partition column defs into the main column list
33414 for cd in col_defs {
33415 ct.columns.push(cd);
33416 }
33417 }
33418 if !col_names.is_empty() {
33419 // Add PARTITIONED_BY property
33420 let array_val = format!(
33421 "ARRAY[{}]",
33422 col_names
33423 .iter()
33424 .map(|n| format!("'{}'", n))
33425 .collect::<Vec<_>>()
33426 .join(", ")
33427 );
33428 ct.with_properties
33429 .push(("PARTITIONED_BY".to_string(), array_val));
33430 }
33431 }
33432 // Skip - don't keep in properties
33433 }
33434 _ => {
33435 if !is_duckdb_target {
33436 new_properties.push(prop);
33437 }
33438 }
33439 }
33440 }
33441 ct.properties = new_properties;
33442 } else {
33443 // For Hive/Spark targets, unquote format names in STORED AS
33444 for prop in &mut ct.properties {
33445 if let Expression::FileFormatProperty(ref mut ffp) = prop {
33446 if let Some(ref mut fmt_expr) = ffp.this {
33447 if let Expression::Literal(lit) = fmt_expr.as_ref() {
33448 if let Literal::String(s) = lit.as_ref() {
33449 // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
33450 let unquoted = s.clone();
33451 *fmt_expr =
33452 Box::new(Expression::Identifier(Identifier::new(unquoted)));
33453 }
33454 }
33455 }
33456 }
33457 }
33458 }
33459 }
33460 }
33461
33462 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
33463 fn apply_partitioned_by(
33464 ct: &mut crate::expressions::CreateTable,
33465 partitioned_by_value: &str,
33466 target: DialectType,
33467 ) {
33468 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
33469
33470 // Parse the ARRAY['col1', 'col2'] value to extract column names
33471 let mut col_names: Vec<String> = Vec::new();
33472 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
33473 let inner = partitioned_by_value
33474 .trim()
33475 .trim_start_matches("ARRAY")
33476 .trim_start_matches('[')
33477 .trim_start_matches('(')
33478 .trim_end_matches(']')
33479 .trim_end_matches(')');
33480 for part in inner.split(',') {
33481 let col = part.trim().trim_matches('\'').trim_matches('"');
33482 if !col.is_empty() {
33483 col_names.push(col.to_string());
33484 }
33485 }
33486
33487 if col_names.is_empty() {
33488 return;
33489 }
33490
33491 if matches!(target, DialectType::Hive) {
33492 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
33493 let mut partition_col_defs = Vec::new();
33494 for col_name in &col_names {
33495 // Find and remove from columns
33496 if let Some(pos) = ct
33497 .columns
33498 .iter()
33499 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
33500 {
33501 let col_def = ct.columns.remove(pos);
33502 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
33503 }
33504 }
33505 if !partition_col_defs.is_empty() {
33506 ct.properties
33507 .push(Expression::PartitionedByProperty(Box::new(
33508 PartitionedByProperty {
33509 this: Box::new(Expression::Tuple(Box::new(Tuple {
33510 expressions: partition_col_defs,
33511 }))),
33512 },
33513 )));
33514 }
33515 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
33516 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
33517 // Use quoted identifiers to match the quoting style of the original column definitions
33518 let partition_exprs: Vec<Expression> = col_names
33519 .iter()
33520 .map(|name| {
33521 // Check if the column exists in the column list and use its quoting
33522 let is_quoted = ct
33523 .columns
33524 .iter()
33525 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
33526 let ident = if is_quoted {
33527 Identifier::quoted(name.clone())
33528 } else {
33529 Identifier::new(name.clone())
33530 };
33531 Expression::boxed_column(Column {
33532 name: ident,
33533 table: None,
33534 join_mark: false,
33535 trailing_comments: Vec::new(),
33536 span: None,
33537 inferred_type: None,
33538 })
33539 })
33540 .collect();
33541 ct.properties
33542 .push(Expression::PartitionedByProperty(Box::new(
33543 PartitionedByProperty {
33544 this: Box::new(Expression::Tuple(Box::new(Tuple {
33545 expressions: partition_exprs,
33546 }))),
33547 },
33548 )));
33549 }
33550 // DuckDB: strip partitioned_by entirely (already handled)
33551 }
33552
33553 /// Convert a DataType to Spark's type string format (using angle brackets)
33554 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
33555 use crate::expressions::DataType;
33556 match dt {
33557 DataType::Int { .. } => "INT".to_string(),
33558 DataType::BigInt { .. } => "BIGINT".to_string(),
33559 DataType::SmallInt { .. } => "SMALLINT".to_string(),
33560 DataType::TinyInt { .. } => "TINYINT".to_string(),
33561 DataType::Float { .. } => "FLOAT".to_string(),
33562 DataType::Double { .. } => "DOUBLE".to_string(),
33563 DataType::Decimal {
33564 precision: Some(p),
33565 scale: Some(s),
33566 } => format!("DECIMAL({}, {})", p, s),
33567 DataType::Decimal {
33568 precision: Some(p), ..
33569 } => format!("DECIMAL({})", p),
33570 DataType::Decimal { .. } => "DECIMAL".to_string(),
33571 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
33572 "STRING".to_string()
33573 }
33574 DataType::Char { .. } => "STRING".to_string(),
33575 DataType::Boolean => "BOOLEAN".to_string(),
33576 DataType::Date => "DATE".to_string(),
33577 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
33578 DataType::Json | DataType::JsonB => "STRING".to_string(),
33579 DataType::Binary { .. } => "BINARY".to_string(),
33580 DataType::Array { element_type, .. } => {
33581 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
33582 }
33583 DataType::Map {
33584 key_type,
33585 value_type,
33586 } => format!(
33587 "MAP<{}, {}>",
33588 Self::data_type_to_spark_string(key_type),
33589 Self::data_type_to_spark_string(value_type)
33590 ),
33591 DataType::Struct { fields, .. } => {
33592 let field_strs: Vec<String> = fields
33593 .iter()
33594 .map(|f| {
33595 if f.name.is_empty() {
33596 Self::data_type_to_spark_string(&f.data_type)
33597 } else {
33598 format!(
33599 "{}: {}",
33600 f.name,
33601 Self::data_type_to_spark_string(&f.data_type)
33602 )
33603 }
33604 })
33605 .collect();
33606 format!("STRUCT<{}>", field_strs.join(", "))
33607 }
33608 DataType::Custom { name } => name.clone(),
33609 _ => format!("{:?}", dt),
33610 }
33611 }
33612
33613 /// Extract value and unit from an Interval expression
33614 /// Returns (value_expression, IntervalUnit)
33615 fn extract_interval_parts(
33616 interval_expr: &Expression,
33617 ) -> Option<(Expression, crate::expressions::IntervalUnit)> {
33618 use crate::expressions::{DataType, IntervalUnit, IntervalUnitSpec, Literal};
33619
33620 fn unit_from_str(unit: &str) -> Option<IntervalUnit> {
33621 match unit.trim().to_ascii_uppercase().as_str() {
33622 "YEAR" | "YEARS" => Some(IntervalUnit::Year),
33623 "QUARTER" | "QUARTERS" => Some(IntervalUnit::Quarter),
33624 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" => Some(IntervalUnit::Month),
33625 "WEEK" | "WEEKS" | "ISOWEEK" => Some(IntervalUnit::Week),
33626 "DAY" | "DAYS" => Some(IntervalUnit::Day),
33627 "HOUR" | "HOURS" => Some(IntervalUnit::Hour),
33628 "MINUTE" | "MINUTES" => Some(IntervalUnit::Minute),
33629 "SECOND" | "SECONDS" => Some(IntervalUnit::Second),
33630 "MILLISECOND" | "MILLISECONDS" => Some(IntervalUnit::Millisecond),
33631 "MICROSECOND" | "MICROSECONDS" => Some(IntervalUnit::Microsecond),
33632 "NANOSECOND" | "NANOSECONDS" => Some(IntervalUnit::Nanosecond),
33633 _ => None,
33634 }
33635 }
33636
33637 fn parts_from_literal_string(s: &str) -> Option<(Expression, IntervalUnit)> {
33638 let mut parts = s.split_whitespace();
33639 let value = parts.next()?;
33640 let unit = unit_from_str(parts.next()?)?;
33641 Some((
33642 Expression::Literal(Box::new(Literal::String(value.to_string()))),
33643 unit,
33644 ))
33645 }
33646
33647 fn unit_from_spec(unit: &IntervalUnitSpec) -> Option<IntervalUnit> {
33648 match unit {
33649 IntervalUnitSpec::Simple { unit, .. } => Some(*unit),
33650 IntervalUnitSpec::Expr(expr) => match expr.as_ref() {
33651 Expression::Day(_) => Some(IntervalUnit::Day),
33652 Expression::Month(_) => Some(IntervalUnit::Month),
33653 Expression::Year(_) => Some(IntervalUnit::Year),
33654 Expression::Identifier(id) => unit_from_str(&id.name),
33655 Expression::Var(v) => unit_from_str(&v.this),
33656 Expression::Column(col) => unit_from_str(&col.name.name),
33657 _ => None,
33658 },
33659 _ => None,
33660 }
33661 }
33662
33663 match interval_expr {
33664 Expression::Interval(iv) => {
33665 let val = iv.this.clone().unwrap_or(Expression::number(0));
33666 if let Expression::Literal(lit) = &val {
33667 if let Literal::String(s) = lit.as_ref() {
33668 if let Some(parts) = parts_from_literal_string(s) {
33669 return Some(parts);
33670 }
33671 }
33672 }
33673 let unit = iv
33674 .unit
33675 .as_ref()
33676 .and_then(unit_from_spec)
33677 .unwrap_or(IntervalUnit::Day);
33678 Some((val, unit))
33679 }
33680 Expression::Cast(cast) if matches!(cast.to, DataType::Interval { .. }) => {
33681 if let Expression::Literal(lit) = &cast.this {
33682 if let Literal::String(s) = lit.as_ref() {
33683 if let Some(parts) = parts_from_literal_string(s) {
33684 return Some(parts);
33685 }
33686 }
33687 }
33688 let unit = match &cast.to {
33689 DataType::Interval {
33690 unit: Some(unit), ..
33691 } => unit_from_str(unit).unwrap_or(IntervalUnit::Day),
33692 _ => IntervalUnit::Day,
33693 };
33694 Some((cast.this.clone(), unit))
33695 }
33696 _ => None,
33697 }
33698 }
33699
33700 fn rewrite_tsql_interval_arithmetic(expr: &Expression) -> Option<Expression> {
33701 match expr {
33702 Expression::Add(op) => {
33703 Self::extract_interval_parts(&op.right)?;
33704 Some(Self::build_tsql_dateadd_from_interval(
33705 op.left.clone(),
33706 &op.right,
33707 false,
33708 ))
33709 }
33710 Expression::Sub(op) => {
33711 Self::extract_interval_parts(&op.right)?;
33712 Some(Self::build_tsql_dateadd_from_interval(
33713 op.left.clone(),
33714 &op.right,
33715 true,
33716 ))
33717 }
33718 _ => None,
33719 }
33720 }
33721
33722 fn build_tsql_dateadd_from_interval(
33723 date: Expression,
33724 interval: &Expression,
33725 subtract: bool,
33726 ) -> Expression {
33727 let (value, unit) = Self::extract_interval_parts(interval)
33728 .unwrap_or_else(|| (interval.clone(), crate::expressions::IntervalUnit::Day));
33729 let unit = Self::interval_unit_to_string(&unit);
33730 let amount = Self::tsql_dateadd_amount(value, subtract);
33731
33732 Expression::Function(Box::new(Function::new(
33733 "DATEADD".to_string(),
33734 vec![Expression::Identifier(Identifier::new(unit)), amount, date],
33735 )))
33736 }
33737
33738 fn tsql_dateadd_amount(value: Expression, negate: bool) -> Expression {
33739 use crate::expressions::{Parameter, ParameterStyle, UnaryOp};
33740
33741 fn numeric_literal_value(value: &Expression) -> Option<&str> {
33742 match value {
33743 Expression::Literal(lit) => match lit.as_ref() {
33744 crate::expressions::Literal::Number(n)
33745 | crate::expressions::Literal::String(n) => Some(n.as_str()),
33746 _ => None,
33747 },
33748 _ => None,
33749 }
33750 }
33751
33752 fn colon_parameter(value: &Expression) -> Option<Expression> {
33753 let Expression::Literal(lit) = value else {
33754 return None;
33755 };
33756 let crate::expressions::Literal::String(s) = lit.as_ref() else {
33757 return None;
33758 };
33759 let name = s.strip_prefix(':')?;
33760 if name.is_empty()
33761 || !name
33762 .chars()
33763 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
33764 {
33765 return None;
33766 }
33767
33768 Some(Expression::Parameter(Box::new(Parameter {
33769 name: if name.chars().all(|ch| ch.is_ascii_digit()) {
33770 None
33771 } else {
33772 Some(name.to_string())
33773 },
33774 index: name.parse::<u32>().ok(),
33775 style: ParameterStyle::Colon,
33776 quoted: false,
33777 string_quoted: false,
33778 expression: None,
33779 })))
33780 }
33781
33782 let value = colon_parameter(&value).unwrap_or(value);
33783
33784 if let Some(n) = numeric_literal_value(&value) {
33785 if let Ok(parsed) = n.parse::<f64>() {
33786 let normalized = if negate { -parsed } else { parsed };
33787 let rendered = if normalized.fract() == 0.0 {
33788 format!("{}", normalized as i64)
33789 } else {
33790 normalized.to_string()
33791 };
33792 return Expression::Literal(Box::new(crate::expressions::Literal::Number(
33793 rendered,
33794 )));
33795 }
33796 }
33797
33798 if !negate {
33799 return value;
33800 }
33801
33802 match value {
33803 Expression::Neg(op) => op.this,
33804 other => Expression::Neg(Box::new(UnaryOp {
33805 this: other,
33806 inferred_type: None,
33807 })),
33808 }
33809 }
33810
33811 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
33812 fn normalize_bigquery_function(
33813 e: Expression,
33814 source: DialectType,
33815 target: DialectType,
33816 ) -> Result<Expression> {
33817 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
33818
33819 let f = if let Expression::Function(f) = e {
33820 *f
33821 } else {
33822 return Ok(e);
33823 };
33824 let name = f.name.to_ascii_uppercase();
33825 let mut args = f.args;
33826
33827 /// Helper to extract unit string from an identifier, column, or literal expression
33828 fn get_unit_str(expr: &Expression) -> String {
33829 match expr {
33830 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
33831 Expression::Var(v) => v.this.to_ascii_uppercase(),
33832 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
33833 let Literal::String(s) = lit.as_ref() else {
33834 unreachable!()
33835 };
33836 s.to_ascii_uppercase()
33837 }
33838 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
33839 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
33840 Expression::Function(f) => {
33841 let base = f.name.to_ascii_uppercase();
33842 if !f.args.is_empty() {
33843 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
33844 let inner = get_unit_str(&f.args[0]);
33845 format!("{}({})", base, inner)
33846 } else {
33847 base
33848 }
33849 }
33850 _ => "DAY".to_string(),
33851 }
33852 }
33853
33854 /// Parse unit string to IntervalUnit
33855 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
33856 match s {
33857 "YEAR" => crate::expressions::IntervalUnit::Year,
33858 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
33859 "MONTH" => crate::expressions::IntervalUnit::Month,
33860 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
33861 "DAY" => crate::expressions::IntervalUnit::Day,
33862 "HOUR" => crate::expressions::IntervalUnit::Hour,
33863 "MINUTE" => crate::expressions::IntervalUnit::Minute,
33864 "SECOND" => crate::expressions::IntervalUnit::Second,
33865 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
33866 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
33867 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
33868 _ => crate::expressions::IntervalUnit::Day,
33869 }
33870 }
33871
33872 match name.as_str() {
33873 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
33874 // (BigQuery: result = date1 - date2, Standard: result = end - start)
33875 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
33876 let date1 = args.remove(0);
33877 let date2 = args.remove(0);
33878 let unit_expr = args.remove(0);
33879 let unit_str = get_unit_str(&unit_expr);
33880
33881 if matches!(target, DialectType::BigQuery) {
33882 // BigQuery -> BigQuery: just uppercase the unit
33883 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
33884 return Ok(Expression::Function(Box::new(Function::new(
33885 f.name,
33886 vec![date1, date2, unit],
33887 ))));
33888 }
33889
33890 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
33891 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
33892 if matches!(target, DialectType::Snowflake) {
33893 return Ok(Expression::TimestampDiff(Box::new(
33894 crate::expressions::TimestampDiff {
33895 this: Box::new(date2),
33896 expression: Box::new(date1),
33897 unit: Some(unit_str),
33898 },
33899 )));
33900 }
33901
33902 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
33903 if matches!(target, DialectType::DuckDB) {
33904 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
33905 // CAST to TIME
33906 let cast_fn = |e: Expression| -> Expression {
33907 match e {
33908 Expression::Literal(lit)
33909 if matches!(lit.as_ref(), Literal::String(_)) =>
33910 {
33911 let Literal::String(s) = lit.as_ref() else {
33912 unreachable!()
33913 };
33914 Expression::Cast(Box::new(Cast {
33915 this: Expression::Literal(Box::new(Literal::String(
33916 s.clone(),
33917 ))),
33918 to: DataType::Custom {
33919 name: "TIME".to_string(),
33920 },
33921 trailing_comments: vec![],
33922 double_colon_syntax: false,
33923 format: None,
33924 default: None,
33925 inferred_type: None,
33926 }))
33927 }
33928 other => other,
33929 }
33930 };
33931 (cast_fn(date1), cast_fn(date2))
33932 } else if name == "DATETIME_DIFF" {
33933 // CAST to TIMESTAMP
33934 (
33935 Self::ensure_cast_timestamp(date1),
33936 Self::ensure_cast_timestamp(date2),
33937 )
33938 } else {
33939 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
33940 (
33941 Self::ensure_cast_timestamptz(date1),
33942 Self::ensure_cast_timestamptz(date2),
33943 )
33944 };
33945 return Ok(Expression::Function(Box::new(Function::new(
33946 "DATE_DIFF".to_string(),
33947 vec![
33948 Expression::Literal(Box::new(Literal::String(unit_str))),
33949 cast_d2,
33950 cast_d1,
33951 ],
33952 ))));
33953 }
33954
33955 // Convert to standard TIMESTAMPDIFF(unit, start, end)
33956 let unit = Expression::Identifier(Identifier::new(unit_str));
33957 Ok(Expression::Function(Box::new(Function::new(
33958 "TIMESTAMPDIFF".to_string(),
33959 vec![unit, date2, date1],
33960 ))))
33961 }
33962
33963 // DATEDIFF(unit, start, end) -> target-specific form
33964 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
33965 "DATEDIFF" if args.len() == 3 => {
33966 let arg0 = args.remove(0);
33967 let arg1 = args.remove(0);
33968 let arg2 = args.remove(0);
33969 let unit_str = get_unit_str(&arg0);
33970
33971 // Redshift DATEDIFF(unit, start, end) order: result = end - start
33972 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
33973 // TSQL DATEDIFF(unit, start, end) order: result = end - start
33974
33975 if matches!(target, DialectType::Snowflake) {
33976 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
33977 let unit = Expression::Identifier(Identifier::new(unit_str));
33978 return Ok(Expression::Function(Box::new(Function::new(
33979 "DATEDIFF".to_string(),
33980 vec![unit, arg1, arg2],
33981 ))));
33982 }
33983
33984 if matches!(target, DialectType::DuckDB) {
33985 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
33986 let cast_d1 = Self::ensure_cast_timestamp(arg1);
33987 let cast_d2 = Self::ensure_cast_timestamp(arg2);
33988 return Ok(Expression::Function(Box::new(Function::new(
33989 "DATE_DIFF".to_string(),
33990 vec![
33991 Expression::Literal(Box::new(Literal::String(unit_str))),
33992 cast_d1,
33993 cast_d2,
33994 ],
33995 ))));
33996 }
33997
33998 if matches!(target, DialectType::BigQuery) {
33999 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
34000 let cast_d1 = Self::ensure_cast_datetime(arg1);
34001 let cast_d2 = Self::ensure_cast_datetime(arg2);
34002 let unit = Expression::Identifier(Identifier::new(unit_str));
34003 return Ok(Expression::Function(Box::new(Function::new(
34004 "DATE_DIFF".to_string(),
34005 vec![cast_d2, cast_d1, unit],
34006 ))));
34007 }
34008
34009 if matches!(target, DialectType::Spark | DialectType::Databricks) {
34010 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
34011 let unit = Expression::Identifier(Identifier::new(unit_str));
34012 return Ok(Expression::Function(Box::new(Function::new(
34013 "DATEDIFF".to_string(),
34014 vec![unit, arg1, arg2],
34015 ))));
34016 }
34017
34018 if matches!(target, DialectType::Hive) {
34019 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
34020 match unit_str.as_str() {
34021 "MONTH" => {
34022 return Ok(Expression::Function(Box::new(Function::new(
34023 "CAST".to_string(),
34024 vec![Expression::Function(Box::new(Function::new(
34025 "MONTHS_BETWEEN".to_string(),
34026 vec![arg2, arg1],
34027 )))],
34028 ))));
34029 }
34030 "WEEK" => {
34031 return Ok(Expression::Cast(Box::new(Cast {
34032 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
34033 Expression::Function(Box::new(Function::new(
34034 "DATEDIFF".to_string(),
34035 vec![arg2, arg1],
34036 ))),
34037 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
34038 ))),
34039 to: DataType::Int {
34040 length: None,
34041 integer_spelling: false,
34042 },
34043 trailing_comments: vec![],
34044 double_colon_syntax: false,
34045 format: None,
34046 default: None,
34047 inferred_type: None,
34048 })));
34049 }
34050 _ => {
34051 // Default: DATEDIFF(end, start) for DAY
34052 return Ok(Expression::Function(Box::new(Function::new(
34053 "DATEDIFF".to_string(),
34054 vec![arg2, arg1],
34055 ))));
34056 }
34057 }
34058 }
34059
34060 if matches!(
34061 target,
34062 DialectType::Presto | DialectType::Trino | DialectType::Athena
34063 ) {
34064 // Presto/Trino: DATE_DIFF('UNIT', start, end)
34065 return Ok(Expression::Function(Box::new(Function::new(
34066 "DATE_DIFF".to_string(),
34067 vec![
34068 Expression::Literal(Box::new(Literal::String(unit_str))),
34069 arg1,
34070 arg2,
34071 ],
34072 ))));
34073 }
34074
34075 if matches!(target, DialectType::TSQL) {
34076 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
34077 let cast_d2 = Self::ensure_cast_datetime2(arg2);
34078 let unit = Expression::Identifier(Identifier::new(unit_str));
34079 return Ok(Expression::Function(Box::new(Function::new(
34080 "DATEDIFF".to_string(),
34081 vec![unit, arg1, cast_d2],
34082 ))));
34083 }
34084
34085 if matches!(target, DialectType::PostgreSQL) {
34086 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
34087 // For now, use DATEDIFF (passthrough) with uppercased unit
34088 let unit = Expression::Identifier(Identifier::new(unit_str));
34089 return Ok(Expression::Function(Box::new(Function::new(
34090 "DATEDIFF".to_string(),
34091 vec![unit, arg1, arg2],
34092 ))));
34093 }
34094
34095 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
34096 let unit = Expression::Identifier(Identifier::new(unit_str));
34097 Ok(Expression::Function(Box::new(Function::new(
34098 "DATEDIFF".to_string(),
34099 vec![unit, arg1, arg2],
34100 ))))
34101 }
34102
34103 // DATE_DIFF(date1, date2, unit) -> standard form
34104 "DATE_DIFF" if args.len() == 3 => {
34105 let date1 = args.remove(0);
34106 let date2 = args.remove(0);
34107 let unit_expr = args.remove(0);
34108 let unit_str = get_unit_str(&unit_expr);
34109
34110 if matches!(target, DialectType::BigQuery) {
34111 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
34112 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
34113 "WEEK".to_string()
34114 } else {
34115 unit_str
34116 };
34117 let norm_d1 = Self::date_literal_to_cast(date1);
34118 let norm_d2 = Self::date_literal_to_cast(date2);
34119 let unit = Expression::Identifier(Identifier::new(norm_unit));
34120 return Ok(Expression::Function(Box::new(Function::new(
34121 f.name,
34122 vec![norm_d1, norm_d2, unit],
34123 ))));
34124 }
34125
34126 if matches!(target, DialectType::MySQL) {
34127 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
34128 let norm_d1 = Self::date_literal_to_cast(date1);
34129 let norm_d2 = Self::date_literal_to_cast(date2);
34130 return Ok(Expression::Function(Box::new(Function::new(
34131 "DATEDIFF".to_string(),
34132 vec![norm_d1, norm_d2],
34133 ))));
34134 }
34135
34136 if matches!(target, DialectType::StarRocks) {
34137 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
34138 let norm_d1 = Self::date_literal_to_cast(date1);
34139 let norm_d2 = Self::date_literal_to_cast(date2);
34140 return Ok(Expression::Function(Box::new(Function::new(
34141 "DATE_DIFF".to_string(),
34142 vec![
34143 Expression::Literal(Box::new(Literal::String(unit_str))),
34144 norm_d1,
34145 norm_d2,
34146 ],
34147 ))));
34148 }
34149
34150 if matches!(target, DialectType::DuckDB) {
34151 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
34152 let norm_d1 = Self::ensure_cast_date(date1);
34153 let norm_d2 = Self::ensure_cast_date(date2);
34154
34155 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
34156 let is_week_variant = unit_str == "WEEK"
34157 || unit_str.starts_with("WEEK(")
34158 || unit_str == "ISOWEEK";
34159 if is_week_variant {
34160 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
34161 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
34162 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
34163 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
34164 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
34165 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
34166 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
34167 Some("1") // Shift Sunday to Monday alignment
34168 } else if unit_str == "WEEK(SATURDAY)" {
34169 Some("-5")
34170 } else if unit_str == "WEEK(TUESDAY)" {
34171 Some("-1")
34172 } else if unit_str == "WEEK(WEDNESDAY)" {
34173 Some("-2")
34174 } else if unit_str == "WEEK(THURSDAY)" {
34175 Some("-3")
34176 } else if unit_str == "WEEK(FRIDAY)" {
34177 Some("-4")
34178 } else {
34179 Some("1") // default to Sunday
34180 };
34181
34182 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
34183 let shifted = if let Some(off) = offset {
34184 let interval =
34185 Expression::Interval(Box::new(crate::expressions::Interval {
34186 this: Some(Expression::Literal(Box::new(Literal::String(
34187 off.to_string(),
34188 )))),
34189 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34190 unit: crate::expressions::IntervalUnit::Day,
34191 use_plural: false,
34192 }),
34193 }));
34194 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
34195 date, interval,
34196 )))
34197 } else {
34198 date
34199 };
34200 Expression::Function(Box::new(Function::new(
34201 "DATE_TRUNC".to_string(),
34202 vec![
34203 Expression::Literal(Box::new(Literal::String(
34204 "WEEK".to_string(),
34205 ))),
34206 shifted,
34207 ],
34208 )))
34209 };
34210
34211 let trunc_d2 = make_trunc(norm_d2, day_offset);
34212 let trunc_d1 = make_trunc(norm_d1, day_offset);
34213 return Ok(Expression::Function(Box::new(Function::new(
34214 "DATE_DIFF".to_string(),
34215 vec![
34216 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
34217 trunc_d2,
34218 trunc_d1,
34219 ],
34220 ))));
34221 }
34222
34223 return Ok(Expression::Function(Box::new(Function::new(
34224 "DATE_DIFF".to_string(),
34225 vec![
34226 Expression::Literal(Box::new(Literal::String(unit_str))),
34227 norm_d2,
34228 norm_d1,
34229 ],
34230 ))));
34231 }
34232
34233 // Default: DATEDIFF(unit, date2, date1)
34234 let unit = Expression::Identifier(Identifier::new(unit_str));
34235 Ok(Expression::Function(Box::new(Function::new(
34236 "DATEDIFF".to_string(),
34237 vec![unit, date2, date1],
34238 ))))
34239 }
34240
            // TIMESTAMP_ADD / DATETIME_ADD / TIME_ADD(ts, INTERVAL n UNIT) -> target-specific form.
            // The second argument is split into (value, unit) by `extract_interval_parts`; when
            // that returns `None`, the whole argument becomes the value and the unit is DAY.
            "TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) =
                    Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
                        (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
                    });

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
                        // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
                        // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
                        let unit_str = Self::interval_unit_to_string(&unit);
                        // The cast helper also receives the source function name.
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str.to_string()),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                            // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(ts, interval),
                            )))
                        } else if name == "DATETIME_ADD"
                            && matches!(target, DialectType::Databricks)
                        {
                            // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                            ))))
                        } else {
                            // TIMESTAMP_ADD / TIME_ADD: DATE_ADD(UNIT, val, ts) with the unit
                            // emitted as an identifier (not a quoted string).
                            // DATETIME_ADD never reaches this branch for these two targets (it is
                            // consumed by the branches above), so in practice only TIMESTAMP_*
                            // names pass `ts` through `maybe_cast_ts`.
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let cast_ts =
                                if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                                    Self::maybe_cast_ts(ts)
                                } else {
                                    ts
                                };
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    val,
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL.
                        // Only TIMESTAMP_* names get the TIMESTAMP() wrapper; DATETIME_ADD and
                        // TIME_ADD pass `ts` through unchanged.
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                                    let unwrapped = match ts {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                                        {
                                            // The guard above guarantees the variant, so the
                                            // `else` branch cannot be taken.
                                            let Literal::Timestamp(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            Expression::Literal(Box::new(Literal::String(
                                                s.clone(),
                                            )))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
                        // Only the DuckDB target adjusts the operand: DATETIME_ADD goes through
                        // `ensure_cast_timestamp`, TIMESTAMP_* through `maybe_cast_ts_to_tz`,
                        // and TIME_ADD is left untouched.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_ADD" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
34373
            // TIMESTAMP_SUB / DATETIME_SUB / TIME_SUB(ts, INTERVAL n UNIT) -> target-specific form.
            // The second argument is split into (value, unit) by `extract_interval_parts`; when
            // that returns `None`, the whole argument becomes the value and the unit is DAY.
            "TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
                let ts = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) =
                    Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
                        (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
                    });

                match target {
                    DialectType::Snowflake => {
                        // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
                        // Subtraction is expressed as adding `val * -1`; the negation is kept
                        // symbolic (Mul by Neg(1)) rather than folded into the value.
                        let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                            val,
                            Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                this: Expression::number(1),
                                inferred_type: None,
                            })),
                        )));
                        Ok(Expression::TimestampAdd(Box::new(
                            crate::expressions::TimestampAdd {
                                this: Box::new(neg_val),
                                expression: Box::new(cast_ts),
                                unit: Some(unit_str.to_string()),
                            },
                        )))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                            || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
                        {
                            // Spark: ts - INTERVAL val UNIT
                            // Only TIMESTAMP_SUB passes `ts` through `maybe_cast_ts`.
                            let cast_ts = if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts(ts)
                            } else {
                                ts
                            };
                            let interval =
                                Expression::Interval(Box::new(crate::expressions::Interval {
                                    this: Some(val),
                                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                        unit,
                                        use_plural: false,
                                    }),
                                }));
                            Ok(Expression::Sub(Box::new(
                                crate::expressions::BinaryOp::new(cast_ts, interval),
                            )))
                        } else {
                            // Databricks (all three names) and Spark TIME_SUB:
                            // TIMESTAMPADD(UNIT, val * -1, ts)
                            let unit_str = Self::interval_unit_to_string(&unit);
                            let neg_val =
                                Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                                    val,
                                    Expression::Neg(Box::new(crate::expressions::UnaryOp {
                                        this: Expression::number(1),
                                        inferred_type: None,
                                    })),
                                )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMPADD".to_string(),
                                vec![
                                    Expression::Identifier(Identifier::new(unit_str)),
                                    neg_val,
                                    ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::MySQL => {
                        // MySQL: DateSub over TIMESTAMP(ts). Only TIMESTAMP_* names get the
                        // TIMESTAMP() wrapper; other names pass `ts` through unchanged.
                        let mysql_ts = if name.starts_with("TIMESTAMP") {
                            // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                            match &ts {
                                Expression::Function(ref inner_f)
                                    if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                                {
                                    // Already wrapped, keep as-is
                                    ts
                                }
                                _ => {
                                    // Unwrap typed literals: TIMESTAMP '...' -> '...' before
                                    // wrapping in TIMESTAMP()
                                    let unwrapped = match ts {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                                        {
                                            // The guard above guarantees the variant, so the
                                            // `else` branch cannot be taken.
                                            let Literal::Timestamp(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            Expression::Literal(Box::new(Literal::String(
                                                s.clone(),
                                            )))
                                        }
                                        other => other,
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "TIMESTAMP".to_string(),
                                        vec![unwrapped],
                                    )))
                                }
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: mysql_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    _ => {
                        // All other targets: DateSub node. Only the DuckDB target adjusts the
                        // operand: DATETIME_SUB goes through `ensure_cast_timestamp`,
                        // TIMESTAMP_* through `maybe_cast_ts_to_tz`, TIME_SUB is left untouched.
                        let cast_ts = if matches!(target, DialectType::DuckDB) {
                            if name == "DATETIME_SUB" {
                                Self::ensure_cast_timestamp(ts)
                            } else if name.starts_with("TIMESTAMP") {
                                Self::maybe_cast_ts_to_tz(ts, &name)
                            } else {
                                ts
                            }
                        } else {
                            ts
                        };
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_ts,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                }
            }
34508
            // DATE_SUB(date, INTERVAL n UNIT) -> target-specific form.
            // The second argument is split into (value, unit) by `extract_interval_parts`; when
            // that returns `None`, the whole argument becomes the value and the unit is DAY.
            "DATE_SUB" if args.len() == 2 => {
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                let (val, unit) =
                    Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
                        (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
                    });

                match target {
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: emit a DateSub node carrying the original
                        // (un-negated) value and unit. No DATE_ADD with a negated value is
                        // built here; how the node is rendered is presumably decided by the
                        // target dialect's own DateSub handling.
                        // NOTE(review): this arm builds the same node as the default arm below.
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
                        let cast_date = Self::ensure_cast_date(date);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(val),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
                        // Just ensure the date is cast properly
                        let cast_date = Self::ensure_cast_date(date);
                        Ok(Expression::DateSub(Box::new(
                            crate::expressions::DateAddFunc {
                                this: cast_date,
                                interval: val,
                                unit,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date - INTERVAL 'val UNIT'
                        // Value and unit are merged into one string literal, so the Interval
                        // node itself carries no separate unit.
                        let unit_str = Self::interval_unit_to_string(&unit);
                        let interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    format!("{} {}", Self::expr_to_string(&val), unit_str),
                                )))),
                                unit: None,
                            }));
                        Ok(Expression::Sub(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    // Every other target: plain DateSub node with the operands unchanged.
                    _ => Ok(Expression::DateSub(Box::new(
                        crate::expressions::DateAddFunc {
                            this: date,
                            interval: val,
                            unit,
                        },
                    ))),
                }
            }
34583
34584 // DATEADD(unit, val, date) -> target-specific form
34585 // Used by: Redshift, Snowflake, TSQL, ClickHouse
34586 "DATEADD" if args.len() == 3 => {
34587 let arg0 = args.remove(0);
34588 let arg1 = args.remove(0);
34589 let arg2 = args.remove(0);
34590 let unit_str = get_unit_str(&arg0);
34591
34592 if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
34593 // Keep DATEADD(UNIT, val, date) with uppercased unit
34594 let unit = Expression::Identifier(Identifier::new(unit_str));
34595 // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
34596 let date = if matches!(target, DialectType::TSQL)
34597 && !matches!(
34598 source,
34599 DialectType::Spark | DialectType::Databricks | DialectType::Hive
34600 ) {
34601 Self::ensure_cast_datetime2(arg2)
34602 } else {
34603 arg2
34604 };
34605 return Ok(Expression::Function(Box::new(Function::new(
34606 "DATEADD".to_string(),
34607 vec![unit, arg1, date],
34608 ))));
34609 }
34610
34611 if matches!(target, DialectType::DuckDB) {
34612 // DuckDB: date + INTERVAL 'val' UNIT
34613 let iu = parse_interval_unit(&unit_str);
34614 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34615 this: Some(arg1),
34616 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34617 unit: iu,
34618 use_plural: false,
34619 }),
34620 }));
34621 let cast_date = Self::ensure_cast_timestamp(arg2);
34622 return Ok(Expression::Add(Box::new(
34623 crate::expressions::BinaryOp::new(cast_date, interval),
34624 )));
34625 }
34626
34627 if matches!(target, DialectType::BigQuery) {
34628 // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
34629 let iu = parse_interval_unit(&unit_str);
34630 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34631 this: Some(arg1),
34632 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34633 unit: iu,
34634 use_plural: false,
34635 }),
34636 }));
34637 return Ok(Expression::Function(Box::new(Function::new(
34638 "DATE_ADD".to_string(),
34639 vec![arg2, interval],
34640 ))));
34641 }
34642
34643 if matches!(target, DialectType::Databricks) {
34644 // Databricks: keep DATEADD(UNIT, val, date) format
34645 let unit = Expression::Identifier(Identifier::new(unit_str));
34646 return Ok(Expression::Function(Box::new(Function::new(
34647 "DATEADD".to_string(),
34648 vec![unit, arg1, arg2],
34649 ))));
34650 }
34651
34652 if matches!(target, DialectType::Spark) {
34653 // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
34654 fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
34655 if let Expression::Literal(lit) = &expr {
34656 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
34657 if let Ok(val) = n.parse::<i64>() {
34658 return Expression::Literal(Box::new(
34659 crate::expressions::Literal::Number(
34660 (val * factor).to_string(),
34661 ),
34662 ));
34663 }
34664 }
34665 }
34666 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
34667 expr,
34668 Expression::Literal(Box::new(crate::expressions::Literal::Number(
34669 factor.to_string(),
34670 ))),
34671 )))
34672 }
34673 match unit_str.as_str() {
34674 "YEAR" => {
34675 let months = multiply_expr_dateadd(arg1, 12);
34676 return Ok(Expression::Function(Box::new(Function::new(
34677 "ADD_MONTHS".to_string(),
34678 vec![arg2, months],
34679 ))));
34680 }
34681 "QUARTER" => {
34682 let months = multiply_expr_dateadd(arg1, 3);
34683 return Ok(Expression::Function(Box::new(Function::new(
34684 "ADD_MONTHS".to_string(),
34685 vec![arg2, months],
34686 ))));
34687 }
34688 "MONTH" => {
34689 return Ok(Expression::Function(Box::new(Function::new(
34690 "ADD_MONTHS".to_string(),
34691 vec![arg2, arg1],
34692 ))));
34693 }
34694 "WEEK" => {
34695 let days = multiply_expr_dateadd(arg1, 7);
34696 return Ok(Expression::Function(Box::new(Function::new(
34697 "DATE_ADD".to_string(),
34698 vec![arg2, days],
34699 ))));
34700 }
34701 "DAY" => {
34702 return Ok(Expression::Function(Box::new(Function::new(
34703 "DATE_ADD".to_string(),
34704 vec![arg2, arg1],
34705 ))));
34706 }
34707 _ => {
34708 let unit = Expression::Identifier(Identifier::new(unit_str));
34709 return Ok(Expression::Function(Box::new(Function::new(
34710 "DATE_ADD".to_string(),
34711 vec![unit, arg1, arg2],
34712 ))));
34713 }
34714 }
34715 }
34716
34717 if matches!(target, DialectType::Hive) {
34718 // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
34719 match unit_str.as_str() {
34720 "DAY" => {
34721 return Ok(Expression::Function(Box::new(Function::new(
34722 "DATE_ADD".to_string(),
34723 vec![arg2, arg1],
34724 ))));
34725 }
34726 "MONTH" => {
34727 return Ok(Expression::Function(Box::new(Function::new(
34728 "ADD_MONTHS".to_string(),
34729 vec![arg2, arg1],
34730 ))));
34731 }
34732 _ => {
34733 let iu = parse_interval_unit(&unit_str);
34734 let interval =
34735 Expression::Interval(Box::new(crate::expressions::Interval {
34736 this: Some(arg1),
34737 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34738 unit: iu,
34739 use_plural: false,
34740 }),
34741 }));
34742 return Ok(Expression::Add(Box::new(
34743 crate::expressions::BinaryOp::new(arg2, interval),
34744 )));
34745 }
34746 }
34747 }
34748
34749 if matches!(target, DialectType::PostgreSQL) {
34750 // PostgreSQL: date + INTERVAL 'val UNIT'
34751 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34752 this: Some(Expression::Literal(Box::new(Literal::String(format!(
34753 "{} {}",
34754 Self::expr_to_string(&arg1),
34755 unit_str
34756 ))))),
34757 unit: None,
34758 }));
34759 return Ok(Expression::Add(Box::new(
34760 crate::expressions::BinaryOp::new(arg2, interval),
34761 )));
34762 }
34763
34764 if matches!(
34765 target,
34766 DialectType::Presto | DialectType::Trino | DialectType::Athena
34767 ) {
34768 // Presto/Trino: DATE_ADD('UNIT', val, date)
34769 return Ok(Expression::Function(Box::new(Function::new(
34770 "DATE_ADD".to_string(),
34771 vec![
34772 Expression::Literal(Box::new(Literal::String(unit_str))),
34773 arg1,
34774 arg2,
34775 ],
34776 ))));
34777 }
34778
34779 if matches!(target, DialectType::ClickHouse) {
34780 // ClickHouse: DATE_ADD(UNIT, val, date)
34781 let unit = Expression::Identifier(Identifier::new(unit_str));
34782 return Ok(Expression::Function(Box::new(Function::new(
34783 "DATE_ADD".to_string(),
34784 vec![unit, arg1, arg2],
34785 ))));
34786 }
34787
34788 // Default: keep DATEADD with uppercased unit
34789 let unit = Expression::Identifier(Identifier::new(unit_str));
34790 Ok(Expression::Function(Box::new(Function::new(
34791 "DATEADD".to_string(),
34792 vec![unit, arg1, arg2],
34793 ))))
34794 }
34795
34796 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
34797 "DATE_ADD" if args.len() == 3 => {
34798 let arg0 = args.remove(0);
34799 let arg1 = args.remove(0);
34800 let arg2 = args.remove(0);
34801 let unit_str = get_unit_str(&arg0);
34802
34803 if matches!(
34804 target,
34805 DialectType::Presto | DialectType::Trino | DialectType::Athena
34806 ) {
34807 // Presto/Trino: DATE_ADD('UNIT', val, date)
34808 return Ok(Expression::Function(Box::new(Function::new(
34809 "DATE_ADD".to_string(),
34810 vec![
34811 Expression::Literal(Box::new(Literal::String(unit_str))),
34812 arg1,
34813 arg2,
34814 ],
34815 ))));
34816 }
34817
34818 if matches!(
34819 target,
34820 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
34821 ) {
34822 // DATEADD(UNIT, val, date)
34823 let unit = Expression::Identifier(Identifier::new(unit_str));
34824 let date = if matches!(target, DialectType::TSQL) {
34825 Self::ensure_cast_datetime2(arg2)
34826 } else {
34827 arg2
34828 };
34829 return Ok(Expression::Function(Box::new(Function::new(
34830 "DATEADD".to_string(),
34831 vec![unit, arg1, date],
34832 ))));
34833 }
34834
34835 if matches!(target, DialectType::DuckDB) {
34836 // DuckDB: date + INTERVAL val UNIT
34837 let iu = parse_interval_unit(&unit_str);
34838 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
34839 this: Some(arg1),
34840 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34841 unit: iu,
34842 use_plural: false,
34843 }),
34844 }));
34845 return Ok(Expression::Add(Box::new(
34846 crate::expressions::BinaryOp::new(arg2, interval),
34847 )));
34848 }
34849
34850 if matches!(target, DialectType::Spark | DialectType::Databricks) {
34851 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
34852 let unit = Expression::Identifier(Identifier::new(unit_str));
34853 return Ok(Expression::Function(Box::new(Function::new(
34854 "DATE_ADD".to_string(),
34855 vec![unit, arg1, arg2],
34856 ))));
34857 }
34858
34859 // Default: DATE_ADD(UNIT, val, date)
34860 let unit = Expression::Identifier(Identifier::new(unit_str));
34861 Ok(Expression::Function(Box::new(Function::new(
34862 "DATE_ADD".to_string(),
34863 vec![unit, arg1, arg2],
34864 ))))
34865 }
34866
34867 // DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
34868 "DATE_ADD" if args.len() == 2 => {
34869 let date = args.remove(0);
34870 let interval_expr = args.remove(0);
34871 let (val, unit) =
34872 Self::extract_interval_parts(&interval_expr).unwrap_or_else(|| {
34873 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
34874 });
34875 let unit_str = Self::interval_unit_to_string(&unit);
34876
34877 match target {
34878 DialectType::DuckDB => {
34879 // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
34880 let cast_date = Self::ensure_cast_date(date);
34881 let quoted_val = Self::quote_interval_val(&val);
34882 let interval =
34883 Expression::Interval(Box::new(crate::expressions::Interval {
34884 this: Some(quoted_val),
34885 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34886 unit,
34887 use_plural: false,
34888 }),
34889 }));
34890 Ok(Expression::Add(Box::new(
34891 crate::expressions::BinaryOp::new(cast_date, interval),
34892 )))
34893 }
34894 DialectType::PostgreSQL => {
34895 // PostgreSQL: date + INTERVAL 'val UNIT'
34896 let interval =
34897 Expression::Interval(Box::new(crate::expressions::Interval {
34898 this: Some(Expression::Literal(Box::new(Literal::String(
34899 format!("{} {}", Self::expr_to_string(&val), unit_str),
34900 )))),
34901 unit: None,
34902 }));
34903 Ok(Expression::Add(Box::new(
34904 crate::expressions::BinaryOp::new(date, interval),
34905 )))
34906 }
34907 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
34908 // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
34909 let val_str = Self::expr_to_string(&val);
34910 Ok(Expression::Function(Box::new(Function::new(
34911 "DATE_ADD".to_string(),
34912 vec![
34913 Expression::Literal(Box::new(Literal::String(
34914 unit_str.to_string(),
34915 ))),
34916 Expression::Cast(Box::new(Cast {
34917 this: Expression::Literal(Box::new(Literal::String(val_str))),
34918 to: DataType::BigInt { length: None },
34919 trailing_comments: vec![],
34920 double_colon_syntax: false,
34921 format: None,
34922 default: None,
34923 inferred_type: None,
34924 })),
34925 date,
34926 ],
34927 ))))
34928 }
34929 DialectType::Spark | DialectType::Hive => {
34930 // Spark/Hive: DATE_ADD(date, val) for DAY
34931 match unit_str {
34932 "DAY" => Ok(Expression::Function(Box::new(Function::new(
34933 "DATE_ADD".to_string(),
34934 vec![date, val],
34935 )))),
34936 "MONTH" => Ok(Expression::Function(Box::new(Function::new(
34937 "ADD_MONTHS".to_string(),
34938 vec![date, val],
34939 )))),
34940 _ => {
34941 let iu = parse_interval_unit(&unit_str);
34942 let interval =
34943 Expression::Interval(Box::new(crate::expressions::Interval {
34944 this: Some(val),
34945 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34946 unit: iu,
34947 use_plural: false,
34948 }),
34949 }));
34950 Ok(Expression::Function(Box::new(Function::new(
34951 "DATE_ADD".to_string(),
34952 vec![date, interval],
34953 ))))
34954 }
34955 }
34956 }
34957 DialectType::Snowflake => {
34958 // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
34959 let cast_date = Self::ensure_cast_date(date);
34960 let val_str = Self::expr_to_string(&val);
34961 Ok(Expression::Function(Box::new(Function::new(
34962 "DATEADD".to_string(),
34963 vec![
34964 Expression::Identifier(Identifier::new(unit_str)),
34965 Expression::Literal(Box::new(Literal::String(val_str))),
34966 cast_date,
34967 ],
34968 ))))
34969 }
34970 DialectType::TSQL | DialectType::Fabric => {
34971 let cast_date = Self::ensure_cast_datetime2(date);
34972 Ok(Expression::Function(Box::new(Function::new(
34973 "DATEADD".to_string(),
34974 vec![
34975 Expression::Identifier(Identifier::new(unit_str)),
34976 val,
34977 cast_date,
34978 ],
34979 ))))
34980 }
34981 DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
34982 "DATEADD".to_string(),
34983 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
34984 )))),
34985 DialectType::MySQL => {
34986 // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
34987 let quoted_val = Self::quote_interval_val(&val);
34988 let iu = parse_interval_unit(&unit_str);
34989 let interval =
34990 Expression::Interval(Box::new(crate::expressions::Interval {
34991 this: Some(quoted_val),
34992 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
34993 unit: iu,
34994 use_plural: false,
34995 }),
34996 }));
34997 Ok(Expression::Function(Box::new(Function::new(
34998 "DATE_ADD".to_string(),
34999 vec![date, interval],
35000 ))))
35001 }
35002 DialectType::BigQuery => {
35003 // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT)
35004 let quoted_val = Self::quote_interval_val(&val);
35005 let iu = parse_interval_unit(&unit_str);
35006 let interval =
35007 Expression::Interval(Box::new(crate::expressions::Interval {
35008 this: Some(quoted_val),
35009 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35010 unit: iu,
35011 use_plural: false,
35012 }),
35013 }));
35014 Ok(Expression::Function(Box::new(Function::new(
35015 "DATE_ADD".to_string(),
35016 vec![date, interval],
35017 ))))
35018 }
35019 DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
35020 "DATEADD".to_string(),
35021 vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
35022 )))),
35023 _ => {
35024 // Default: keep as DATE_ADD with decomposed interval
35025 Ok(Expression::DateAdd(Box::new(
35026 crate::expressions::DateAddFunc {
35027 this: date,
35028 interval: val,
35029 unit,
35030 },
35031 )))
35032 }
35033 }
35034 }
35035
35036 // ADD_MONTHS(date, val) -> target-specific form
35037 "ADD_MONTHS" if args.len() == 2 => {
35038 let date = args.remove(0);
35039 let val = args.remove(0);
35040
35041 if matches!(target, DialectType::TSQL) {
35042 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
35043 let cast_date = Self::ensure_cast_datetime2(date);
35044 return Ok(Expression::Function(Box::new(Function::new(
35045 "DATEADD".to_string(),
35046 vec![
35047 Expression::Identifier(Identifier::new("MONTH")),
35048 val,
35049 cast_date,
35050 ],
35051 ))));
35052 }
35053
35054 if matches!(target, DialectType::DuckDB) {
35055 // DuckDB: date + INTERVAL val MONTH
35056 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
35057 this: Some(val),
35058 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
35059 unit: crate::expressions::IntervalUnit::Month,
35060 use_plural: false,
35061 }),
35062 }));
35063 return Ok(Expression::Add(Box::new(
35064 crate::expressions::BinaryOp::new(date, interval),
35065 )));
35066 }
35067
35068 if matches!(target, DialectType::Snowflake) {
35069 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
35070 if matches!(source, DialectType::Snowflake) {
35071 return Ok(Expression::Function(Box::new(Function::new(
35072 "ADD_MONTHS".to_string(),
35073 vec![date, val],
35074 ))));
35075 }
35076 return Ok(Expression::Function(Box::new(Function::new(
35077 "DATEADD".to_string(),
35078 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
35079 ))));
35080 }
35081
35082 if matches!(target, DialectType::Spark | DialectType::Databricks) {
35083 // Spark: ADD_MONTHS(date, val) - keep as is
35084 return Ok(Expression::Function(Box::new(Function::new(
35085 "ADD_MONTHS".to_string(),
35086 vec![date, val],
35087 ))));
35088 }
35089
35090 if matches!(target, DialectType::Hive) {
35091 return Ok(Expression::Function(Box::new(Function::new(
35092 "ADD_MONTHS".to_string(),
35093 vec![date, val],
35094 ))));
35095 }
35096
35097 if matches!(
35098 target,
35099 DialectType::Presto | DialectType::Trino | DialectType::Athena
35100 ) {
35101 // Presto: DATE_ADD('MONTH', val, date)
35102 return Ok(Expression::Function(Box::new(Function::new(
35103 "DATE_ADD".to_string(),
35104 vec![
35105 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
35106 val,
35107 date,
35108 ],
35109 ))));
35110 }
35111
35112 // Default: keep ADD_MONTHS
35113 Ok(Expression::Function(Box::new(Function::new(
35114 "ADD_MONTHS".to_string(),
35115 vec![date, val],
35116 ))))
35117 }
35118
35119 // SAFE_DIVIDE(x, y) -> target-specific form directly
35120 "SAFE_DIVIDE" if args.len() == 2 => {
35121 let x = args.remove(0);
35122 let y = args.remove(0);
35123 // Wrap x and y in parens if they're complex expressions
35124 let y_ref = match &y {
35125 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
35126 y.clone()
35127 }
35128 _ => Expression::Paren(Box::new(Paren {
35129 this: y.clone(),
35130 trailing_comments: vec![],
35131 })),
35132 };
35133 let x_ref = match &x {
35134 Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
35135 x.clone()
35136 }
35137 _ => Expression::Paren(Box::new(Paren {
35138 this: x.clone(),
35139 trailing_comments: vec![],
35140 })),
35141 };
35142 let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
35143 y_ref.clone(),
35144 Expression::number(0),
35145 )));
35146 let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
35147 x_ref.clone(),
35148 y_ref.clone(),
35149 )));
35150
35151 match target {
35152 DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
35153 Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
35154 )),
35155 DialectType::DuckDB | DialectType::PostgreSQL => {
35156 // CASE WHEN y <> 0 THEN x / y ELSE NULL END
35157 let result_div = if matches!(target, DialectType::PostgreSQL) {
35158 let cast_x = Expression::Cast(Box::new(Cast {
35159 this: x_ref,
35160 to: DataType::Custom {
35161 name: "DOUBLE PRECISION".to_string(),
35162 },
35163 trailing_comments: vec![],
35164 double_colon_syntax: false,
35165 format: None,
35166 default: None,
35167 inferred_type: None,
35168 }));
35169 Expression::Div(Box::new(crate::expressions::BinaryOp::new(
35170 cast_x, y_ref,
35171 )))
35172 } else {
35173 div_expr
35174 };
35175 Ok(Expression::Case(Box::new(crate::expressions::Case {
35176 operand: None,
35177 whens: vec![(condition, result_div)],
35178 else_: Some(Expression::Null(crate::expressions::Null)),
35179 comments: Vec::new(),
35180 inferred_type: None,
35181 })))
35182 }
35183 DialectType::Snowflake => {
35184 // IFF(y <> 0, x / y, NULL)
35185 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35186 condition,
35187 true_value: div_expr,
35188 false_value: Some(Expression::Null(crate::expressions::Null)),
35189 original_name: Some("IFF".to_string()),
35190 inferred_type: None,
35191 })))
35192 }
35193 DialectType::Presto | DialectType::Trino => {
35194 // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
35195 let cast_x = Expression::Cast(Box::new(Cast {
35196 this: x_ref,
35197 to: DataType::Double {
35198 precision: None,
35199 scale: None,
35200 },
35201 trailing_comments: vec![],
35202 double_colon_syntax: false,
35203 format: None,
35204 default: None,
35205 inferred_type: None,
35206 }));
35207 let cast_div = Expression::Div(Box::new(
35208 crate::expressions::BinaryOp::new(cast_x, y_ref),
35209 ));
35210 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35211 condition,
35212 true_value: cast_div,
35213 false_value: Some(Expression::Null(crate::expressions::Null)),
35214 original_name: None,
35215 inferred_type: None,
35216 })))
35217 }
35218 _ => {
35219 // IF(y <> 0, x / y, NULL)
35220 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
35221 condition,
35222 true_value: div_expr,
35223 false_value: Some(Expression::Null(crate::expressions::Null)),
35224 original_name: None,
35225 inferred_type: None,
35226 })))
35227 }
35228 }
35229 }
35230
35231 // GENERATE_UUID() -> UUID() with CAST to string
35232 "GENERATE_UUID" => {
35233 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
35234 this: None,
35235 name: None,
35236 is_string: None,
35237 }));
35238 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
35239 let cast_type = match target {
35240 DialectType::DuckDB => Some(DataType::Text),
35241 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
35242 length: None,
35243 parenthesized_length: false,
35244 }),
35245 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35246 Some(DataType::String { length: None })
35247 }
35248 _ => None,
35249 };
35250 if let Some(dt) = cast_type {
35251 Ok(Expression::Cast(Box::new(Cast {
35252 this: uuid_expr,
35253 to: dt,
35254 trailing_comments: vec![],
35255 double_colon_syntax: false,
35256 format: None,
35257 default: None,
35258 inferred_type: None,
35259 })))
35260 } else {
35261 Ok(uuid_expr)
35262 }
35263 }
35264
35265 // COUNTIF(x) -> CountIf expression
35266 "COUNTIF" if args.len() == 1 => {
35267 let arg = args.remove(0);
35268 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
35269 this: arg,
35270 distinct: false,
35271 filter: None,
35272 order_by: vec![],
35273 name: None,
35274 ignore_nulls: None,
35275 having_max: None,
35276 limit: None,
35277 inferred_type: None,
35278 })))
35279 }
35280
35281 // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression
35282 "EDIT_DISTANCE" => {
35283 // Strip named arguments (max_distance => N) and pass as positional
35284 let mut positional_args: Vec<Expression> = vec![];
35285 for arg in args {
35286 match arg {
35287 Expression::NamedArgument(na) => {
35288 positional_args.push(na.value);
35289 }
35290 other => positional_args.push(other),
35291 }
35292 }
35293 if positional_args.len() >= 2 {
35294 let col1 = positional_args.remove(0);
35295 let col2 = positional_args.remove(0);
35296 let levenshtein = crate::expressions::BinaryFunc {
35297 this: col1,
35298 expression: col2,
35299 original_name: None,
35300 inferred_type: None,
35301 };
35302 // Pass extra args through a function wrapper with all args
35303 if !positional_args.is_empty() {
35304 let max_dist = positional_args.remove(0);
35305 // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
35306 if matches!(target, DialectType::DuckDB) {
35307 let lev = Expression::Function(Box::new(Function::new(
35308 "LEVENSHTEIN".to_string(),
35309 vec![levenshtein.this, levenshtein.expression],
35310 )));
35311 let lev_is_null =
35312 Expression::IsNull(Box::new(crate::expressions::IsNull {
35313 this: lev.clone(),
35314 not: false,
35315 postfix_form: false,
35316 }));
35317 let max_is_null =
35318 Expression::IsNull(Box::new(crate::expressions::IsNull {
35319 this: max_dist.clone(),
35320 not: false,
35321 postfix_form: false,
35322 }));
35323 let null_check =
35324 Expression::Or(Box::new(crate::expressions::BinaryOp {
35325 left: lev_is_null,
35326 right: max_is_null,
35327 left_comments: Vec::new(),
35328 operator_comments: Vec::new(),
35329 trailing_comments: Vec::new(),
35330 inferred_type: None,
35331 }));
35332 let least =
35333 Expression::Least(Box::new(crate::expressions::VarArgFunc {
35334 expressions: vec![lev, max_dist],
35335 original_name: None,
35336 inferred_type: None,
35337 }));
35338 return Ok(Expression::Case(Box::new(crate::expressions::Case {
35339 operand: None,
35340 whens: vec![(
35341 null_check,
35342 Expression::Null(crate::expressions::Null),
35343 )],
35344 else_: Some(least),
35345 comments: Vec::new(),
35346 inferred_type: None,
35347 })));
35348 }
35349 let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
35350 all_args.extend(positional_args);
35351 // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
35352 let func_name = if matches!(target, DialectType::PostgreSQL) {
35353 "LEVENSHTEIN_LESS_EQUAL"
35354 } else {
35355 "LEVENSHTEIN"
35356 };
35357 return Ok(Expression::Function(Box::new(Function::new(
35358 func_name.to_string(),
35359 all_args,
35360 ))));
35361 }
35362 Ok(Expression::Levenshtein(Box::new(levenshtein)))
35363 } else {
35364 Ok(Expression::Function(Box::new(Function::new(
35365 "EDIT_DISTANCE".to_string(),
35366 positional_args,
35367 ))))
35368 }
35369 }
35370
35371 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
35372 "TIMESTAMP_SECONDS" if args.len() == 1 => {
35373 let arg = args.remove(0);
35374 Ok(Expression::UnixToTime(Box::new(
35375 crate::expressions::UnixToTime {
35376 this: Box::new(arg),
35377 scale: Some(0),
35378 zone: None,
35379 hours: None,
35380 minutes: None,
35381 format: None,
35382 target_type: None,
35383 },
35384 )))
35385 }
35386
35387 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
35388 "TIMESTAMP_MILLIS" if args.len() == 1 => {
35389 let arg = args.remove(0);
35390 Ok(Expression::UnixToTime(Box::new(
35391 crate::expressions::UnixToTime {
35392 this: Box::new(arg),
35393 scale: Some(3),
35394 zone: None,
35395 hours: None,
35396 minutes: None,
35397 format: None,
35398 target_type: None,
35399 },
35400 )))
35401 }
35402
35403 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
35404 "TIMESTAMP_MICROS" if args.len() == 1 => {
35405 let arg = args.remove(0);
35406 Ok(Expression::UnixToTime(Box::new(
35407 crate::expressions::UnixToTime {
35408 this: Box::new(arg),
35409 scale: Some(6),
35410 zone: None,
35411 hours: None,
35412 minutes: None,
35413 format: None,
35414 target_type: None,
35415 },
35416 )))
35417 }
35418
35419 // DIV(x, y) -> IntDiv expression
35420 "DIV" if args.len() == 2 => {
35421 let x = args.remove(0);
35422 let y = args.remove(0);
35423 Ok(Expression::IntDiv(Box::new(
35424 crate::expressions::BinaryFunc {
35425 this: x,
35426 expression: y,
35427 original_name: None,
35428 inferred_type: None,
35429 },
35430 )))
35431 }
35432
35433 // TO_HEX(x) -> target-specific form
35434 "TO_HEX" if args.len() == 1 => {
35435 let arg = args.remove(0);
35436 // Check if inner function already returns hex string in certain targets
35437 let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
35438 if matches!(target, DialectType::BigQuery) {
35439 // BQ->BQ: keep as TO_HEX
35440 Ok(Expression::Function(Box::new(Function::new(
35441 "TO_HEX".to_string(),
35442 vec![arg],
35443 ))))
35444 } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
35445 // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
35446 Ok(arg)
35447 } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
35448 // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
35449 // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
35450 // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
35451 // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
35452 if let Expression::Function(ref inner_f) = arg {
35453 let inner_args = inner_f.args.clone();
35454 let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
35455 "SHA1" => Expression::Function(Box::new(Function::new(
35456 "SHA1_BINARY".to_string(),
35457 inner_args,
35458 ))),
35459 "MD5" => Expression::Function(Box::new(Function::new(
35460 "MD5_BINARY".to_string(),
35461 inner_args,
35462 ))),
35463 "SHA256" => {
35464 let mut a = inner_args;
35465 a.push(Expression::number(256));
35466 Expression::Function(Box::new(Function::new(
35467 "SHA2_BINARY".to_string(),
35468 a,
35469 )))
35470 }
35471 "SHA512" => {
35472 let mut a = inner_args;
35473 a.push(Expression::number(512));
35474 Expression::Function(Box::new(Function::new(
35475 "SHA2_BINARY".to_string(),
35476 a,
35477 )))
35478 }
35479 _ => arg.clone(),
35480 };
35481 Ok(Expression::Function(Box::new(Function::new(
35482 "TO_CHAR".to_string(),
35483 vec![binary_func],
35484 ))))
35485 } else {
35486 let inner = Expression::Function(Box::new(Function::new(
35487 "HEX".to_string(),
35488 vec![arg],
35489 )));
35490 Ok(Expression::Lower(Box::new(
35491 crate::expressions::UnaryFunc::new(inner),
35492 )))
35493 }
35494 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
35495 let inner = Expression::Function(Box::new(Function::new(
35496 "TO_HEX".to_string(),
35497 vec![arg],
35498 )));
35499 Ok(Expression::Lower(Box::new(
35500 crate::expressions::UnaryFunc::new(inner),
35501 )))
35502 } else {
35503 let inner =
35504 Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
35505 Ok(Expression::Lower(Box::new(
35506 crate::expressions::UnaryFunc::new(inner),
35507 )))
35508 }
35509 }
35510
35511 // LAST_DAY(date, unit) -> strip unit for most targets, or transform for PostgreSQL
35512 "LAST_DAY" if args.len() == 2 => {
35513 let date = args.remove(0);
35514 let _unit = args.remove(0); // Strip the unit (MONTH is default)
35515 Ok(Expression::Function(Box::new(Function::new(
35516 "LAST_DAY".to_string(),
35517 vec![date],
35518 ))))
35519 }
35520
35521 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
35522 "GENERATE_ARRAY" => {
35523 let start = args.get(0).cloned();
35524 let end = args.get(1).cloned();
35525 let step = args.get(2).cloned();
35526 Ok(Expression::GenerateSeries(Box::new(
35527 crate::expressions::GenerateSeries {
35528 start: start.map(Box::new),
35529 end: end.map(Box::new),
35530 step: step.map(Box::new),
35531 is_end_exclusive: None,
35532 },
35533 )))
35534 }
35535
35536 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
35537 "GENERATE_TIMESTAMP_ARRAY" => {
35538 let start = args.get(0).cloned();
35539 let end = args.get(1).cloned();
35540 let step = args.get(2).cloned();
35541
35542 if matches!(target, DialectType::DuckDB) {
35543 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
35544 // Only cast string literals - leave columns/expressions as-is
35545 let maybe_cast_ts = |expr: Expression| -> Expression {
35546 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
35547 {
35548 Expression::Cast(Box::new(Cast {
35549 this: expr,
35550 to: DataType::Timestamp {
35551 precision: None,
35552 timezone: false,
35553 },
35554 trailing_comments: vec![],
35555 double_colon_syntax: false,
35556 format: None,
35557 default: None,
35558 inferred_type: None,
35559 }))
35560 } else {
35561 expr
35562 }
35563 };
35564 let cast_start = start.map(maybe_cast_ts);
35565 let cast_end = end.map(maybe_cast_ts);
35566 Ok(Expression::GenerateSeries(Box::new(
35567 crate::expressions::GenerateSeries {
35568 start: cast_start.map(Box::new),
35569 end: cast_end.map(Box::new),
35570 step: step.map(Box::new),
35571 is_end_exclusive: None,
35572 },
35573 )))
35574 } else {
35575 Ok(Expression::GenerateSeries(Box::new(
35576 crate::expressions::GenerateSeries {
35577 start: start.map(Box::new),
35578 end: end.map(Box::new),
35579 step: step.map(Box::new),
35580 is_end_exclusive: None,
35581 },
35582 )))
35583 }
35584 }
35585
35586 // TO_JSON(x) -> target-specific (from Spark/Hive)
35587 "TO_JSON" => {
35588 match target {
35589 DialectType::Presto | DialectType::Trino => {
35590 // JSON_FORMAT(CAST(x AS JSON))
35591 let arg = args
35592 .into_iter()
35593 .next()
35594 .unwrap_or(Expression::Null(crate::expressions::Null));
35595 let cast_json = Expression::Cast(Box::new(Cast {
35596 this: arg,
35597 to: DataType::Custom {
35598 name: "JSON".to_string(),
35599 },
35600 trailing_comments: vec![],
35601 double_colon_syntax: false,
35602 format: None,
35603 default: None,
35604 inferred_type: None,
35605 }));
35606 Ok(Expression::Function(Box::new(Function::new(
35607 "JSON_FORMAT".to_string(),
35608 vec![cast_json],
35609 ))))
35610 }
35611 DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
35612 "TO_JSON_STRING".to_string(),
35613 args,
35614 )))),
35615 DialectType::DuckDB => {
35616 // CAST(TO_JSON(x) AS TEXT)
35617 let arg = args
35618 .into_iter()
35619 .next()
35620 .unwrap_or(Expression::Null(crate::expressions::Null));
35621 let to_json = Expression::Function(Box::new(Function::new(
35622 "TO_JSON".to_string(),
35623 vec![arg],
35624 )));
35625 Ok(Expression::Cast(Box::new(Cast {
35626 this: to_json,
35627 to: DataType::Text,
35628 trailing_comments: vec![],
35629 double_colon_syntax: false,
35630 format: None,
35631 default: None,
35632 inferred_type: None,
35633 })))
35634 }
35635 _ => Ok(Expression::Function(Box::new(Function::new(
35636 "TO_JSON".to_string(),
35637 args,
35638 )))),
35639 }
35640 }
35641
35642 // TO_JSON_STRING(x) -> target-specific
35643 "TO_JSON_STRING" => {
35644 match target {
35645 DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
35646 Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
35647 ),
35648 DialectType::Presto | DialectType::Trino => {
35649 // JSON_FORMAT(CAST(x AS JSON))
35650 let arg = args
35651 .into_iter()
35652 .next()
35653 .unwrap_or(Expression::Null(crate::expressions::Null));
35654 let cast_json = Expression::Cast(Box::new(Cast {
35655 this: arg,
35656 to: DataType::Custom {
35657 name: "JSON".to_string(),
35658 },
35659 trailing_comments: vec![],
35660 double_colon_syntax: false,
35661 format: None,
35662 default: None,
35663 inferred_type: None,
35664 }));
35665 Ok(Expression::Function(Box::new(Function::new(
35666 "JSON_FORMAT".to_string(),
35667 vec![cast_json],
35668 ))))
35669 }
35670 DialectType::DuckDB => {
35671 // CAST(TO_JSON(x) AS TEXT)
35672 let arg = args
35673 .into_iter()
35674 .next()
35675 .unwrap_or(Expression::Null(crate::expressions::Null));
35676 let to_json = Expression::Function(Box::new(Function::new(
35677 "TO_JSON".to_string(),
35678 vec![arg],
35679 )));
35680 Ok(Expression::Cast(Box::new(Cast {
35681 this: to_json,
35682 to: DataType::Text,
35683 trailing_comments: vec![],
35684 double_colon_syntax: false,
35685 format: None,
35686 default: None,
35687 inferred_type: None,
35688 })))
35689 }
35690 DialectType::Snowflake => {
35691 // TO_JSON(x)
35692 Ok(Expression::Function(Box::new(Function::new(
35693 "TO_JSON".to_string(),
35694 args,
35695 ))))
35696 }
35697 _ => Ok(Expression::Function(Box::new(Function::new(
35698 "TO_JSON_STRING".to_string(),
35699 args,
35700 )))),
35701 }
35702 }
35703
35704 // SAFE_ADD(x, y) -> SafeAdd expression
35705 "SAFE_ADD" if args.len() == 2 => {
35706 let x = args.remove(0);
35707 let y = args.remove(0);
35708 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
35709 this: Box::new(x),
35710 expression: Box::new(y),
35711 })))
35712 }
35713
35714 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
35715 "SAFE_SUBTRACT" if args.len() == 2 => {
35716 let x = args.remove(0);
35717 let y = args.remove(0);
35718 Ok(Expression::SafeSubtract(Box::new(
35719 crate::expressions::SafeSubtract {
35720 this: Box::new(x),
35721 expression: Box::new(y),
35722 },
35723 )))
35724 }
35725
35726 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
35727 "SAFE_MULTIPLY" if args.len() == 2 => {
35728 let x = args.remove(0);
35729 let y = args.remove(0);
35730 Ok(Expression::SafeMultiply(Box::new(
35731 crate::expressions::SafeMultiply {
35732 this: Box::new(x),
35733 expression: Box::new(y),
35734 },
35735 )))
35736 }
35737
35738 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
35739 "REGEXP_CONTAINS" if args.len() == 2 => {
35740 let str_expr = args.remove(0);
35741 let pattern = args.remove(0);
35742 Ok(Expression::RegexpLike(Box::new(
35743 crate::expressions::RegexpFunc {
35744 this: str_expr,
35745 pattern,
35746 flags: None,
35747 },
35748 )))
35749 }
35750
35751 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
35752 "CONTAINS_SUBSTR" if args.len() == 2 => {
35753 let a = args.remove(0);
35754 let b = args.remove(0);
35755 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
35756 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
35757 Ok(Expression::Function(Box::new(Function::new(
35758 "CONTAINS".to_string(),
35759 vec![lower_a, lower_b],
35760 ))))
35761 }
35762
35763 // INT64(x) -> CAST(x AS BIGINT)
35764 "INT64" if args.len() == 1 => {
35765 let arg = args.remove(0);
35766 Ok(Expression::Cast(Box::new(Cast {
35767 this: arg,
35768 to: DataType::BigInt { length: None },
35769 trailing_comments: vec![],
35770 double_colon_syntax: false,
35771 format: None,
35772 default: None,
35773 inferred_type: None,
35774 })))
35775 }
35776
35777 // INSTR(str, substr) -> target-specific
35778 "INSTR" if args.len() >= 2 => {
35779 let str_expr = args.remove(0);
35780 let substr = args.remove(0);
35781 if matches!(target, DialectType::Snowflake) {
35782 // CHARINDEX(substr, str)
35783 Ok(Expression::Function(Box::new(Function::new(
35784 "CHARINDEX".to_string(),
35785 vec![substr, str_expr],
35786 ))))
35787 } else if matches!(target, DialectType::BigQuery) {
35788 // Keep as INSTR
35789 Ok(Expression::Function(Box::new(Function::new(
35790 "INSTR".to_string(),
35791 vec![str_expr, substr],
35792 ))))
35793 } else {
35794 // Default: keep as INSTR
35795 Ok(Expression::Function(Box::new(Function::new(
35796 "INSTR".to_string(),
35797 vec![str_expr, substr],
35798 ))))
35799 }
35800 }
35801
35802 // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL
35803 "DATE_TRUNC" if args.len() == 2 => {
35804 let expr = args.remove(0);
35805 let unit_expr = args.remove(0);
35806 let unit_str = get_unit_str(&unit_expr);
35807
35808 match target {
35809 DialectType::DuckDB
35810 | DialectType::Snowflake
35811 | DialectType::PostgreSQL
35812 | DialectType::Presto
35813 | DialectType::Trino
35814 | DialectType::Databricks
35815 | DialectType::Spark
35816 | DialectType::Redshift
35817 | DialectType::ClickHouse
35818 | DialectType::TSQL => {
35819 // Standard: DATE_TRUNC('UNIT', expr)
35820 Ok(Expression::Function(Box::new(Function::new(
35821 "DATE_TRUNC".to_string(),
35822 vec![
35823 Expression::Literal(Box::new(Literal::String(unit_str))),
35824 expr,
35825 ],
35826 ))))
35827 }
35828 _ => {
35829 // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
35830 Ok(Expression::Function(Box::new(Function::new(
35831 "DATE_TRUNC".to_string(),
35832 vec![expr, unit_expr],
35833 ))))
35834 }
35835 }
35836 }
35837
35838 // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
35839 "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
35840 // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
35841 let ts = args.remove(0);
35842 let unit_expr = args.remove(0);
35843 let tz = if !args.is_empty() {
35844 Some(args.remove(0))
35845 } else {
35846 None
35847 };
35848 let unit_str = get_unit_str(&unit_expr);
35849
35850 match target {
35851 DialectType::DuckDB => {
35852 // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
35853 // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
35854 // Without timezone for MINUTE+ granularity: just DATE_TRUNC
35855 let is_coarse = matches!(
35856 unit_str.as_str(),
35857 "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
35858 );
35859 // For DATETIME_TRUNC, cast string args to TIMESTAMP
35860 let cast_ts = if name == "DATETIME_TRUNC" {
35861 match ts {
35862 Expression::Literal(ref lit)
35863 if matches!(lit.as_ref(), Literal::String(ref _s)) =>
35864 {
35865 Expression::Cast(Box::new(Cast {
35866 this: ts,
35867 to: DataType::Timestamp {
35868 precision: None,
35869 timezone: false,
35870 },
35871 trailing_comments: vec![],
35872 double_colon_syntax: false,
35873 format: None,
35874 default: None,
35875 inferred_type: None,
35876 }))
35877 }
35878 _ => Self::maybe_cast_ts_to_tz(ts, &name),
35879 }
35880 } else {
35881 Self::maybe_cast_ts_to_tz(ts, &name)
35882 };
35883
35884 if let Some(tz_arg) = tz {
35885 if is_coarse {
35886 // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
35887 let at_tz = Expression::AtTimeZone(Box::new(
35888 crate::expressions::AtTimeZone {
35889 this: cast_ts,
35890 zone: tz_arg.clone(),
35891 },
35892 ));
35893 let date_trunc = Expression::Function(Box::new(Function::new(
35894 "DATE_TRUNC".to_string(),
35895 vec![
35896 Expression::Literal(Box::new(Literal::String(unit_str))),
35897 at_tz,
35898 ],
35899 )));
35900 Ok(Expression::AtTimeZone(Box::new(
35901 crate::expressions::AtTimeZone {
35902 this: date_trunc,
35903 zone: tz_arg,
35904 },
35905 )))
35906 } else {
35907 // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
35908 Ok(Expression::Function(Box::new(Function::new(
35909 "DATE_TRUNC".to_string(),
35910 vec![
35911 Expression::Literal(Box::new(Literal::String(unit_str))),
35912 cast_ts,
35913 ],
35914 ))))
35915 }
35916 } else {
35917 // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
35918 Ok(Expression::Function(Box::new(Function::new(
35919 "DATE_TRUNC".to_string(),
35920 vec![
35921 Expression::Literal(Box::new(Literal::String(unit_str))),
35922 cast_ts,
35923 ],
35924 ))))
35925 }
35926 }
35927 DialectType::Databricks | DialectType::Spark => {
35928 // Databricks/Spark: DATE_TRUNC('UNIT', ts)
35929 Ok(Expression::Function(Box::new(Function::new(
35930 "DATE_TRUNC".to_string(),
35931 vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
35932 ))))
35933 }
35934 _ => {
35935 // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
35936 let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
35937 let mut date_trunc_args = vec![unit, ts];
35938 if let Some(tz_arg) = tz {
35939 date_trunc_args.push(tz_arg);
35940 }
35941 Ok(Expression::Function(Box::new(Function::new(
35942 "TIMESTAMP_TRUNC".to_string(),
35943 date_trunc_args,
35944 ))))
35945 }
35946 }
35947 }
35948
35949 // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
35950 "TIME" => {
35951 if args.len() == 3 {
35952 // TIME(h, m, s) constructor
35953 match target {
35954 DialectType::TSQL => {
35955 // TIMEFROMPARTS(h, m, s, 0, 0)
35956 args.push(Expression::number(0));
35957 args.push(Expression::number(0));
35958 Ok(Expression::Function(Box::new(Function::new(
35959 "TIMEFROMPARTS".to_string(),
35960 args,
35961 ))))
35962 }
35963 DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
35964 "MAKETIME".to_string(),
35965 args,
35966 )))),
35967 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
35968 Function::new("MAKE_TIME".to_string(), args),
35969 ))),
35970 _ => Ok(Expression::Function(Box::new(Function::new(
35971 "TIME".to_string(),
35972 args,
35973 )))),
35974 }
35975 } else if args.len() == 1 {
35976 let arg = args.remove(0);
35977 if matches!(target, DialectType::Spark) {
35978 // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
35979 Ok(Expression::Cast(Box::new(Cast {
35980 this: arg,
35981 to: DataType::Timestamp {
35982 timezone: false,
35983 precision: None,
35984 },
35985 trailing_comments: vec![],
35986 double_colon_syntax: false,
35987 format: None,
35988 default: None,
35989 inferred_type: None,
35990 })))
35991 } else {
35992 // Most targets: CAST(x AS TIME)
35993 Ok(Expression::Cast(Box::new(Cast {
35994 this: arg,
35995 to: DataType::Time {
35996 precision: None,
35997 timezone: false,
35998 },
35999 trailing_comments: vec![],
36000 double_colon_syntax: false,
36001 format: None,
36002 default: None,
36003 inferred_type: None,
36004 })))
36005 }
36006 } else if args.len() == 2 {
36007 // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
36008 let expr = args.remove(0);
36009 let tz = args.remove(0);
36010 let cast_tstz = Expression::Cast(Box::new(Cast {
36011 this: expr,
36012 to: DataType::Timestamp {
36013 timezone: true,
36014 precision: None,
36015 },
36016 trailing_comments: vec![],
36017 double_colon_syntax: false,
36018 format: None,
36019 default: None,
36020 inferred_type: None,
36021 }));
36022 let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36023 this: cast_tstz,
36024 zone: tz,
36025 }));
36026 Ok(Expression::Cast(Box::new(Cast {
36027 this: at_tz,
36028 to: DataType::Time {
36029 precision: None,
36030 timezone: false,
36031 },
36032 trailing_comments: vec![],
36033 double_colon_syntax: false,
36034 format: None,
36035 default: None,
36036 inferred_type: None,
36037 })))
36038 } else {
36039 Ok(Expression::Function(Box::new(Function::new(
36040 "TIME".to_string(),
36041 args,
36042 ))))
36043 }
36044 }
36045
36046 // DATETIME('string') -> CAST('string' AS TIMESTAMP)
36047 // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
36048 // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
36049 // DATETIME(y, m, d, h, min, s) -> target-specific
36050 "DATETIME" => {
36051 // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
36052 if matches!(target, DialectType::BigQuery) {
36053 if args.len() == 2 {
36054 let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
36055 if has_time_literal {
36056 let first = args.remove(0);
36057 let second = args.remove(0);
36058 let time_as_cast = match second {
36059 Expression::Literal(lit)
36060 if matches!(lit.as_ref(), Literal::Time(_)) =>
36061 {
36062 let Literal::Time(s) = lit.as_ref() else {
36063 unreachable!()
36064 };
36065 Expression::Cast(Box::new(Cast {
36066 this: Expression::Literal(Box::new(Literal::String(
36067 s.clone(),
36068 ))),
36069 to: DataType::Time {
36070 precision: None,
36071 timezone: false,
36072 },
36073 trailing_comments: vec![],
36074 double_colon_syntax: false,
36075 format: None,
36076 default: None,
36077 inferred_type: None,
36078 }))
36079 }
36080 other => other,
36081 };
36082 return Ok(Expression::Function(Box::new(Function::new(
36083 "DATETIME".to_string(),
36084 vec![first, time_as_cast],
36085 ))));
36086 }
36087 }
36088 return Ok(Expression::Function(Box::new(Function::new(
36089 "DATETIME".to_string(),
36090 args,
36091 ))));
36092 }
36093
36094 if args.len() == 1 {
36095 let arg = args.remove(0);
36096 Ok(Expression::Cast(Box::new(Cast {
36097 this: arg,
36098 to: DataType::Timestamp {
36099 timezone: false,
36100 precision: None,
36101 },
36102 trailing_comments: vec![],
36103 double_colon_syntax: false,
36104 format: None,
36105 default: None,
36106 inferred_type: None,
36107 })))
36108 } else if args.len() == 2 {
36109 let first = args.remove(0);
36110 let second = args.remove(0);
36111 // Check if second arg is a TIME literal
36112 let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
36113 if is_time_literal {
36114 // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
36115 let cast_date = Expression::Cast(Box::new(Cast {
36116 this: first,
36117 to: DataType::Date,
36118 trailing_comments: vec![],
36119 double_colon_syntax: false,
36120 format: None,
36121 default: None,
36122 inferred_type: None,
36123 }));
36124 // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
36125 let time_as_string = match second {
36126 Expression::Literal(lit)
36127 if matches!(lit.as_ref(), Literal::Time(_)) =>
36128 {
36129 let Literal::Time(s) = lit.as_ref() else {
36130 unreachable!()
36131 };
36132 Expression::Literal(Box::new(Literal::String(s.clone())))
36133 }
36134 other => other,
36135 };
36136 let cast_time = Expression::Cast(Box::new(Cast {
36137 this: time_as_string,
36138 to: DataType::Time {
36139 precision: None,
36140 timezone: false,
36141 },
36142 trailing_comments: vec![],
36143 double_colon_syntax: false,
36144 format: None,
36145 default: None,
36146 inferred_type: None,
36147 }));
36148 let add_expr =
36149 Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
36150 Ok(Expression::Cast(Box::new(Cast {
36151 this: add_expr,
36152 to: DataType::Timestamp {
36153 timezone: false,
36154 precision: None,
36155 },
36156 trailing_comments: vec![],
36157 double_colon_syntax: false,
36158 format: None,
36159 default: None,
36160 inferred_type: None,
36161 })))
36162 } else {
36163 // DATETIME('string', 'timezone')
36164 let cast_tstz = Expression::Cast(Box::new(Cast {
36165 this: first,
36166 to: DataType::Timestamp {
36167 timezone: true,
36168 precision: None,
36169 },
36170 trailing_comments: vec![],
36171 double_colon_syntax: false,
36172 format: None,
36173 default: None,
36174 inferred_type: None,
36175 }));
36176 let at_tz =
36177 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36178 this: cast_tstz,
36179 zone: second,
36180 }));
36181 Ok(Expression::Cast(Box::new(Cast {
36182 this: at_tz,
36183 to: DataType::Timestamp {
36184 timezone: false,
36185 precision: None,
36186 },
36187 trailing_comments: vec![],
36188 double_colon_syntax: false,
36189 format: None,
36190 default: None,
36191 inferred_type: None,
36192 })))
36193 }
36194 } else if args.len() >= 3 {
36195 // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
36196 // For other targets, use MAKE_TIMESTAMP or similar
36197 if matches!(target, DialectType::Snowflake) {
36198 Ok(Expression::Function(Box::new(Function::new(
36199 "TIMESTAMP_FROM_PARTS".to_string(),
36200 args,
36201 ))))
36202 } else {
36203 Ok(Expression::Function(Box::new(Function::new(
36204 "DATETIME".to_string(),
36205 args,
36206 ))))
36207 }
36208 } else {
36209 Ok(Expression::Function(Box::new(Function::new(
36210 "DATETIME".to_string(),
36211 args,
36212 ))))
36213 }
36214 }
36215
36216 // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
36217 // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
36218 "TIMESTAMP" => {
36219 if args.len() == 1 {
36220 let arg = args.remove(0);
36221 Ok(Expression::Cast(Box::new(Cast {
36222 this: arg,
36223 to: DataType::Timestamp {
36224 timezone: true,
36225 precision: None,
36226 },
36227 trailing_comments: vec![],
36228 double_colon_syntax: false,
36229 format: None,
36230 default: None,
36231 inferred_type: None,
36232 })))
36233 } else if args.len() == 2 {
36234 let arg = args.remove(0);
36235 let tz = args.remove(0);
36236 let cast_ts = Expression::Cast(Box::new(Cast {
36237 this: arg,
36238 to: DataType::Timestamp {
36239 timezone: false,
36240 precision: None,
36241 },
36242 trailing_comments: vec![],
36243 double_colon_syntax: false,
36244 format: None,
36245 default: None,
36246 inferred_type: None,
36247 }));
36248 if matches!(target, DialectType::Snowflake) {
36249 // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
36250 Ok(Expression::Function(Box::new(Function::new(
36251 "CONVERT_TIMEZONE".to_string(),
36252 vec![tz, cast_ts],
36253 ))))
36254 } else {
36255 Ok(Expression::AtTimeZone(Box::new(
36256 crate::expressions::AtTimeZone {
36257 this: cast_ts,
36258 zone: tz,
36259 },
36260 )))
36261 }
36262 } else {
36263 Ok(Expression::Function(Box::new(Function::new(
36264 "TIMESTAMP".to_string(),
36265 args,
36266 ))))
36267 }
36268 }
36269
36270 // STRING(x) -> CAST(x AS VARCHAR/TEXT)
36271 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
36272 "STRING" => {
36273 if args.len() == 1 {
36274 let arg = args.remove(0);
36275 let cast_type = match target {
36276 DialectType::DuckDB => DataType::Text,
36277 _ => DataType::VarChar {
36278 length: None,
36279 parenthesized_length: false,
36280 },
36281 };
36282 Ok(Expression::Cast(Box::new(Cast {
36283 this: arg,
36284 to: cast_type,
36285 trailing_comments: vec![],
36286 double_colon_syntax: false,
36287 format: None,
36288 default: None,
36289 inferred_type: None,
36290 })))
36291 } else if args.len() == 2 {
36292 let arg = args.remove(0);
36293 let tz = args.remove(0);
36294 let cast_type = match target {
36295 DialectType::DuckDB => DataType::Text,
36296 _ => DataType::VarChar {
36297 length: None,
36298 parenthesized_length: false,
36299 },
36300 };
36301 if matches!(target, DialectType::Snowflake) {
36302 // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
36303 let convert_tz = Expression::Function(Box::new(Function::new(
36304 "CONVERT_TIMEZONE".to_string(),
36305 vec![
36306 Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
36307 tz,
36308 arg,
36309 ],
36310 )));
36311 Ok(Expression::Cast(Box::new(Cast {
36312 this: convert_tz,
36313 to: cast_type,
36314 trailing_comments: vec![],
36315 double_colon_syntax: false,
36316 format: None,
36317 default: None,
36318 inferred_type: None,
36319 })))
36320 } else {
36321 // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
36322 let cast_ts = Expression::Cast(Box::new(Cast {
36323 this: arg,
36324 to: DataType::Timestamp {
36325 timezone: false,
36326 precision: None,
36327 },
36328 trailing_comments: vec![],
36329 double_colon_syntax: false,
36330 format: None,
36331 default: None,
36332 inferred_type: None,
36333 }));
36334 let at_utc =
36335 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36336 this: cast_ts,
36337 zone: Expression::Literal(Box::new(Literal::String(
36338 "UTC".to_string(),
36339 ))),
36340 }));
36341 let at_tz =
36342 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
36343 this: at_utc,
36344 zone: tz,
36345 }));
36346 Ok(Expression::Cast(Box::new(Cast {
36347 this: at_tz,
36348 to: cast_type,
36349 trailing_comments: vec![],
36350 double_colon_syntax: false,
36351 format: None,
36352 default: None,
36353 inferred_type: None,
36354 })))
36355 }
36356 } else {
36357 Ok(Expression::Function(Box::new(Function::new(
36358 "STRING".to_string(),
36359 args,
36360 ))))
36361 }
36362 }
36363
36364 // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
36365 "UNIX_SECONDS" if args.len() == 1 => {
36366 let ts = args.remove(0);
36367 match target {
36368 DialectType::DuckDB => {
36369 // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
36370 let cast_ts = Self::ensure_cast_timestamptz(ts);
36371 let epoch = Expression::Function(Box::new(Function::new(
36372 "EPOCH".to_string(),
36373 vec![cast_ts],
36374 )));
36375 Ok(Expression::Cast(Box::new(Cast {
36376 this: epoch,
36377 to: DataType::BigInt { length: None },
36378 trailing_comments: vec![],
36379 double_colon_syntax: false,
36380 format: None,
36381 default: None,
36382 inferred_type: None,
36383 })))
36384 }
36385 DialectType::Snowflake => {
36386 // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
36387 let epoch = Expression::Cast(Box::new(Cast {
36388 this: Expression::Literal(Box::new(Literal::String(
36389 "1970-01-01 00:00:00+00".to_string(),
36390 ))),
36391 to: DataType::Timestamp {
36392 timezone: true,
36393 precision: None,
36394 },
36395 trailing_comments: vec![],
36396 double_colon_syntax: false,
36397 format: None,
36398 default: None,
36399 inferred_type: None,
36400 }));
36401 Ok(Expression::TimestampDiff(Box::new(
36402 crate::expressions::TimestampDiff {
36403 this: Box::new(epoch),
36404 expression: Box::new(ts),
36405 unit: Some("SECONDS".to_string()),
36406 },
36407 )))
36408 }
36409 _ => Ok(Expression::Function(Box::new(Function::new(
36410 "UNIX_SECONDS".to_string(),
36411 vec![ts],
36412 )))),
36413 }
36414 }
36415
36416 "UNIX_MILLIS" if args.len() == 1 => {
36417 let ts = args.remove(0);
36418 match target {
36419 DialectType::DuckDB => {
36420 // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
36421 let cast_ts = Self::ensure_cast_timestamptz(ts);
36422 Ok(Expression::Function(Box::new(Function::new(
36423 "EPOCH_MS".to_string(),
36424 vec![cast_ts],
36425 ))))
36426 }
36427 _ => Ok(Expression::Function(Box::new(Function::new(
36428 "UNIX_MILLIS".to_string(),
36429 vec![ts],
36430 )))),
36431 }
36432 }
36433
36434 "UNIX_MICROS" if args.len() == 1 => {
36435 let ts = args.remove(0);
36436 match target {
36437 DialectType::DuckDB => {
36438 // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
36439 let cast_ts = Self::ensure_cast_timestamptz(ts);
36440 Ok(Expression::Function(Box::new(Function::new(
36441 "EPOCH_US".to_string(),
36442 vec![cast_ts],
36443 ))))
36444 }
36445 _ => Ok(Expression::Function(Box::new(Function::new(
36446 "UNIX_MICROS".to_string(),
36447 vec![ts],
36448 )))),
36449 }
36450 }
36451
36452 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
36453 "ARRAY_CONCAT" | "LIST_CONCAT" => {
36454 match target {
36455 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36456 // CONCAT(arr1, arr2, ...)
36457 Ok(Expression::Function(Box::new(Function::new(
36458 "CONCAT".to_string(),
36459 args,
36460 ))))
36461 }
36462 DialectType::Presto | DialectType::Trino => {
36463 // CONCAT(arr1, arr2, ...)
36464 Ok(Expression::Function(Box::new(Function::new(
36465 "CONCAT".to_string(),
36466 args,
36467 ))))
36468 }
36469 DialectType::Snowflake => {
36470 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
36471 if args.len() == 1 {
36472 // ARRAY_CAT requires 2 args, add empty array as []
36473 let empty_arr = Expression::ArrayFunc(Box::new(
36474 crate::expressions::ArrayConstructor {
36475 expressions: vec![],
36476 bracket_notation: true,
36477 use_list_keyword: false,
36478 },
36479 ));
36480 let mut new_args = args;
36481 new_args.push(empty_arr);
36482 Ok(Expression::Function(Box::new(Function::new(
36483 "ARRAY_CAT".to_string(),
36484 new_args,
36485 ))))
36486 } else if args.is_empty() {
36487 Ok(Expression::Function(Box::new(Function::new(
36488 "ARRAY_CAT".to_string(),
36489 args,
36490 ))))
36491 } else {
36492 let mut it = args.into_iter().rev();
36493 let mut result = it.next().unwrap();
36494 for arr in it {
36495 result = Expression::Function(Box::new(Function::new(
36496 "ARRAY_CAT".to_string(),
36497 vec![arr, result],
36498 )));
36499 }
36500 Ok(result)
36501 }
36502 }
36503 DialectType::PostgreSQL => {
36504 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
36505 if args.len() <= 1 {
36506 Ok(Expression::Function(Box::new(Function::new(
36507 "ARRAY_CAT".to_string(),
36508 args,
36509 ))))
36510 } else {
36511 let mut it = args.into_iter().rev();
36512 let mut result = it.next().unwrap();
36513 for arr in it {
36514 result = Expression::Function(Box::new(Function::new(
36515 "ARRAY_CAT".to_string(),
36516 vec![arr, result],
36517 )));
36518 }
36519 Ok(result)
36520 }
36521 }
36522 DialectType::Redshift => {
36523 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
36524 if args.len() <= 2 {
36525 Ok(Expression::Function(Box::new(Function::new(
36526 "ARRAY_CONCAT".to_string(),
36527 args,
36528 ))))
36529 } else {
36530 let mut it = args.into_iter().rev();
36531 let mut result = it.next().unwrap();
36532 for arr in it {
36533 result = Expression::Function(Box::new(Function::new(
36534 "ARRAY_CONCAT".to_string(),
36535 vec![arr, result],
36536 )));
36537 }
36538 Ok(result)
36539 }
36540 }
36541 DialectType::DuckDB => {
36542 // LIST_CONCAT supports multiple args natively in DuckDB
36543 Ok(Expression::Function(Box::new(Function::new(
36544 "LIST_CONCAT".to_string(),
36545 args,
36546 ))))
36547 }
36548 _ => Ok(Expression::Function(Box::new(Function::new(
36549 "ARRAY_CONCAT".to_string(),
36550 args,
36551 )))),
36552 }
36553 }
36554
36555 // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
36556 "ARRAY_CONCAT_AGG" if args.len() == 1 => {
36557 let arg = args.remove(0);
36558 match target {
36559 DialectType::Snowflake => {
36560 let array_agg =
36561 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
36562 this: arg,
36563 distinct: false,
36564 filter: None,
36565 order_by: vec![],
36566 name: None,
36567 ignore_nulls: None,
36568 having_max: None,
36569 limit: None,
36570 inferred_type: None,
36571 }));
36572 Ok(Expression::Function(Box::new(Function::new(
36573 "ARRAY_FLATTEN".to_string(),
36574 vec![array_agg],
36575 ))))
36576 }
36577 _ => Ok(Expression::Function(Box::new(Function::new(
36578 "ARRAY_CONCAT_AGG".to_string(),
36579 vec![arg],
36580 )))),
36581 }
36582 }
36583
36584 // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
36585 "MD5" if args.len() == 1 => {
36586 let arg = args.remove(0);
36587 match target {
36588 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
36589 // UNHEX(MD5(x))
36590 let md5 = Expression::Function(Box::new(Function::new(
36591 "MD5".to_string(),
36592 vec![arg],
36593 )));
36594 Ok(Expression::Function(Box::new(Function::new(
36595 "UNHEX".to_string(),
36596 vec![md5],
36597 ))))
36598 }
36599 DialectType::Snowflake => {
36600 // MD5_BINARY(x)
36601 Ok(Expression::Function(Box::new(Function::new(
36602 "MD5_BINARY".to_string(),
36603 vec![arg],
36604 ))))
36605 }
36606 _ => Ok(Expression::Function(Box::new(Function::new(
36607 "MD5".to_string(),
36608 vec![arg],
36609 )))),
36610 }
36611 }
36612
36613 "SHA1" if args.len() == 1 => {
36614 let arg = args.remove(0);
36615 match target {
36616 DialectType::DuckDB => {
36617 // UNHEX(SHA1(x))
36618 let sha1 = Expression::Function(Box::new(Function::new(
36619 "SHA1".to_string(),
36620 vec![arg],
36621 )));
36622 Ok(Expression::Function(Box::new(Function::new(
36623 "UNHEX".to_string(),
36624 vec![sha1],
36625 ))))
36626 }
36627 _ => Ok(Expression::Function(Box::new(Function::new(
36628 "SHA1".to_string(),
36629 vec![arg],
36630 )))),
36631 }
36632 }
36633
36634 "SHA256" if args.len() == 1 => {
36635 let arg = args.remove(0);
36636 match target {
36637 DialectType::DuckDB => {
36638 // UNHEX(SHA256(x))
36639 let sha = Expression::Function(Box::new(Function::new(
36640 "SHA256".to_string(),
36641 vec![arg],
36642 )));
36643 Ok(Expression::Function(Box::new(Function::new(
36644 "UNHEX".to_string(),
36645 vec![sha],
36646 ))))
36647 }
36648 DialectType::Snowflake => {
36649 // SHA2_BINARY(x, 256)
36650 Ok(Expression::Function(Box::new(Function::new(
36651 "SHA2_BINARY".to_string(),
36652 vec![arg, Expression::number(256)],
36653 ))))
36654 }
36655 DialectType::Redshift | DialectType::Spark => {
36656 // SHA2(x, 256)
36657 Ok(Expression::Function(Box::new(Function::new(
36658 "SHA2".to_string(),
36659 vec![arg, Expression::number(256)],
36660 ))))
36661 }
36662 _ => Ok(Expression::Function(Box::new(Function::new(
36663 "SHA256".to_string(),
36664 vec![arg],
36665 )))),
36666 }
36667 }
36668
36669 "SHA512" if args.len() == 1 => {
36670 let arg = args.remove(0);
36671 match target {
36672 DialectType::Snowflake => {
36673 // SHA2_BINARY(x, 512)
36674 Ok(Expression::Function(Box::new(Function::new(
36675 "SHA2_BINARY".to_string(),
36676 vec![arg, Expression::number(512)],
36677 ))))
36678 }
36679 DialectType::Redshift | DialectType::Spark => {
36680 // SHA2(x, 512)
36681 Ok(Expression::Function(Box::new(Function::new(
36682 "SHA2".to_string(),
36683 vec![arg, Expression::number(512)],
36684 ))))
36685 }
36686 _ => Ok(Expression::Function(Box::new(Function::new(
36687 "SHA512".to_string(),
36688 vec![arg],
36689 )))),
36690 }
36691 }
36692
36693 // REGEXP_EXTRACT_ALL(str, pattern) -> add default group arg
36694 "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
36695 let str_expr = args.remove(0);
36696 let pattern = args.remove(0);
36697
36698 // Check if pattern contains capturing groups (parentheses)
36699 let has_groups = match &pattern {
36700 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36701 let Literal::String(s) = lit.as_ref() else {
36702 unreachable!()
36703 };
36704 s.contains('(') && s.contains(')')
36705 }
36706 _ => false,
36707 };
36708
36709 match target {
36710 DialectType::DuckDB => {
36711 let group = if has_groups {
36712 Expression::number(1)
36713 } else {
36714 Expression::number(0)
36715 };
36716 Ok(Expression::Function(Box::new(Function::new(
36717 "REGEXP_EXTRACT_ALL".to_string(),
36718 vec![str_expr, pattern, group],
36719 ))))
36720 }
36721 DialectType::Spark | DialectType::Databricks => {
36722 // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
36723 if has_groups {
36724 Ok(Expression::Function(Box::new(Function::new(
36725 "REGEXP_EXTRACT_ALL".to_string(),
36726 vec![str_expr, pattern],
36727 ))))
36728 } else {
36729 Ok(Expression::Function(Box::new(Function::new(
36730 "REGEXP_EXTRACT_ALL".to_string(),
36731 vec![str_expr, pattern, Expression::number(0)],
36732 ))))
36733 }
36734 }
36735 DialectType::Presto | DialectType::Trino => {
36736 if has_groups {
36737 Ok(Expression::Function(Box::new(Function::new(
36738 "REGEXP_EXTRACT_ALL".to_string(),
36739 vec![str_expr, pattern, Expression::number(1)],
36740 ))))
36741 } else {
36742 Ok(Expression::Function(Box::new(Function::new(
36743 "REGEXP_EXTRACT_ALL".to_string(),
36744 vec![str_expr, pattern],
36745 ))))
36746 }
36747 }
36748 DialectType::Snowflake => {
36749 if has_groups {
36750 // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
36751 Ok(Expression::Function(Box::new(Function::new(
36752 "REGEXP_EXTRACT_ALL".to_string(),
36753 vec![
36754 str_expr,
36755 pattern,
36756 Expression::number(1),
36757 Expression::number(1),
36758 Expression::Literal(Box::new(Literal::String("c".to_string()))),
36759 Expression::number(1),
36760 ],
36761 ))))
36762 } else {
36763 Ok(Expression::Function(Box::new(Function::new(
36764 "REGEXP_EXTRACT_ALL".to_string(),
36765 vec![str_expr, pattern],
36766 ))))
36767 }
36768 }
36769 _ => Ok(Expression::Function(Box::new(Function::new(
36770 "REGEXP_EXTRACT_ALL".to_string(),
36771 vec![str_expr, pattern],
36772 )))),
36773 }
36774 }
36775
36776 // MOD(x, y) -> x % y for dialects that prefer or require the infix operator.
36777 "MOD" if args.len() == 2 => {
36778 match target {
36779 DialectType::PostgreSQL
36780 | DialectType::DuckDB
36781 | DialectType::Presto
36782 | DialectType::Trino
36783 | DialectType::Athena
36784 | DialectType::Snowflake
36785 | DialectType::TSQL
36786 | DialectType::Fabric => {
36787 let x = args.remove(0);
36788 let y = args.remove(0);
36789 // Wrap complex expressions in parens to preserve precedence
36790 let needs_paren = |e: &Expression| {
36791 matches!(
36792 e,
36793 Expression::Add(_)
36794 | Expression::Sub(_)
36795 | Expression::Mul(_)
36796 | Expression::Div(_)
36797 | Expression::Mod(_)
36798 | Expression::ModFunc(_)
36799 )
36800 };
36801 let x = if needs_paren(&x) {
36802 Expression::Paren(Box::new(crate::expressions::Paren {
36803 this: x,
36804 trailing_comments: vec![],
36805 }))
36806 } else {
36807 x
36808 };
36809 let y = if needs_paren(&y) {
36810 Expression::Paren(Box::new(crate::expressions::Paren {
36811 this: y,
36812 trailing_comments: vec![],
36813 }))
36814 } else {
36815 y
36816 };
36817 Ok(Expression::Mod(Box::new(
36818 crate::expressions::BinaryOp::new(x, y),
36819 )))
36820 }
36821 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
36822 // Hive/Spark: a % b
36823 let x = args.remove(0);
36824 let y = args.remove(0);
36825 let needs_paren = |e: &Expression| {
36826 matches!(
36827 e,
36828 Expression::Add(_)
36829 | Expression::Sub(_)
36830 | Expression::Mul(_)
36831 | Expression::Div(_)
36832 | Expression::Mod(_)
36833 | Expression::ModFunc(_)
36834 )
36835 };
36836 let x = if needs_paren(&x) {
36837 Expression::Paren(Box::new(crate::expressions::Paren {
36838 this: x,
36839 trailing_comments: vec![],
36840 }))
36841 } else {
36842 x
36843 };
36844 let y = if needs_paren(&y) {
36845 Expression::Paren(Box::new(crate::expressions::Paren {
36846 this: y,
36847 trailing_comments: vec![],
36848 }))
36849 } else {
36850 y
36851 };
36852 Ok(Expression::Mod(Box::new(
36853 crate::expressions::BinaryOp::new(x, y),
36854 )))
36855 }
36856 _ => Ok(Expression::Function(Box::new(Function::new(
36857 "MOD".to_string(),
36858 args,
36859 )))),
36860 }
36861 }
36862
36863 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
36864 "ARRAY_FILTER" if args.len() == 2 => {
36865 let name = match target {
36866 DialectType::DuckDB => "LIST_FILTER",
36867 DialectType::StarRocks => "ARRAY_FILTER",
36868 _ => "FILTER",
36869 };
36870 Ok(Expression::Function(Box::new(Function::new(
36871 name.to_string(),
36872 args,
36873 ))))
36874 }
36875 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
36876 "FILTER" if args.len() == 2 => {
36877 let name = match target {
36878 DialectType::DuckDB => "LIST_FILTER",
36879 DialectType::StarRocks => "ARRAY_FILTER",
36880 _ => "FILTER",
36881 };
36882 Ok(Expression::Function(Box::new(Function::new(
36883 name.to_string(),
36884 args,
36885 ))))
36886 }
36887 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
36888 "REDUCE" if args.len() >= 3 => {
36889 let name = match target {
36890 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
36891 _ => "REDUCE",
36892 };
36893 Ok(Expression::Function(Box::new(Function::new(
36894 name.to_string(),
36895 args,
36896 ))))
36897 }
36898 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
36899 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
36900 Function::new("ARRAY_REVERSE".to_string(), args),
36901 ))),
36902
36903 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
36904 "CONCAT" if args.len() > 2 => match target {
36905 DialectType::DuckDB => {
36906 let mut it = args.into_iter();
36907 let mut result = it.next().unwrap();
36908 for arg in it {
36909 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
36910 this: Box::new(result),
36911 expression: Box::new(arg),
36912 safe: None,
36913 }));
36914 }
36915 Ok(result)
36916 }
36917 _ => Ok(Expression::Function(Box::new(Function::new(
36918 "CONCAT".to_string(),
36919 args,
36920 )))),
36921 },
36922
36923 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
36924 "GENERATE_DATE_ARRAY" => {
36925 if matches!(target, DialectType::BigQuery) {
36926 // BQ->BQ: add default interval if not present
36927 if args.len() == 2 {
36928 let start = args.remove(0);
36929 let end = args.remove(0);
36930 let default_interval =
36931 Expression::Interval(Box::new(crate::expressions::Interval {
36932 this: Some(Expression::Literal(Box::new(Literal::String(
36933 "1".to_string(),
36934 )))),
36935 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
36936 unit: crate::expressions::IntervalUnit::Day,
36937 use_plural: false,
36938 }),
36939 }));
36940 Ok(Expression::Function(Box::new(Function::new(
36941 "GENERATE_DATE_ARRAY".to_string(),
36942 vec![start, end, default_interval],
36943 ))))
36944 } else {
36945 Ok(Expression::Function(Box::new(Function::new(
36946 "GENERATE_DATE_ARRAY".to_string(),
36947 args,
36948 ))))
36949 }
36950 } else if matches!(target, DialectType::DuckDB) {
36951 // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
36952 let start = args.get(0).cloned();
36953 let end = args.get(1).cloned();
36954 let step = args.get(2).cloned().or_else(|| {
36955 Some(Expression::Interval(Box::new(
36956 crate::expressions::Interval {
36957 this: Some(Expression::Literal(Box::new(Literal::String(
36958 "1".to_string(),
36959 )))),
36960 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
36961 unit: crate::expressions::IntervalUnit::Day,
36962 use_plural: false,
36963 }),
36964 },
36965 )))
36966 });
36967
36968 // Wrap start/end in CAST(... AS DATE) only for string literals
36969 let maybe_cast_date = |expr: Expression| -> Expression {
36970 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
36971 {
36972 Expression::Cast(Box::new(Cast {
36973 this: expr,
36974 to: DataType::Date,
36975 trailing_comments: vec![],
36976 double_colon_syntax: false,
36977 format: None,
36978 default: None,
36979 inferred_type: None,
36980 }))
36981 } else {
36982 expr
36983 }
36984 };
36985 let cast_start = start.map(maybe_cast_date);
36986 let cast_end = end.map(maybe_cast_date);
36987
36988 let gen_series =
36989 Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
36990 start: cast_start.map(Box::new),
36991 end: cast_end.map(Box::new),
36992 step: step.map(Box::new),
36993 is_end_exclusive: None,
36994 }));
36995
36996 // Wrap in CAST(... AS DATE[])
36997 Ok(Expression::Cast(Box::new(Cast {
36998 this: gen_series,
36999 to: DataType::Array {
37000 element_type: Box::new(DataType::Date),
37001 dimension: None,
37002 },
37003 trailing_comments: vec![],
37004 double_colon_syntax: false,
37005 format: None,
37006 default: None,
37007 inferred_type: None,
37008 })))
37009 } else if matches!(target, DialectType::Snowflake) {
37010 // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
37011 // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
37012 if args.len() == 2 {
37013 let start = args.remove(0);
37014 let end = args.remove(0);
37015 let default_interval =
37016 Expression::Interval(Box::new(crate::expressions::Interval {
37017 this: Some(Expression::Literal(Box::new(Literal::String(
37018 "1".to_string(),
37019 )))),
37020 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
37021 unit: crate::expressions::IntervalUnit::Day,
37022 use_plural: false,
37023 }),
37024 }));
37025 Ok(Expression::Function(Box::new(Function::new(
37026 "GENERATE_DATE_ARRAY".to_string(),
37027 vec![start, end, default_interval],
37028 ))))
37029 } else {
37030 Ok(Expression::Function(Box::new(Function::new(
37031 "GENERATE_DATE_ARRAY".to_string(),
37032 args,
37033 ))))
37034 }
37035 } else {
37036 // Convert to GenerateSeries for other targets
37037 let start = args.get(0).cloned();
37038 let end = args.get(1).cloned();
37039 let step = args.get(2).cloned().or_else(|| {
37040 Some(Expression::Interval(Box::new(
37041 crate::expressions::Interval {
37042 this: Some(Expression::Literal(Box::new(Literal::String(
37043 "1".to_string(),
37044 )))),
37045 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
37046 unit: crate::expressions::IntervalUnit::Day,
37047 use_plural: false,
37048 }),
37049 },
37050 )))
37051 });
37052 Ok(Expression::GenerateSeries(Box::new(
37053 crate::expressions::GenerateSeries {
37054 start: start.map(Box::new),
37055 end: end.map(Box::new),
37056 step: step.map(Box::new),
37057 is_end_exclusive: None,
37058 },
37059 )))
37060 }
37061 }
37062
37063 // PARSE_DATE(format, str) -> target-specific
37064 "PARSE_DATE" if args.len() == 2 => {
37065 let format = args.remove(0);
37066 let str_expr = args.remove(0);
37067 match target {
37068 DialectType::DuckDB => {
37069 // CAST(STRPTIME(str, duck_format) AS DATE)
37070 let duck_format = Self::bq_format_to_duckdb(&format);
37071 let strptime = Expression::Function(Box::new(Function::new(
37072 "STRPTIME".to_string(),
37073 vec![str_expr, duck_format],
37074 )));
37075 Ok(Expression::Cast(Box::new(Cast {
37076 this: strptime,
37077 to: DataType::Date,
37078 trailing_comments: vec![],
37079 double_colon_syntax: false,
37080 format: None,
37081 default: None,
37082 inferred_type: None,
37083 })))
37084 }
37085 DialectType::Snowflake => {
37086 // _POLYGLOT_DATE(str, snowflake_format)
37087 // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
37088 let sf_format = Self::bq_format_to_snowflake(&format);
37089 Ok(Expression::Function(Box::new(Function::new(
37090 "_POLYGLOT_DATE".to_string(),
37091 vec![str_expr, sf_format],
37092 ))))
37093 }
37094 _ => Ok(Expression::Function(Box::new(Function::new(
37095 "PARSE_DATE".to_string(),
37096 vec![format, str_expr],
37097 )))),
37098 }
37099 }
37100
37101 // PARSE_TIMESTAMP(format, str) -> target-specific
37102 "PARSE_TIMESTAMP" if args.len() >= 2 => {
37103 let format = args.remove(0);
37104 let str_expr = args.remove(0);
37105 let tz = if !args.is_empty() {
37106 Some(args.remove(0))
37107 } else {
37108 None
37109 };
37110 match target {
37111 DialectType::DuckDB => {
37112 let duck_format = Self::bq_format_to_duckdb(&format);
37113 let strptime = Expression::Function(Box::new(Function::new(
37114 "STRPTIME".to_string(),
37115 vec![str_expr, duck_format],
37116 )));
37117 Ok(strptime)
37118 }
37119 _ => {
37120 let mut result_args = vec![format, str_expr];
37121 if let Some(tz_arg) = tz {
37122 result_args.push(tz_arg);
37123 }
37124 Ok(Expression::Function(Box::new(Function::new(
37125 "PARSE_TIMESTAMP".to_string(),
37126 result_args,
37127 ))))
37128 }
37129 }
37130 }
37131
37132 // FORMAT_DATE(format, date) -> target-specific
37133 "FORMAT_DATE" if args.len() == 2 => {
37134 let format = args.remove(0);
37135 let date_expr = args.remove(0);
37136 match target {
37137 DialectType::DuckDB => {
37138 // STRFTIME(CAST(date AS DATE), format)
37139 let cast_date = Expression::Cast(Box::new(Cast {
37140 this: date_expr,
37141 to: DataType::Date,
37142 trailing_comments: vec![],
37143 double_colon_syntax: false,
37144 format: None,
37145 default: None,
37146 inferred_type: None,
37147 }));
37148 Ok(Expression::Function(Box::new(Function::new(
37149 "STRFTIME".to_string(),
37150 vec![cast_date, format],
37151 ))))
37152 }
37153 _ => Ok(Expression::Function(Box::new(Function::new(
37154 "FORMAT_DATE".to_string(),
37155 vec![format, date_expr],
37156 )))),
37157 }
37158 }
37159
37160 // FORMAT_DATETIME(format, datetime) -> target-specific
37161 "FORMAT_DATETIME" if args.len() == 2 => {
37162 let format = args.remove(0);
37163 let dt_expr = args.remove(0);
37164
37165 if matches!(target, DialectType::BigQuery) {
37166 // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
37167 let norm_format = Self::bq_format_normalize_bq(&format);
37168 // Also strip DATETIME keyword from typed literals
37169 let norm_dt = match dt_expr {
37170 Expression::Literal(lit)
37171 if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
37172 {
37173 let Literal::Timestamp(s) = lit.as_ref() else {
37174 unreachable!()
37175 };
37176 Expression::Cast(Box::new(Cast {
37177 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37178 to: DataType::Custom {
37179 name: "DATETIME".to_string(),
37180 },
37181 trailing_comments: vec![],
37182 double_colon_syntax: false,
37183 format: None,
37184 default: None,
37185 inferred_type: None,
37186 }))
37187 }
37188 other => other,
37189 };
37190 return Ok(Expression::Function(Box::new(Function::new(
37191 "FORMAT_DATETIME".to_string(),
37192 vec![norm_format, norm_dt],
37193 ))));
37194 }
37195
37196 match target {
37197 DialectType::DuckDB => {
37198 // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
37199 let cast_dt = Self::ensure_cast_timestamp(dt_expr);
37200 let duck_format = Self::bq_format_to_duckdb(&format);
37201 Ok(Expression::Function(Box::new(Function::new(
37202 "STRFTIME".to_string(),
37203 vec![cast_dt, duck_format],
37204 ))))
37205 }
37206 _ => Ok(Expression::Function(Box::new(Function::new(
37207 "FORMAT_DATETIME".to_string(),
37208 vec![format, dt_expr],
37209 )))),
37210 }
37211 }
37212
37213 // FORMAT_TIMESTAMP(format, ts) -> target-specific
37214 "FORMAT_TIMESTAMP" if args.len() == 2 => {
37215 let format = args.remove(0);
37216 let ts_expr = args.remove(0);
37217 match target {
37218 DialectType::DuckDB => {
37219 // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
37220 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
37221 let cast_ts = Expression::Cast(Box::new(Cast {
37222 this: cast_tstz,
37223 to: DataType::Timestamp {
37224 timezone: false,
37225 precision: None,
37226 },
37227 trailing_comments: vec![],
37228 double_colon_syntax: false,
37229 format: None,
37230 default: None,
37231 inferred_type: None,
37232 }));
37233 Ok(Expression::Function(Box::new(Function::new(
37234 "STRFTIME".to_string(),
37235 vec![cast_ts, format],
37236 ))))
37237 }
37238 DialectType::Snowflake => {
37239 // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
37240 let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
37241 let cast_ts = Expression::Cast(Box::new(Cast {
37242 this: cast_tstz,
37243 to: DataType::Timestamp {
37244 timezone: false,
37245 precision: None,
37246 },
37247 trailing_comments: vec![],
37248 double_colon_syntax: false,
37249 format: None,
37250 default: None,
37251 inferred_type: None,
37252 }));
37253 let sf_format = Self::bq_format_to_snowflake(&format);
37254 Ok(Expression::Function(Box::new(Function::new(
37255 "TO_CHAR".to_string(),
37256 vec![cast_ts, sf_format],
37257 ))))
37258 }
37259 _ => Ok(Expression::Function(Box::new(Function::new(
37260 "FORMAT_TIMESTAMP".to_string(),
37261 vec![format, ts_expr],
37262 )))),
37263 }
37264 }
37265
37266 // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
37267 "UNIX_DATE" if args.len() == 1 => {
37268 let date = args.remove(0);
37269 match target {
37270 DialectType::DuckDB => {
37271 let epoch = Expression::Cast(Box::new(Cast {
37272 this: Expression::Literal(Box::new(Literal::String(
37273 "1970-01-01".to_string(),
37274 ))),
37275 to: DataType::Date,
37276 trailing_comments: vec![],
37277 double_colon_syntax: false,
37278 format: None,
37279 default: None,
37280 inferred_type: None,
37281 }));
37282 // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
37283 // Need to convert DATE literal to CAST
37284 let norm_date = Self::date_literal_to_cast(date);
37285 Ok(Expression::Function(Box::new(Function::new(
37286 "DATE_DIFF".to_string(),
37287 vec![
37288 Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
37289 epoch,
37290 norm_date,
37291 ],
37292 ))))
37293 }
37294 _ => Ok(Expression::Function(Box::new(Function::new(
37295 "UNIX_DATE".to_string(),
37296 vec![date],
37297 )))),
37298 }
37299 }
37300
// NOTE: a second set of `"UNIX_SECONDS"`, `"UNIX_MILLIS"` and `"UNIX_MICROS"` arms used to
// sit here. They repeated, with the identical `args.len() == 1` guard, patterns that are
// already matched by arms earlier in this same `match`. Match arms are tried top to
// bottom, so these later copies could never be selected; they were removed as dead code.
// Change the earlier UNIX_* arms to alter how these functions are rewritten.
37389
37390 // INSTR(str, substr) -> target-specific
37391 "INSTR" => {
37392 if matches!(target, DialectType::BigQuery) {
37393 // BQ->BQ: keep as INSTR
37394 Ok(Expression::Function(Box::new(Function::new(
37395 "INSTR".to_string(),
37396 args,
37397 ))))
37398 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
37399 // Snowflake: CHARINDEX(substr, str) - swap args
37400 let str_expr = args.remove(0);
37401 let substr = args.remove(0);
37402 Ok(Expression::Function(Box::new(Function::new(
37403 "CHARINDEX".to_string(),
37404 vec![substr, str_expr],
37405 ))))
37406 } else {
37407 // Keep as INSTR for other targets
37408 Ok(Expression::Function(Box::new(Function::new(
37409 "INSTR".to_string(),
37410 args,
37411 ))))
37412 }
37413 }
37414
37415 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
37416 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
37417 if matches!(target, DialectType::BigQuery) {
37418 // BQ->BQ: always output with parens (function form), keep any timezone arg
37419 Ok(Expression::Function(Box::new(Function::new(name, args))))
37420 } else if name == "CURRENT_DATE" && args.len() == 1 {
37421 // CURRENT_DATE('UTC') - has timezone arg
37422 let tz_arg = args.remove(0);
37423 match target {
37424 DialectType::DuckDB => {
37425 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
37426 let ct = Expression::CurrentTimestamp(
37427 crate::expressions::CurrentTimestamp {
37428 precision: None,
37429 sysdate: false,
37430 },
37431 );
37432 let at_tz =
37433 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
37434 this: ct,
37435 zone: tz_arg,
37436 }));
37437 Ok(Expression::Cast(Box::new(Cast {
37438 this: at_tz,
37439 to: DataType::Date,
37440 trailing_comments: vec![],
37441 double_colon_syntax: false,
37442 format: None,
37443 default: None,
37444 inferred_type: None,
37445 })))
37446 }
37447 DialectType::Snowflake => {
37448 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
37449 let ct = Expression::Function(Box::new(Function::new(
37450 "CURRENT_TIMESTAMP".to_string(),
37451 vec![],
37452 )));
37453 let convert = Expression::Function(Box::new(Function::new(
37454 "CONVERT_TIMEZONE".to_string(),
37455 vec![tz_arg, ct],
37456 )));
37457 Ok(Expression::Cast(Box::new(Cast {
37458 this: convert,
37459 to: DataType::Date,
37460 trailing_comments: vec![],
37461 double_colon_syntax: false,
37462 format: None,
37463 default: None,
37464 inferred_type: None,
37465 })))
37466 }
37467 _ => {
37468 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
37469 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
37470 Ok(Expression::AtTimeZone(Box::new(
37471 crate::expressions::AtTimeZone {
37472 this: cd,
37473 zone: tz_arg,
37474 },
37475 )))
37476 }
37477 }
37478 } else if (name == "CURRENT_TIMESTAMP"
37479 || name == "CURRENT_TIME"
37480 || name == "CURRENT_DATE")
37481 && args.is_empty()
37482 && matches!(
37483 target,
37484 DialectType::PostgreSQL
37485 | DialectType::DuckDB
37486 | DialectType::Presto
37487 | DialectType::Trino
37488 )
37489 {
37490 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
37491 if name == "CURRENT_TIMESTAMP" {
37492 Ok(Expression::CurrentTimestamp(
37493 crate::expressions::CurrentTimestamp {
37494 precision: None,
37495 sysdate: false,
37496 },
37497 ))
37498 } else if name == "CURRENT_DATE" {
37499 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
37500 } else {
37501 // CURRENT_TIME
37502 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
37503 precision: None,
37504 }))
37505 }
37506 } else {
37507 // All other targets: keep as function (with parens)
37508 Ok(Expression::Function(Box::new(Function::new(name, args))))
37509 }
37510 }
37511
37512 // JSON_QUERY(json, path) -> target-specific
37513 "JSON_QUERY" if args.len() == 2 => {
37514 match target {
37515 DialectType::DuckDB | DialectType::SQLite => {
37516 // json -> path syntax
37517 let json_expr = args.remove(0);
37518 let path = args.remove(0);
37519 Ok(Expression::JsonExtract(Box::new(
37520 crate::expressions::JsonExtractFunc {
37521 this: json_expr,
37522 path,
37523 returning: None,
37524 arrow_syntax: true,
37525 hash_arrow_syntax: false,
37526 wrapper_option: None,
37527 quotes_option: None,
37528 on_scalar_string: false,
37529 on_error: None,
37530 },
37531 )))
37532 }
37533 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
37534 Ok(Expression::Function(Box::new(Function::new(
37535 "GET_JSON_OBJECT".to_string(),
37536 args,
37537 ))))
37538 }
37539 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
37540 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
37541 )),
37542 _ => Ok(Expression::Function(Box::new(Function::new(
37543 "JSON_QUERY".to_string(),
37544 args,
37545 )))),
37546 }
37547 }
37548
37549 // JSON_VALUE_ARRAY(json, path) -> target-specific
37550 "JSON_VALUE_ARRAY" if args.len() == 2 => {
37551 match target {
37552 DialectType::DuckDB => {
37553 // CAST(json -> path AS TEXT[])
37554 let json_expr = args.remove(0);
37555 let path = args.remove(0);
37556 let arrow = Expression::JsonExtract(Box::new(
37557 crate::expressions::JsonExtractFunc {
37558 this: json_expr,
37559 path,
37560 returning: None,
37561 arrow_syntax: true,
37562 hash_arrow_syntax: false,
37563 wrapper_option: None,
37564 quotes_option: None,
37565 on_scalar_string: false,
37566 on_error: None,
37567 },
37568 ));
37569 Ok(Expression::Cast(Box::new(Cast {
37570 this: arrow,
37571 to: DataType::Array {
37572 element_type: Box::new(DataType::Text),
37573 dimension: None,
37574 },
37575 trailing_comments: vec![],
37576 double_colon_syntax: false,
37577 format: None,
37578 default: None,
37579 inferred_type: None,
37580 })))
37581 }
37582 DialectType::Snowflake => {
37583 let json_expr = args.remove(0);
37584 let path_expr = args.remove(0);
37585 // Convert JSON path from $.path to just path
37586 let sf_path = if let Expression::Literal(ref lit) = path_expr {
37587 if let Literal::String(ref s) = lit.as_ref() {
37588 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
37589 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
37590 } else {
37591 path_expr.clone()
37592 }
37593 } else {
37594 path_expr
37595 };
37596 let parse_json = Expression::Function(Box::new(Function::new(
37597 "PARSE_JSON".to_string(),
37598 vec![json_expr],
37599 )));
37600 let get_path = Expression::Function(Box::new(Function::new(
37601 "GET_PATH".to_string(),
37602 vec![parse_json, sf_path],
37603 )));
37604 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
37605 let cast_expr = Expression::Cast(Box::new(Cast {
37606 this: Expression::Identifier(Identifier::new("x")),
37607 to: DataType::VarChar {
37608 length: None,
37609 parenthesized_length: false,
37610 },
37611 trailing_comments: vec![],
37612 double_colon_syntax: false,
37613 format: None,
37614 default: None,
37615 inferred_type: None,
37616 }));
37617 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
37618 parameters: vec![Identifier::new("x")],
37619 body: cast_expr,
37620 colon: false,
37621 parameter_types: vec![],
37622 }));
37623 Ok(Expression::Function(Box::new(Function::new(
37624 "TRANSFORM".to_string(),
37625 vec![get_path, lambda],
37626 ))))
37627 }
37628 _ => Ok(Expression::Function(Box::new(Function::new(
37629 "JSON_VALUE_ARRAY".to_string(),
37630 args,
37631 )))),
37632 }
37633 }
37634
37635 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
37636 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
37637 // This is different from Hive/Spark where 3rd arg is "group_index"
37638 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
37639 match target {
37640 DialectType::DuckDB
37641 | DialectType::Presto
37642 | DialectType::Trino
37643 | DialectType::Athena => {
37644 if args.len() == 2 {
37645 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
37646 args.push(Expression::number(1));
37647 Ok(Expression::Function(Box::new(Function::new(
37648 "REGEXP_EXTRACT".to_string(),
37649 args,
37650 ))))
37651 } else if args.len() == 3 {
37652 let val = args.remove(0);
37653 let regex = args.remove(0);
37654 let position = args.remove(0);
37655 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37656 if is_pos_1 {
37657 Ok(Expression::Function(Box::new(Function::new(
37658 "REGEXP_EXTRACT".to_string(),
37659 vec![val, regex, Expression::number(1)],
37660 ))))
37661 } else {
37662 let substring_expr = Expression::Function(Box::new(Function::new(
37663 "SUBSTRING".to_string(),
37664 vec![val, position],
37665 )));
37666 let nullif_expr = Expression::Function(Box::new(Function::new(
37667 "NULLIF".to_string(),
37668 vec![
37669 substring_expr,
37670 Expression::Literal(Box::new(Literal::String(
37671 String::new(),
37672 ))),
37673 ],
37674 )));
37675 Ok(Expression::Function(Box::new(Function::new(
37676 "REGEXP_EXTRACT".to_string(),
37677 vec![nullif_expr, regex, Expression::number(1)],
37678 ))))
37679 }
37680 } else if args.len() == 4 {
37681 let val = args.remove(0);
37682 let regex = args.remove(0);
37683 let position = args.remove(0);
37684 let occurrence = args.remove(0);
37685 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37686 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
37687 if is_pos_1 && is_occ_1 {
37688 Ok(Expression::Function(Box::new(Function::new(
37689 "REGEXP_EXTRACT".to_string(),
37690 vec![val, regex, Expression::number(1)],
37691 ))))
37692 } else {
37693 let subject = if is_pos_1 {
37694 val
37695 } else {
37696 let substring_expr = Expression::Function(Box::new(
37697 Function::new("SUBSTRING".to_string(), vec![val, position]),
37698 ));
37699 Expression::Function(Box::new(Function::new(
37700 "NULLIF".to_string(),
37701 vec![
37702 substring_expr,
37703 Expression::Literal(Box::new(Literal::String(
37704 String::new(),
37705 ))),
37706 ],
37707 )))
37708 };
37709 let extract_all = Expression::Function(Box::new(Function::new(
37710 "REGEXP_EXTRACT_ALL".to_string(),
37711 vec![subject, regex, Expression::number(1)],
37712 )));
37713 Ok(Expression::Function(Box::new(Function::new(
37714 "ARRAY_EXTRACT".to_string(),
37715 vec![extract_all, occurrence],
37716 ))))
37717 }
37718 } else {
37719 Ok(Expression::Function(Box::new(Function {
37720 name: f.name,
37721 args,
37722 distinct: f.distinct,
37723 trailing_comments: f.trailing_comments,
37724 use_bracket_syntax: f.use_bracket_syntax,
37725 no_parens: f.no_parens,
37726 quoted: f.quoted,
37727 span: None,
37728 inferred_type: None,
37729 })))
37730 }
37731 }
37732 DialectType::Snowflake => {
37733 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
37734 Ok(Expression::Function(Box::new(Function::new(
37735 "REGEXP_SUBSTR".to_string(),
37736 args,
37737 ))))
37738 }
37739 _ => {
37740 // For other targets (Hive/Spark/BigQuery): pass through as-is
37741 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
37742 Ok(Expression::Function(Box::new(Function {
37743 name: f.name,
37744 args,
37745 distinct: f.distinct,
37746 trailing_comments: f.trailing_comments,
37747 use_bracket_syntax: f.use_bracket_syntax,
37748 no_parens: f.no_parens,
37749 quoted: f.quoted,
37750 span: None,
37751 inferred_type: None,
37752 })))
37753 }
37754 }
37755 }
37756
37757 // BigQuery STRUCT(args) -> target-specific struct expression
37758 "STRUCT" => {
37759 // Convert Function args to Struct fields
37760 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
37761 for (i, arg) in args.into_iter().enumerate() {
37762 match arg {
37763 Expression::Alias(a) => {
37764 // Named field: expr AS name
37765 fields.push((Some(a.alias.name.clone()), a.this));
37766 }
37767 other => {
37768 // Unnamed field: for Spark/Hive, keep as None
37769 // For Snowflake, auto-name as _N
37770 // For DuckDB, use column name for column refs, _N for others
37771 if matches!(target, DialectType::Snowflake) {
37772 fields.push((Some(format!("_{}", i)), other));
37773 } else if matches!(target, DialectType::DuckDB) {
37774 let auto_name = match &other {
37775 Expression::Column(col) => col.name.name.clone(),
37776 _ => format!("_{}", i),
37777 };
37778 fields.push((Some(auto_name), other));
37779 } else {
37780 fields.push((None, other));
37781 }
37782 }
37783 }
37784 }
37785
37786 match target {
37787 DialectType::Snowflake => {
37788 // OBJECT_CONSTRUCT('name', value, ...)
37789 let mut oc_args = Vec::new();
37790 for (name, val) in &fields {
37791 if let Some(n) = name {
37792 oc_args.push(Expression::Literal(Box::new(Literal::String(
37793 n.clone(),
37794 ))));
37795 oc_args.push(val.clone());
37796 } else {
37797 oc_args.push(val.clone());
37798 }
37799 }
37800 Ok(Expression::Function(Box::new(Function::new(
37801 "OBJECT_CONSTRUCT".to_string(),
37802 oc_args,
37803 ))))
37804 }
37805 DialectType::DuckDB => {
37806 // {'name': value, ...}
37807 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37808 fields,
37809 })))
37810 }
37811 DialectType::Hive => {
37812 // STRUCT(val1, val2, ...) - strip aliases
37813 let hive_fields: Vec<(Option<String>, Expression)> =
37814 fields.into_iter().map(|(_, v)| (None, v)).collect();
37815 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37816 fields: hive_fields,
37817 })))
37818 }
37819 DialectType::Spark | DialectType::Databricks => {
37820 // Use Expression::Struct to bypass Spark target transform auto-naming
37821 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
37822 fields,
37823 })))
37824 }
37825 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
37826 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
37827 let all_named =
37828 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
37829 let all_types_inferable = all_named
37830 && fields
37831 .iter()
37832 .all(|(_, val)| Self::can_infer_presto_type(val));
37833 let row_args: Vec<Expression> =
37834 fields.iter().map(|(_, v)| v.clone()).collect();
37835 let row_expr = Expression::Function(Box::new(Function::new(
37836 "ROW".to_string(),
37837 row_args,
37838 )));
37839 if all_named && all_types_inferable {
37840 // Build ROW type with inferred types
37841 let mut row_type_fields = Vec::new();
37842 for (name, val) in &fields {
37843 if let Some(n) = name {
37844 let type_str = Self::infer_sql_type_for_presto(val);
37845 row_type_fields.push(crate::expressions::StructField::new(
37846 n.clone(),
37847 crate::expressions::DataType::Custom { name: type_str },
37848 ));
37849 }
37850 }
37851 let row_type = crate::expressions::DataType::Struct {
37852 fields: row_type_fields,
37853 nested: true,
37854 };
37855 Ok(Expression::Cast(Box::new(Cast {
37856 this: row_expr,
37857 to: row_type,
37858 trailing_comments: Vec::new(),
37859 double_colon_syntax: false,
37860 format: None,
37861 default: None,
37862 inferred_type: None,
37863 })))
37864 } else {
37865 Ok(row_expr)
37866 }
37867 }
37868 _ => {
37869 // Default: keep as STRUCT function with original args
37870 let mut new_args = Vec::new();
37871 for (name, val) in fields {
37872 if let Some(n) = name {
37873 new_args.push(Expression::Alias(Box::new(
37874 crate::expressions::Alias::new(val, Identifier::new(n)),
37875 )));
37876 } else {
37877 new_args.push(val);
37878 }
37879 }
37880 Ok(Expression::Function(Box::new(Function::new(
37881 "STRUCT".to_string(),
37882 new_args,
37883 ))))
37884 }
37885 }
37886 }
37887
37888 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
37889 "ROUND" if args.len() == 3 => {
37890 let x = args.remove(0);
37891 let n = args.remove(0);
37892 let mode = args.remove(0);
37893 // Check if mode is 'ROUND_HALF_EVEN'
37894 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
37895 if is_half_even && matches!(target, DialectType::DuckDB) {
37896 Ok(Expression::Function(Box::new(Function::new(
37897 "ROUND_EVEN".to_string(),
37898 vec![x, n],
37899 ))))
37900 } else {
37901 // Pass through with all args
37902 Ok(Expression::Function(Box::new(Function::new(
37903 "ROUND".to_string(),
37904 vec![x, n, mode],
37905 ))))
37906 }
37907 }
37908
37909 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
37910 "MAKE_INTERVAL" => {
37911 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
37912 // The positional args are: year, month
37913 // Named args are: day =>, minute =>, etc.
37914 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
37915 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
37916 // For BigQuery->BigQuery: reorder named args (day before minute)
37917 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
37918 let mut parts: Vec<(String, String)> = Vec::new();
37919 let mut pos_idx = 0;
37920 let pos_units = ["year", "month"];
37921 for arg in &args {
37922 if let Expression::NamedArgument(na) = arg {
37923 // Named arg like minute => 5
37924 let unit = na.name.name.clone();
37925 if let Expression::Literal(lit) = &na.value {
37926 if let Literal::Number(n) = lit.as_ref() {
37927 parts.push((unit, n.clone()));
37928 }
37929 }
37930 } else if pos_idx < pos_units.len() {
37931 if let Expression::Literal(lit) = arg {
37932 if let Literal::Number(n) = lit.as_ref() {
37933 parts.push((pos_units[pos_idx].to_string(), n.clone()));
37934 }
37935 }
37936 pos_idx += 1;
37937 }
37938 }
37939 // Don't sort - preserve original argument order
37940 let separator = if matches!(target, DialectType::Snowflake) {
37941 ", "
37942 } else {
37943 " "
37944 };
37945 let interval_str = parts
37946 .iter()
37947 .map(|(u, v)| format!("{} {}", v, u))
37948 .collect::<Vec<_>>()
37949 .join(separator);
37950 Ok(Expression::Interval(Box::new(
37951 crate::expressions::Interval {
37952 this: Some(Expression::Literal(Box::new(Literal::String(
37953 interval_str,
37954 )))),
37955 unit: None,
37956 },
37957 )))
37958 } else if matches!(target, DialectType::BigQuery) {
37959 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
37960 let mut positional = Vec::new();
37961 let mut named: Vec<(
37962 String,
37963 Expression,
37964 crate::expressions::NamedArgSeparator,
37965 )> = Vec::new();
37966 let _pos_units = ["year", "month"];
37967 let mut _pos_idx = 0;
37968 for arg in args {
37969 if let Expression::NamedArgument(na) = arg {
37970 named.push((na.name.name.clone(), na.value, na.separator));
37971 } else {
37972 positional.push(arg);
37973 _pos_idx += 1;
37974 }
37975 }
37976 // Sort named args by: day, hour, minute, second
37977 let unit_order = |u: &str| -> usize {
37978 match u.to_ascii_lowercase().as_str() {
37979 "day" => 0,
37980 "hour" => 1,
37981 "minute" => 2,
37982 "second" => 3,
37983 _ => 4,
37984 }
37985 };
37986 named.sort_by_key(|(u, _, _)| unit_order(u));
37987 let mut result_args = positional;
37988 for (name, value, sep) in named {
37989 result_args.push(Expression::NamedArgument(Box::new(
37990 crate::expressions::NamedArgument {
37991 name: Identifier::new(&name),
37992 value,
37993 separator: sep,
37994 },
37995 )));
37996 }
37997 Ok(Expression::Function(Box::new(Function::new(
37998 "MAKE_INTERVAL".to_string(),
37999 result_args,
38000 ))))
38001 } else {
38002 Ok(Expression::Function(Box::new(Function::new(
38003 "MAKE_INTERVAL".to_string(),
38004 args,
38005 ))))
38006 }
38007 }
38008
38009 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
38010 "ARRAY_TO_STRING" if args.len() == 3 => {
38011 let arr = args.remove(0);
38012 let sep = args.remove(0);
38013 let null_text = args.remove(0);
38014 match target {
38015 DialectType::DuckDB => {
38016 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
38017 let _lambda_param =
38018 Expression::Identifier(crate::expressions::Identifier::new("x"));
38019 let coalesce =
38020 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
38021 original_name: None,
38022 expressions: vec![
38023 Expression::Identifier(crate::expressions::Identifier::new(
38024 "x",
38025 )),
38026 null_text,
38027 ],
38028 inferred_type: None,
38029 }));
38030 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
38031 parameters: vec![crate::expressions::Identifier::new("x")],
38032 body: coalesce,
38033 colon: false,
38034 parameter_types: vec![],
38035 }));
38036 let list_transform = Expression::Function(Box::new(Function::new(
38037 "LIST_TRANSFORM".to_string(),
38038 vec![arr, lambda],
38039 )));
38040 Ok(Expression::Function(Box::new(Function::new(
38041 "ARRAY_TO_STRING".to_string(),
38042 vec![list_transform, sep],
38043 ))))
38044 }
38045 _ => Ok(Expression::Function(Box::new(Function::new(
38046 "ARRAY_TO_STRING".to_string(),
38047 vec![arr, sep, null_text],
38048 )))),
38049 }
38050 }
38051
38052 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
38053 "LENGTH" if args.len() == 1 => {
38054 let arg = args.remove(0);
38055 match target {
38056 DialectType::DuckDB => {
38057 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
38058 let typeof_func = Expression::Function(Box::new(Function::new(
38059 "TYPEOF".to_string(),
38060 vec![arg.clone()],
38061 )));
38062 let blob_cast = Expression::Cast(Box::new(Cast {
38063 this: arg.clone(),
38064 to: DataType::VarBinary { length: None },
38065 trailing_comments: vec![],
38066 double_colon_syntax: false,
38067 format: None,
38068 default: None,
38069 inferred_type: None,
38070 }));
38071 let octet_length = Expression::Function(Box::new(Function::new(
38072 "OCTET_LENGTH".to_string(),
38073 vec![blob_cast],
38074 )));
38075 let text_cast = Expression::Cast(Box::new(Cast {
38076 this: arg,
38077 to: DataType::Text,
38078 trailing_comments: vec![],
38079 double_colon_syntax: false,
38080 format: None,
38081 default: None,
38082 inferred_type: None,
38083 }));
38084 let length_text = Expression::Function(Box::new(Function::new(
38085 "LENGTH".to_string(),
38086 vec![text_cast],
38087 )));
38088 Ok(Expression::Case(Box::new(crate::expressions::Case {
38089 operand: Some(typeof_func),
38090 whens: vec![(
38091 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
38092 octet_length,
38093 )],
38094 else_: Some(length_text),
38095 comments: Vec::new(),
38096 inferred_type: None,
38097 })))
38098 }
38099 _ => Ok(Expression::Function(Box::new(Function::new(
38100 "LENGTH".to_string(),
38101 vec![arg],
38102 )))),
38103 }
38104 }
38105
38106 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
38107 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
38108 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
38109 // The args should be [x, fraction] with the null handling stripped
38110 // For DuckDB: QUANTILE_CONT(x, fraction)
38111 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
38112 match target {
38113 DialectType::DuckDB => {
38114 // Strip down to just 2 args, rename to QUANTILE_CONT
38115 let x = args[0].clone();
38116 let frac = args[1].clone();
38117 Ok(Expression::Function(Box::new(Function::new(
38118 "QUANTILE_CONT".to_string(),
38119 vec![x, frac],
38120 ))))
38121 }
38122 _ => Ok(Expression::Function(Box::new(Function::new(
38123 "PERCENTILE_CONT".to_string(),
38124 args,
38125 )))),
38126 }
38127 }
38128
38129 // All others: pass through
38130 _ => Ok(Expression::Function(Box::new(Function {
38131 name: f.name,
38132 args,
38133 distinct: f.distinct,
38134 trailing_comments: f.trailing_comments,
38135 use_bracket_syntax: f.use_bracket_syntax,
38136 no_parens: f.no_parens,
38137 quoted: f.quoted,
38138 span: None,
38139 inferred_type: None,
38140 }))),
38141 }
38142 }
38143
38144 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
38145 /// Returns false for column references and other non-literal expressions where the type is unknown.
38146 fn can_infer_presto_type(expr: &Expression) -> bool {
38147 match expr {
38148 Expression::Literal(_) => true,
38149 Expression::Boolean(_) => true,
38150 Expression::Array(_) | Expression::ArrayFunc(_) => true,
38151 Expression::Struct(_) | Expression::StructFunc(_) => true,
38152 Expression::Function(f) => {
38153 f.name.eq_ignore_ascii_case("STRUCT")
38154 || f.name.eq_ignore_ascii_case("ROW")
38155 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
38156 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
38157 || f.name.eq_ignore_ascii_case("NOW")
38158 }
38159 Expression::Cast(_) => true,
38160 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
38161 _ => false,
38162 }
38163 }
38164
38165 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
38166 fn infer_sql_type_for_presto(expr: &Expression) -> String {
38167 use crate::expressions::Literal;
38168 match expr {
38169 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38170 "VARCHAR".to_string()
38171 }
38172 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38173 let Literal::Number(n) = lit.as_ref() else {
38174 unreachable!()
38175 };
38176 if n.contains('.') {
38177 "DOUBLE".to_string()
38178 } else {
38179 "INTEGER".to_string()
38180 }
38181 }
38182 Expression::Boolean(_) => "BOOLEAN".to_string(),
38183 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38184 "DATE".to_string()
38185 }
38186 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38187 "TIMESTAMP".to_string()
38188 }
38189 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38190 "TIMESTAMP".to_string()
38191 }
38192 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
38193 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
38194 Expression::Function(f) => {
38195 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
38196 "ROW".to_string()
38197 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
38198 "DATE".to_string()
38199 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
38200 || f.name.eq_ignore_ascii_case("NOW")
38201 {
38202 "TIMESTAMP".to_string()
38203 } else {
38204 "VARCHAR".to_string()
38205 }
38206 }
38207 Expression::Cast(c) => {
38208 // If already cast, use the target type
38209 Self::data_type_to_presto_string(&c.to)
38210 }
38211 _ => "VARCHAR".to_string(),
38212 }
38213 }
38214
38215 /// Convert a DataType to its Presto/Trino string representation for ROW type
38216 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
38217 use crate::expressions::DataType;
38218 match dt {
38219 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
38220 "VARCHAR".to_string()
38221 }
38222 DataType::Int { .. }
38223 | DataType::BigInt { .. }
38224 | DataType::SmallInt { .. }
38225 | DataType::TinyInt { .. } => "INTEGER".to_string(),
38226 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
38227 DataType::Boolean => "BOOLEAN".to_string(),
38228 DataType::Date => "DATE".to_string(),
38229 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
38230 DataType::Struct { fields, .. } => {
38231 let field_strs: Vec<String> = fields
38232 .iter()
38233 .map(|f| {
38234 format!(
38235 "{} {}",
38236 f.name,
38237 Self::data_type_to_presto_string(&f.data_type)
38238 )
38239 })
38240 .collect();
38241 format!("ROW({})", field_strs.join(", "))
38242 }
38243 DataType::Array { element_type, .. } => {
38244 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
38245 }
38246 DataType::Custom { name } => {
38247 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
38248 name.clone()
38249 }
38250 _ => "VARCHAR".to_string(),
38251 }
38252 }
38253
38254 /// Convert IntervalUnit to string
38255 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
38256 match unit {
38257 crate::expressions::IntervalUnit::Year => "YEAR",
38258 crate::expressions::IntervalUnit::Quarter => "QUARTER",
38259 crate::expressions::IntervalUnit::Month => "MONTH",
38260 crate::expressions::IntervalUnit::Week => "WEEK",
38261 crate::expressions::IntervalUnit::Day => "DAY",
38262 crate::expressions::IntervalUnit::Hour => "HOUR",
38263 crate::expressions::IntervalUnit::Minute => "MINUTE",
38264 crate::expressions::IntervalUnit::Second => "SECOND",
38265 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
38266 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
38267 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
38268 }
38269 }
38270
38271 /// Extract unit string from an expression (uppercased)
38272 fn get_unit_str_static(expr: &Expression) -> String {
38273 use crate::expressions::Literal;
38274 match expr {
38275 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
38276 Expression::Var(v) => v.this.to_ascii_uppercase(),
38277 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38278 let Literal::String(s) = lit.as_ref() else {
38279 unreachable!()
38280 };
38281 s.to_ascii_uppercase()
38282 }
38283 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
38284 Expression::Function(f) => {
38285 let base = f.name.to_ascii_uppercase();
38286 if !f.args.is_empty() {
38287 let inner = Self::get_unit_str_static(&f.args[0]);
38288 format!("{}({})", base, inner)
38289 } else {
38290 base
38291 }
38292 }
38293 _ => "DAY".to_string(),
38294 }
38295 }
38296
38297 /// Parse unit string to IntervalUnit
38298 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
38299 match s {
38300 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
38301 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
38302 "MONTH" | "MONTHS" | "MON" | "MONS" | "MM" | "M" => {
38303 crate::expressions::IntervalUnit::Month
38304 }
38305 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
38306 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
38307 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
38308 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
38309 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
38310 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
38311 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
38312 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
38313 _ => crate::expressions::IntervalUnit::Day,
38314 }
38315 }
38316
38317 /// Convert expression to simple string for interval building
38318 fn expr_to_string_static(expr: &Expression) -> String {
38319 use crate::expressions::Literal;
38320 match expr {
38321 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38322 let Literal::Number(s) = lit.as_ref() else {
38323 unreachable!()
38324 };
38325 s.clone()
38326 }
38327 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38328 let Literal::String(s) = lit.as_ref() else {
38329 unreachable!()
38330 };
38331 s.clone()
38332 }
38333 Expression::Identifier(id) => id.name.clone(),
38334 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
38335 _ => "1".to_string(),
38336 }
38337 }
38338
38339 /// Extract a simple string representation from a literal expression
38340 fn expr_to_string(expr: &Expression) -> String {
38341 use crate::expressions::Literal;
38342 match expr {
38343 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38344 let Literal::Number(s) = lit.as_ref() else {
38345 unreachable!()
38346 };
38347 s.clone()
38348 }
38349 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
38350 let Literal::String(s) = lit.as_ref() else {
38351 unreachable!()
38352 };
38353 s.clone()
38354 }
38355 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
38356 Expression::Identifier(id) => id.name.clone(),
38357 _ => "1".to_string(),
38358 }
38359 }
38360
38361 /// Quote an interval value expression as a string literal if it's a number (or negated number)
38362 fn quote_interval_val(expr: &Expression) -> Expression {
38363 use crate::expressions::Literal;
38364 match expr {
38365 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
38366 let Literal::Number(n) = lit.as_ref() else {
38367 unreachable!()
38368 };
38369 Expression::Literal(Box::new(Literal::String(n.clone())))
38370 }
38371 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
38372 Expression::Neg(inner) => {
38373 if let Expression::Literal(lit) = &inner.this {
38374 if let Literal::Number(n) = lit.as_ref() {
38375 Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
38376 } else {
38377 inner.this.clone()
38378 }
38379 } else {
38380 expr.clone()
38381 }
38382 }
38383 _ => expr.clone(),
38384 }
38385 }
38386
/// Report whether a timestamp string carries timezone information.
///
/// After trimming surrounding whitespace, the string is considered zoned when either:
/// - the text after the last space is `+` or `-` followed by at least one character, all of
///   which are ASCII digits or `:` (e.g. `+02:00`, `-5`), or
/// - it ends (case-insensitively) with a space followed by one of the abbreviations
///   `utc`, `gmt`, `cet`, `est`, `pst`, `cst`, `mst`.
fn timestamp_string_has_timezone(ts: &str) -> bool {
    const ZONE_SUFFIXES: [&str; 7] = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];

    let value = ts.trim();

    // Numeric offset: only the token following the last space is inspected.
    let has_numeric_offset = value
        .rsplit_once(' ')
        .and_then(|(_, tail)| tail.strip_prefix('+').or_else(|| tail.strip_prefix('-')))
        .map_or(false, |digits| {
            !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit() || c == ':')
        });
    if has_numeric_offset {
        return true;
    }

    // Named abbreviation: must be preceded by a space, compared case-insensitively.
    let lowered = value.to_ascii_lowercase();
    ZONE_SUFFIXES.iter().any(|suffix| lowered.ends_with(suffix))
}
38410
38411 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
38412 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
38413 use crate::expressions::{Cast, DataType, Literal};
38414 match expr {
38415 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38416 let Literal::Timestamp(s) = lit.as_ref() else {
38417 unreachable!()
38418 };
38419 let tz = func_name.starts_with("TIMESTAMP");
38420 Expression::Cast(Box::new(Cast {
38421 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38422 to: if tz {
38423 DataType::Timestamp {
38424 timezone: true,
38425 precision: None,
38426 }
38427 } else {
38428 DataType::Timestamp {
38429 timezone: false,
38430 precision: None,
38431 }
38432 },
38433 trailing_comments: vec![],
38434 double_colon_syntax: false,
38435 format: None,
38436 default: None,
38437 inferred_type: None,
38438 }))
38439 }
38440 other => other,
38441 }
38442 }
38443
38444 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
38445 fn maybe_cast_ts(expr: Expression) -> Expression {
38446 use crate::expressions::{Cast, DataType, Literal};
38447 match expr {
38448 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38449 let Literal::Timestamp(s) = lit.as_ref() else {
38450 unreachable!()
38451 };
38452 Expression::Cast(Box::new(Cast {
38453 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38454 to: DataType::Timestamp {
38455 timezone: false,
38456 precision: None,
38457 },
38458 trailing_comments: vec![],
38459 double_colon_syntax: false,
38460 format: None,
38461 default: None,
38462 inferred_type: None,
38463 }))
38464 }
38465 other => other,
38466 }
38467 }
38468
38469 /// Convert DATE 'x' literal to CAST('x' AS DATE)
38470 fn date_literal_to_cast(expr: Expression) -> Expression {
38471 use crate::expressions::{Cast, DataType, Literal};
38472 match expr {
38473 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38474 let Literal::Date(s) = lit.as_ref() else {
38475 unreachable!()
38476 };
38477 Expression::Cast(Box::new(Cast {
38478 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38479 to: DataType::Date,
38480 trailing_comments: vec![],
38481 double_colon_syntax: false,
38482 format: None,
38483 default: None,
38484 inferred_type: None,
38485 }))
38486 }
38487 other => other,
38488 }
38489 }
38490
38491 /// Ensure an expression that should be a date is CAST(... AS DATE).
38492 /// Handles both DATE literals and string literals that look like dates.
38493 fn ensure_cast_date(expr: Expression) -> Expression {
38494 use crate::expressions::{Cast, DataType, Literal};
38495 match expr {
38496 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
38497 let Literal::Date(s) = lit.as_ref() else {
38498 unreachable!()
38499 };
38500 Expression::Cast(Box::new(Cast {
38501 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38502 to: DataType::Date,
38503 trailing_comments: vec![],
38504 double_colon_syntax: false,
38505 format: None,
38506 default: None,
38507 inferred_type: None,
38508 }))
38509 }
38510 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38511 // String literal that should be a date -> CAST('s' AS DATE)
38512 Expression::Cast(Box::new(Cast {
38513 this: expr,
38514 to: DataType::Date,
38515 trailing_comments: vec![],
38516 double_colon_syntax: false,
38517 format: None,
38518 default: None,
38519 inferred_type: None,
38520 }))
38521 }
38522 // Already a CAST or other expression -> leave as-is
38523 other => other,
38524 }
38525 }
38526
38527 /// Force CAST(expr AS DATE) for any expression (not just literals)
38528 /// Skips if the expression is already a CAST to DATE
38529 fn force_cast_date(expr: Expression) -> Expression {
38530 use crate::expressions::{Cast, DataType};
38531 // If it's already a CAST to DATE, don't double-wrap
38532 if let Expression::Cast(ref c) = expr {
38533 if matches!(c.to, DataType::Date) {
38534 return expr;
38535 }
38536 }
38537 Expression::Cast(Box::new(Cast {
38538 this: expr,
38539 to: DataType::Date,
38540 trailing_comments: vec![],
38541 double_colon_syntax: false,
38542 format: None,
38543 default: None,
38544 inferred_type: None,
38545 }))
38546 }
38547
/// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
/// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
/// The Snowflake DATEDIFF handler converts these back to TO_DATE.
///
/// NOTE(review): the handlers mentioned above live outside this part of the file; confirm
/// they still recognize this exact name before changing the value.
const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
38552
38553 fn ensure_to_date_preserved(expr: Expression) -> Expression {
38554 use crate::expressions::{Function, Literal};
38555 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
38556 {
38557 Expression::Function(Box::new(Function::new(
38558 Self::PRESERVED_TO_DATE.to_string(),
38559 vec![expr],
38560 )))
38561 } else {
38562 expr
38563 }
38564 }
38565
38566 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
38567 fn try_cast_date(expr: Expression) -> Expression {
38568 use crate::expressions::{Cast, DataType};
38569 Expression::TryCast(Box::new(Cast {
38570 this: expr,
38571 to: DataType::Date,
38572 trailing_comments: vec![],
38573 double_colon_syntax: false,
38574 format: None,
38575 default: None,
38576 inferred_type: None,
38577 }))
38578 }
38579
38580 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
38581 fn double_cast_timestamp_date(expr: Expression) -> Expression {
38582 use crate::expressions::{Cast, DataType};
38583 let inner = Expression::Cast(Box::new(Cast {
38584 this: expr,
38585 to: DataType::Timestamp {
38586 timezone: false,
38587 precision: None,
38588 },
38589 trailing_comments: vec![],
38590 double_colon_syntax: false,
38591 format: None,
38592 default: None,
38593 inferred_type: None,
38594 }));
38595 Expression::Cast(Box::new(Cast {
38596 this: inner,
38597 to: DataType::Date,
38598 trailing_comments: vec![],
38599 double_colon_syntax: false,
38600 format: None,
38601 default: None,
38602 inferred_type: None,
38603 }))
38604 }
38605
38606 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
38607 fn double_cast_datetime_date(expr: Expression) -> Expression {
38608 use crate::expressions::{Cast, DataType};
38609 let inner = Expression::Cast(Box::new(Cast {
38610 this: expr,
38611 to: DataType::Custom {
38612 name: "DATETIME".to_string(),
38613 },
38614 trailing_comments: vec![],
38615 double_colon_syntax: false,
38616 format: None,
38617 default: None,
38618 inferred_type: None,
38619 }));
38620 Expression::Cast(Box::new(Cast {
38621 this: inner,
38622 to: DataType::Date,
38623 trailing_comments: vec![],
38624 double_colon_syntax: false,
38625 format: None,
38626 default: None,
38627 inferred_type: None,
38628 }))
38629 }
38630
38631 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
38632 fn double_cast_datetime2_date(expr: Expression) -> Expression {
38633 use crate::expressions::{Cast, DataType};
38634 let inner = Expression::Cast(Box::new(Cast {
38635 this: expr,
38636 to: DataType::Custom {
38637 name: "DATETIME2".to_string(),
38638 },
38639 trailing_comments: vec![],
38640 double_colon_syntax: false,
38641 format: None,
38642 default: None,
38643 inferred_type: None,
38644 }));
38645 Expression::Cast(Box::new(Cast {
38646 this: inner,
38647 to: DataType::Date,
38648 trailing_comments: vec![],
38649 double_colon_syntax: false,
38650 format: None,
38651 default: None,
38652 inferred_type: None,
38653 }))
38654 }
38655
/// Convert a Hive/Java-style date format string to a C-style (strftime) format string,
/// e.g. `"yyyy-MM-dd'T'HH"` -> `"%Y-%m-%d'T'%H"`.
///
/// A run of the same pattern letter is translated as one unit:
///
/// | Letters            | Output |
/// |--------------------|--------|
/// | `yy`               | `%y`   |
/// | `y` (other counts) | `%Y`   |
/// | `M`, `MM`          | `%m`   |
/// | `MMM`              | `%b`   |
/// | `MMMM` or longer   | `%B`   |
/// | `d`                | `%d`   |
/// | `H`                | `%H`   |
/// | `h`                | `%I`   |
/// | `m`                | `%M`   |
/// | `s`                | `%S`   |
/// | `S`                | `%f`   |
/// | `a`                | `%p`   |
/// | `E` (1-3)          | `%a`   |
/// | `E` (4 or more)    | `%A`   |
///
/// Text inside single quotes is copied verbatim together with its quotes (an unterminated
/// quote copies the rest of the input). Any other character is copied unchanged.
///
/// `MMMM` previously produced the abbreviated-month directive `%b`; it now produces the
/// full-month directive `%B`, matching how `EEEE` already maps to the full weekday `%A`.
fn hive_format_to_c_format(fmt: &str) -> String {
    let chars: Vec<char> = fmt.chars().collect();
    let mut result = String::with_capacity(fmt.len());
    let mut i = 0;
    while i < chars.len() {
        let c = chars[i];

        if c == '\'' {
            // Quoted literal text: pass the quotes and their content through untouched.
            result.push('\'');
            i += 1;
            while i < chars.len() && chars[i] != '\'' {
                result.push(chars[i]);
                i += 1;
            }
            if i < chars.len() {
                result.push('\'');
                i += 1;
            }
            continue;
        }

        // Length of the run of identical characters starting at `i` (always >= 1).
        let run = chars[i..].iter().take_while(|&&next| next == c).count();
        let directive = match c {
            'y' => Some(if run == 2 { "%y" } else { "%Y" }),
            'M' => Some(match run {
                1 | 2 => "%m",
                3 => "%b",
                _ => "%B",
            }),
            'd' => Some("%d"),
            'H' => Some("%H"),
            'h' => Some("%I"),
            'm' => Some("%M"),
            's' => Some("%S"),
            // Fractional seconds, whatever the requested width.
            'S' => Some("%f"),
            // AM/PM marker.
            'a' => Some("%p"),
            'E' => Some(if run >= 4 { "%A" } else { "%a" }),
            _ => None,
        };

        match directive {
            Some(d) => {
                result.push_str(d);
                i += run;
            }
            None => {
                result.push(c);
                i += 1;
            }
        }
    }
    result
}
38779
38780 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
38781 fn hive_format_to_presto_format(fmt: &str) -> String {
38782 let c_fmt = Self::hive_format_to_c_format(fmt);
38783 // Presto uses %T for HH:MM:SS
38784 c_fmt.replace("%H:%M:%S", "%T")
38785 }
38786
38787 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
38788 fn ensure_cast_timestamp(expr: Expression) -> Expression {
38789 use crate::expressions::{Cast, DataType, Literal};
38790 match expr {
38791 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38792 let Literal::Timestamp(s) = lit.as_ref() else {
38793 unreachable!()
38794 };
38795 Expression::Cast(Box::new(Cast {
38796 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38797 to: DataType::Timestamp {
38798 timezone: false,
38799 precision: None,
38800 },
38801 trailing_comments: vec![],
38802 double_colon_syntax: false,
38803 format: None,
38804 default: None,
38805 inferred_type: None,
38806 }))
38807 }
38808 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38809 Expression::Cast(Box::new(Cast {
38810 this: expr,
38811 to: DataType::Timestamp {
38812 timezone: false,
38813 precision: None,
38814 },
38815 trailing_comments: vec![],
38816 double_colon_syntax: false,
38817 format: None,
38818 default: None,
38819 inferred_type: None,
38820 }))
38821 }
38822 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38823 let Literal::Datetime(s) = lit.as_ref() else {
38824 unreachable!()
38825 };
38826 Expression::Cast(Box::new(Cast {
38827 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38828 to: DataType::Timestamp {
38829 timezone: false,
38830 precision: None,
38831 },
38832 trailing_comments: vec![],
38833 double_colon_syntax: false,
38834 format: None,
38835 default: None,
38836 inferred_type: None,
38837 }))
38838 }
38839 other => other,
38840 }
38841 }
38842
38843 /// Force CAST to TIMESTAMP for any expression (not just literals)
38844 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
38845 fn force_cast_timestamp(expr: Expression) -> Expression {
38846 use crate::expressions::{Cast, DataType};
38847 // Don't double-wrap if already a CAST to TIMESTAMP
38848 if let Expression::Cast(ref c) = expr {
38849 if matches!(c.to, DataType::Timestamp { .. }) {
38850 return expr;
38851 }
38852 }
38853 Expression::Cast(Box::new(Cast {
38854 this: expr,
38855 to: DataType::Timestamp {
38856 timezone: false,
38857 precision: None,
38858 },
38859 trailing_comments: vec![],
38860 double_colon_syntax: false,
38861 format: None,
38862 default: None,
38863 inferred_type: None,
38864 }))
38865 }
38866
38867 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
38868 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
38869 use crate::expressions::{Cast, DataType, Literal};
38870 match expr {
38871 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38872 let Literal::Timestamp(s) = lit.as_ref() else {
38873 unreachable!()
38874 };
38875 Expression::Cast(Box::new(Cast {
38876 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38877 to: DataType::Timestamp {
38878 timezone: true,
38879 precision: None,
38880 },
38881 trailing_comments: vec![],
38882 double_colon_syntax: false,
38883 format: None,
38884 default: None,
38885 inferred_type: None,
38886 }))
38887 }
38888 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38889 Expression::Cast(Box::new(Cast {
38890 this: expr,
38891 to: DataType::Timestamp {
38892 timezone: true,
38893 precision: None,
38894 },
38895 trailing_comments: vec![],
38896 double_colon_syntax: false,
38897 format: None,
38898 default: None,
38899 inferred_type: None,
38900 }))
38901 }
38902 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
38903 let Literal::Datetime(s) = lit.as_ref() else {
38904 unreachable!()
38905 };
38906 Expression::Cast(Box::new(Cast {
38907 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38908 to: DataType::Timestamp {
38909 timezone: true,
38910 precision: None,
38911 },
38912 trailing_comments: vec![],
38913 double_colon_syntax: false,
38914 format: None,
38915 default: None,
38916 inferred_type: None,
38917 }))
38918 }
38919 other => other,
38920 }
38921 }
38922
38923 /// Ensure expression is CAST to DATETIME (for BigQuery)
38924 fn ensure_cast_datetime(expr: Expression) -> Expression {
38925 use crate::expressions::{Cast, DataType, Literal};
38926 match expr {
38927 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38928 Expression::Cast(Box::new(Cast {
38929 this: expr,
38930 to: DataType::Custom {
38931 name: "DATETIME".to_string(),
38932 },
38933 trailing_comments: vec![],
38934 double_colon_syntax: false,
38935 format: None,
38936 default: None,
38937 inferred_type: None,
38938 }))
38939 }
38940 other => other,
38941 }
38942 }
38943
38944 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
38945 fn force_cast_datetime(expr: Expression) -> Expression {
38946 use crate::expressions::{Cast, DataType};
38947 if let Expression::Cast(ref c) = expr {
38948 if let DataType::Custom { ref name } = c.to {
38949 if name.eq_ignore_ascii_case("DATETIME") {
38950 return expr;
38951 }
38952 }
38953 }
38954 Expression::Cast(Box::new(Cast {
38955 this: expr,
38956 to: DataType::Custom {
38957 name: "DATETIME".to_string(),
38958 },
38959 trailing_comments: vec![],
38960 double_colon_syntax: false,
38961 format: None,
38962 default: None,
38963 inferred_type: None,
38964 }))
38965 }
38966
38967 /// Ensure expression is CAST to DATETIME2 (for TSQL)
38968 fn ensure_cast_datetime2(expr: Expression) -> Expression {
38969 use crate::expressions::{Cast, DataType, Literal};
38970 match expr {
38971 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
38972 Expression::Cast(Box::new(Cast {
38973 this: expr,
38974 to: DataType::Custom {
38975 name: "DATETIME2".to_string(),
38976 },
38977 trailing_comments: vec![],
38978 double_colon_syntax: false,
38979 format: None,
38980 default: None,
38981 inferred_type: None,
38982 }))
38983 }
38984 other => other,
38985 }
38986 }
38987
38988 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
38989 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
38990 use crate::expressions::{Cast, DataType, Literal};
38991 match expr {
38992 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
38993 let Literal::Timestamp(s) = lit.as_ref() else {
38994 unreachable!()
38995 };
38996 Expression::Cast(Box::new(Cast {
38997 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
38998 to: DataType::Timestamp {
38999 timezone: true,
39000 precision: None,
39001 },
39002 trailing_comments: vec![],
39003 double_colon_syntax: false,
39004 format: None,
39005 default: None,
39006 inferred_type: None,
39007 }))
39008 }
39009 other => other,
39010 }
39011 }
39012
39013 /// Convert BigQuery format string to Snowflake format string
39014 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
39015 use crate::expressions::Literal;
39016 if let Expression::Literal(lit) = format_expr {
39017 if let Literal::String(s) = lit.as_ref() {
39018 let sf = s
39019 .replace("%Y", "yyyy")
39020 .replace("%m", "mm")
39021 .replace("%d", "DD")
39022 .replace("%H", "HH24")
39023 .replace("%M", "MI")
39024 .replace("%S", "SS")
39025 .replace("%b", "mon")
39026 .replace("%B", "Month")
39027 .replace("%e", "FMDD");
39028 Expression::Literal(Box::new(Literal::String(sf)))
39029 } else {
39030 format_expr.clone()
39031 }
39032 } else {
39033 format_expr.clone()
39034 }
39035 }
39036
39037 /// Convert BigQuery format string to DuckDB format string
39038 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
39039 use crate::expressions::Literal;
39040 if let Expression::Literal(lit) = format_expr {
39041 if let Literal::String(s) = lit.as_ref() {
39042 let duck = s
39043 .replace("%T", "%H:%M:%S")
39044 .replace("%F", "%Y-%m-%d")
39045 .replace("%D", "%m/%d/%y")
39046 .replace("%x", "%m/%d/%y")
39047 .replace("%c", "%a %b %-d %H:%M:%S %Y")
39048 .replace("%e", "%-d")
39049 .replace("%E6S", "%S.%f");
39050 Expression::Literal(Box::new(Literal::String(duck)))
39051 } else {
39052 format_expr.clone()
39053 }
39054 } else {
39055 format_expr.clone()
39056 }
39057 }
39058
39059 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
39060 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
39061 use crate::expressions::Literal;
39062 if let Expression::Literal(lit) = format_expr {
39063 if let Literal::String(s) = lit.as_ref() {
39064 // Replace format elements from longest to shortest to avoid partial matches
39065 let result = s
39066 .replace("YYYYMMDD", "%Y%m%d")
39067 .replace("YYYY", "%Y")
39068 .replace("YY", "%y")
39069 .replace("MONTH", "%B")
39070 .replace("MON", "%b")
39071 .replace("MM", "%m")
39072 .replace("DD", "%d")
39073 .replace("HH24", "%H")
39074 .replace("HH12", "%I")
39075 .replace("HH", "%I")
39076 .replace("MI", "%M")
39077 .replace("SSTZH", "%S%z")
39078 .replace("SS", "%S")
39079 .replace("TZH", "%z");
39080 Expression::Literal(Box::new(Literal::String(result)))
39081 } else {
39082 format_expr.clone()
39083 }
39084 } else {
39085 format_expr.clone()
39086 }
39087 }
39088
39089 /// Normalize BigQuery format strings for BQ->BQ output
39090 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
39091 use crate::expressions::Literal;
39092 if let Expression::Literal(lit) = format_expr {
39093 if let Literal::String(s) = lit.as_ref() {
39094 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
39095 Expression::Literal(Box::new(Literal::String(norm)))
39096 } else {
39097 format_expr.clone()
39098 }
39099 } else {
39100 format_expr.clone()
39101 }
39102 }
39103}
39104
39105#[cfg(test)]
39106mod tests {
39107 use super::*;
39108
#[test]
fn test_dialect_type_from_str() {
    // Each dialect name (including the `postgres`/`postgresql` aliases) must parse to
    // the matching `DialectType` variant.
    let cases = [
        ("postgres", DialectType::PostgreSQL),
        ("postgresql", DialectType::PostgreSQL),
        ("mysql", DialectType::MySQL),
        ("bigquery", DialectType::BigQuery),
    ];
    for (name, expected) in cases {
        assert_eq!(name.parse::<DialectType>().unwrap(), expected);
    }
}
39125
#[test]
fn test_basic_transpile() {
    // A trivial statement must survive Generic -> PostgreSQL as exactly one statement.
    let generic = Dialect::get(DialectType::Generic);
    let statements = generic
        .transpile("SELECT 1", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(statements.len(), 1);
    assert_eq!(statements[0], "SELECT 1");
}
39135
#[test]
fn test_sqlite_double_quoted_column_defaults_to_postgres_strings() {
    // The SQLite input uses double-quoted tokens both for identifiers and for DEFAULT
    // values; the PostgreSQL output is expected to keep the identifiers double-quoted
    // while rendering the DEFAULT values as single-quoted strings.
    let sqlite = Dialect::get(DialectType::SQLite);
    let result = sqlite
        .transpile(
            r#"CREATE TABLE "_collections" (
"type" TEXT DEFAULT "base" NOT NULL,
"fields" JSON DEFAULT "[]" NOT NULL,
"options" JSON DEFAULT "{}" NOT NULL
)"#,
            DialectType::PostgreSQL,
        )
        .unwrap();

    // One assertion per column so a failure points at the offending DEFAULT.
    assert!(result[0].contains(r#""type" TEXT DEFAULT 'base' NOT NULL"#));
    assert!(result[0].contains(r#""fields" JSON DEFAULT '[]' NOT NULL"#));
    assert!(result[0].contains(r#""options" JSON DEFAULT '{}' NOT NULL"#));
}
39154
#[test]
fn test_sqlite_identity_preserves_double_quoted_column_defaults() {
    // SQLite -> SQLite must leave the double-quoted DEFAULT value exactly as written.
    let sql = r#"CREATE TABLE "_collections" ("type" TEXT DEFAULT "base" NOT NULL)"#;
    let sqlite = Dialect::get(DialectType::SQLite);
    let round_tripped = sqlite.transpile(sql, DialectType::SQLite).unwrap();

    assert_eq!(
        round_tripped[0],
        r#"CREATE TABLE "_collections" ("type" TEXT DEFAULT "base" NOT NULL)"#
    );
}
39170
#[test]
fn test_function_transformation_mysql() {
    // Targeting MySQL, NVL is expected to be rewritten to IFNULL.
    let generic = Dialect::get(DialectType::Generic);
    let output = generic
        .transpile("SELECT NVL(a, b)", DialectType::MySQL)
        .unwrap();
    assert_eq!(output[0], "SELECT IFNULL(a, b)");
}
39180
#[test]
fn test_get_path_duckdb() {
    // Exploratory test: prints each stage and only requires that transpilation succeeds.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let colon_sql = "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit";
    let get_path_sql = "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')";

    // Stage 1: colon access round-tripped through Snowflake itself.
    let sf_to_sf = snowflake
        .transpile(colon_sql, DialectType::Snowflake)
        .unwrap();
    eprintln!("Snowflake->Snowflake colon: {}", sf_to_sf[0]);

    // Stage 2: the same colon access, targeting DuckDB.
    let sf_to_duck = snowflake.transpile(colon_sql, DialectType::DuckDB).unwrap();
    eprintln!("Snowflake->DuckDB colon: {}", sf_to_duck[0]);

    // Stage 3: the explicit GET_PATH spelling, targeting DuckDB.
    let explicit = snowflake
        .transpile(get_path_sql, DialectType::DuckDB)
        .unwrap();
    eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", explicit[0]);
}
39213
#[test]
fn test_function_transformation_postgres() {
    // Targeting PostgreSQL, both IFNULL and NVL are expected to become COALESCE.
    let generic = Dialect::get(DialectType::Generic);
    for sql in ["SELECT IFNULL(a, b)", "SELECT NVL(a, b)"] {
        let output = generic.transpile(sql, DialectType::PostgreSQL).unwrap();
        assert_eq!(output[0], "SELECT COALESCE(a, b)");
    }
}
39229
#[test]
fn test_hive_cast_to_trycast() {
    // A Hive CAST is expected to become TRY_CAST for targets that support it.
    let hive = Dialect::get(DialectType::Hive);
    let expectations = [
        (DialectType::DuckDB, "TRY_CAST(1 AS INT)"),
        (DialectType::Presto, "TRY_CAST(1 AS INTEGER)"),
    ];
    for (target, expected) in expectations {
        let output = hive.transpile("CAST(1 AS INT)", target).unwrap();
        assert_eq!(output[0], expected);
    }
}
39244
#[test]
fn test_hive_array_identity() {
    // Hive's angle-bracket ARRAY<DATE> syntax must survive both round-trip paths.
    let hive = Dialect::get(DialectType::Hive);
    let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";

    // Path 1: the one-shot transpile API.
    let transpiled = hive.transpile(sql, DialectType::Hive).unwrap();
    eprintln!("Hive ARRAY via transpile: {}", transpiled[0]);
    assert!(
        transpiled[0].contains("ARRAY<DATE>"),
        "transpile: Expected ARRAY<DATE>, got: {}",
        transpiled[0]
    );

    // Path 2: parse -> transform -> generate, as used by identity tests.
    let parsed = hive.parse(sql).unwrap();
    let rewritten = hive.transform(parsed[0].clone()).unwrap();
    let generated = hive.generate(&rewritten).unwrap();
    eprintln!("Hive ARRAY via identity path: {}", generated);
    assert!(
        generated.contains("ARRAY<DATE>"),
        "identity path: Expected ARRAY<DATE>, got: {}",
        generated
    );
}
39271
#[test]
fn test_starrocks_delete_between_expansion() {
    // StarRocks does not support BETWEEN inside DELETE, so it must be expanded there
    // and left alone elsewhere.
    let generic = Dialect::get(DialectType::Generic);
    let to_starrocks = |sql: &str| generic.transpile(sql, DialectType::StarRocks).unwrap();

    // DELETE ... BETWEEN expands to `>= AND <=`.
    let between = to_starrocks("DELETE FROM t WHERE a BETWEEN b AND c");
    assert_eq!(between[0], "DELETE FROM t WHERE a >= b AND a <= c");

    // DELETE ... NOT BETWEEN expands to `< OR >`.
    let not_between = to_starrocks("DELETE FROM t WHERE a NOT BETWEEN b AND c");
    assert_eq!(not_between[0], "DELETE FROM t WHERE a < b OR a > c");

    // SELECT keeps BETWEEN, because StarRocks supports it there.
    let select = to_starrocks("SELECT * FROM t WHERE a BETWEEN b AND c");
    assert!(
        select[0].contains("BETWEEN"),
        "BETWEEN should be preserved in SELECT"
    );
}
39307
#[test]
fn test_snowflake_ltrim_rtrim_parse() {
    // Nested LTRIM(RTRIM(..)) must transpile from Snowflake to DuckDB without error.
    let snowflake = Dialect::get(DialectType::Snowflake);
    let outcome = snowflake.transpile("SELECT LTRIM(RTRIM(col)) FROM t1", DialectType::DuckDB);
    // Log whichever side we got before asserting, to ease debugging.
    match &outcome {
        Err(err) => eprintln!("LTRIM/RTRIM error: {}", err),
        Ok(statements) => eprintln!("LTRIM/RTRIM result: {}", statements[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
        outcome.err()
    );
}
39323
#[test]
fn test_duckdb_count_if_parse() {
    // COUNT_IF(x) must round-trip through DuckDB without error.
    let duckdb = Dialect::get(DialectType::DuckDB);
    let outcome = duckdb.transpile("COUNT_IF(x)", DialectType::DuckDB);
    // Log whichever side we got before asserting, to ease debugging.
    match &outcome {
        Err(err) => eprintln!("COUNT_IF error: {}", err),
        Ok(statements) => eprintln!("COUNT_IF result: {}", statements[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful parse of COUNT_IF(x), got error: {:?}",
        outcome.err()
    );
}
39339
#[test]
fn test_tsql_cast_tinyint_parse() {
    // A TSQL CAST to TINYINT must transpile to DuckDB without error.
    let tsql = Dialect::get(DialectType::TSQL);
    let outcome = tsql.transpile("CAST(X AS TINYINT)", DialectType::DuckDB);
    // Log whichever side we got before asserting, to ease debugging.
    match &outcome {
        Err(err) => eprintln!("TSQL CAST TINYINT error: {}", err),
        Ok(statements) => eprintln!("TSQL CAST TINYINT result: {}", statements[0]),
    }
    assert!(
        outcome.is_ok(),
        "Expected successful transpile, got error: {:?}",
        outcome.err()
    );
}
39355
#[test]
fn test_pg_hash_bitwise_xor() {
    // PostgreSQL's `#` operator must round-trip unchanged.
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let round_tripped = postgres
        .transpile("x # y", DialectType::PostgreSQL)
        .unwrap();
    assert_eq!(round_tripped[0], "x # y");
}
39362
#[test]
fn test_pg_array_to_duckdb() {
    // PostgreSQL ARRAY[...] constructors are expected to become DuckDB list literals,
    // with the `@>` operator kept.
    let sql = "SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]";
    let postgres = Dialect::get(DialectType::PostgreSQL);
    let output = postgres.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], "SELECT [1, 2, 3] @> [1, 2]");
}
39371
#[test]
fn test_array_remove_bigquery() {
    // Targeting BigQuery, ARRAY_REMOVE is expected to be emulated with a filtered
    // UNNEST subquery.
    let sql = "ARRAY_REMOVE(the_array, target)";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::BigQuery).unwrap();
    assert_eq!(
        output[0],
        "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
    );
}
39383
#[test]
fn test_map_clickhouse_case() {
    // Exploratory test: prints the parsed AST and the ClickHouse rendering; it only
    // requires that parsing and transpilation succeed.
    let sql = "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))";
    let generic = Dialect::get(DialectType::Generic);

    let ast = generic.parse(sql).unwrap();
    eprintln!("MAP parsed: {:?}", ast);

    let output = generic.transpile(sql, DialectType::ClickHouse).unwrap();
    eprintln!("MAP result: {}", output[0]);
}
39399
#[test]
fn test_generate_date_array_presto() {
    // Targeting Presto, GENERATE_DATE_ARRAY is expected to become SEQUENCE, with the
    // one-week step rendered as a 7-day interval.
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let expected = "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Presto).unwrap();
    eprintln!("GDA -> Presto: {}", output[0]);
    assert_eq!(output[0], expected);
}
39410
/// Transpiles `UNNEST(GENERATE_DATE_ARRAY(...))` from the generic dialect to
/// PostgreSQL and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_postgres() {
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::PostgreSQL).unwrap();
    eprintln!("GDA -> PostgreSQL: {}", output[0]);
}
39420
/// Transpiles `UNNEST(GENERATE_DATE_ARRAY(...))` from the generic dialect to
/// Snowflake and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_snowflake() {
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("GDA -> Snowflake: {}", output[0]);
}
39432
/// Transpiles `ARRAY_LENGTH(GENERATE_DATE_ARRAY(...))` from the generic dialect
/// to Snowflake and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_array_length_generate_date_array_snowflake() {
    let sql = "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", output[0]);
}
39442
/// Transpiles `UNNEST(GENERATE_DATE_ARRAY(...))` from the generic dialect to
/// MySQL and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_mysql() {
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA -> MySQL: {}", output[0]);
}
39452
/// Transpiles `UNNEST(GENERATE_DATE_ARRAY(...))` from the generic dialect to
/// Redshift and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_redshift() {
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Redshift).unwrap();
    eprintln!("GDA -> Redshift: {}", output[0]);
}
39462
/// Transpiles `UNNEST(GENERATE_DATE_ARRAY(...))` from the generic dialect to
/// TSQL and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_tsql() {
    let sql = "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA -> TSQL: {}", output[0]);
}
39472
/// Transpiles a tuple cast to `STRUCT<...>` to ClickHouse twice: first with
/// space-separated field types, then with the `name: TYPE` colon form.
///
/// Both outcomes (success or error) are only printed, so this test never fails
/// on a transpile error.
#[test]
fn test_struct_colon_syntax() {
    let generic = Dialect::get(DialectType::Generic);

    // The plain form runs first, followed by the colon form.
    let cases = [
        (
            "no colon",
            "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
        ),
        (
            "colon",
            "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
        ),
    ];

    for (label, sql) in cases {
        match generic.transpile(sql, DialectType::ClickHouse) {
            Ok(statements) => eprintln!("STRUCT {} -> ClickHouse: {}", label, statements[0]),
            Err(error) => eprintln!("STRUCT {} error: {}", label, error),
        }
    }
}
39495
/// Transpiles a CTE wrapping `UNNEST(GENERATE_DATE_ARRAY(...))` from the
/// generic dialect to MySQL and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_cte_wrapped_mysql() {
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::MySQL).unwrap();
    eprintln!("GDA CTE -> MySQL: {}", output[0]);
}
39505
/// Transpiles a CTE wrapping `UNNEST(GENERATE_DATE_ARRAY(...))` from the
/// generic dialect to TSQL and prints the result.
///
/// NOTE(review): smoke test only; the generated SQL is printed, not asserted.
#[test]
fn test_generate_date_array_cte_wrapped_tsql() {
    let sql = "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::TSQL).unwrap();
    eprintln!("GDA CTE -> TSQL: {}", output[0]);
}
39515
/// An Oracle `DECODE` whose arguments are all literals is expected to become a
/// plain-equality `CASE` in DuckDB, without any `IS NULL` branch.
#[test]
fn test_decode_literal_no_null_check() {
    let sql = "SELECT decode(1,2,3,4)";
    let expected = "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END";
    let oracle = Dialect::get(DialectType::Oracle);
    let output = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        output[0], expected,
        "Literal DECODE should not have IS NULL checks"
    );
}
39528
/// An Oracle `DECODE` comparing a column against a literal is expected to use
/// plain equality in DuckDB, without any `IS NULL` branch.
#[test]
fn test_decode_column_vs_literal_no_null_check() {
    let sql = "SELECT decode(col, 2, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let output = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(
        output[0], expected,
        "Column vs literal DECODE should not have IS NULL checks"
    );
}
39541
/// An Oracle `DECODE` comparing two columns is expected to keep a null-aware
/// comparison, i.e. the DuckDB output must mention `IS NULL`.
#[test]
fn test_decode_column_vs_column_keeps_null_check() {
    let sql = "SELECT decode(col, col2, 3, 4) FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let output = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    let generated = &output[0];
    assert!(
        generated.contains("IS NULL"),
        "Column vs column DECODE should have IS NULL checks, got: {}",
        generated
    );
}
39555
/// An Oracle `DECODE` whose search value is `NULL` is expected to become an
/// `IS NULL` test in the DuckDB `CASE` expression.
#[test]
fn test_decode_null_search() {
    let sql = "SELECT decode(col, NULL, 3, 4) FROM t";
    let expected = "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t";
    let oracle = Dialect::get(DialectType::Oracle);
    let output = oracle.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39568
39569 // =========================================================================
39570 // REGEXP function transpilation tests
39571 // =========================================================================
39572
/// Two-argument Snowflake `REGEXP_SUBSTR` is expected to map to DuckDB
/// `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_2arg() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39581
/// A start position of 1 in Snowflake `REGEXP_SUBSTR` is expected to be dropped
/// when generating DuckDB `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39590
/// A start position greater than 1 in Snowflake `REGEXP_SUBSTR` is expected to
/// be expressed in DuckDB via `NULLIF(SUBSTRING(s, pos), '')`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39602
/// An occurrence greater than 1 in Snowflake `REGEXP_SUBSTR` is expected to be
/// expressed in DuckDB by indexing into `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)";
    let expected = "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39617
/// Five-argument Snowflake `REGEXP_SUBSTR` with position 1, occurrence 1 and the
/// `'e'` flag is expected to collapse to a two-argument DuckDB `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39629
/// Six-argument Snowflake `REGEXP_SUBSTR` with group 0 is expected to collapse
/// to a two-argument DuckDB `REGEXP_EXTRACT`.
#[test]
fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39641
/// A Snowflake -> Snowflake round trip of `REGEXP_SUBSTR` is expected to drop a
/// trailing group argument of 0.
#[test]
fn test_regexp_substr_snowflake_identity_strip_group0() {
    let sql = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(output[0], expected);
}
39653
/// Two-argument Snowflake `REGEXP_SUBSTR_ALL` is expected to map to DuckDB
/// `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39665
/// A start position greater than 1 in Snowflake `REGEXP_SUBSTR_ALL` is expected
/// to be expressed in DuckDB by wrapping the subject in `SUBSTRING`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39680
/// Five-argument Snowflake `REGEXP_SUBSTR_ALL` with position 1, occurrence 1 and
/// the `'e'` flag is expected to collapse to two-argument `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39692
/// Six-argument Snowflake `REGEXP_SUBSTR_ALL` with group 0 is expected to
/// collapse to two-argument DuckDB `REGEXP_EXTRACT_ALL`.
#[test]
fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39704
/// A Snowflake -> Snowflake round trip of `REGEXP_SUBSTR_ALL` is expected to
/// drop a trailing group argument of 0.
#[test]
fn test_regexp_substr_all_snowflake_identity_strip_group0() {
    let sql = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(output[0], expected);
}
39719
/// Two-argument Snowflake `REGEXP_COUNT` is expected to become a DuckDB `CASE`
/// that returns 0 for an empty pattern and otherwise counts the matches.
#[test]
fn test_regexp_count_snowflake_to_duckdb_2arg() {
    let sql = "SELECT REGEXP_COUNT(s, 'pattern')";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39731
/// Snowflake `REGEXP_COUNT` with a start position is expected to count matches
/// over `SUBSTRING(s, pos)` in the DuckDB output.
#[test]
fn test_regexp_count_snowflake_to_duckdb_3arg() {
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 3)";
    let expected = "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39743
/// Snowflake `REGEXP_COUNT` flags are expected to be folded into the pattern as
/// an inline `(?i)` prefix in the DuckDB output.
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
    let sql = "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')";
    let expected = "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39758
/// Same as the flags case, but with a string-literal subject and the
/// multi-character flag set `'im'`, expected as an inline `(?im)` prefix.
#[test]
fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
    let sql = "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')";
    let expected = "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39773
/// Snowflake `REGEXP_REPLACE` with position 1 and occurrence 1 is expected to
/// collapse to the three-argument DuckDB form.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)";
    let expected = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39785
/// Snowflake `REGEXP_REPLACE` with position 3 and occurrence 0 is expected to
/// keep the untouched prefix and replace globally (`'g'`) in the remainder.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39800
/// Snowflake `REGEXP_REPLACE` with position 3 and occurrence 1 is expected to
/// keep the untouched prefix and replace once (no `'g'`) in the remainder.
#[test]
fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
    let sql = "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)";
    let expected = "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39815
/// Snowflake function-style `RLIKE(a, b)` is expected to map to DuckDB
/// `REGEXP_FULL_MATCH(a, b)`.
#[test]
fn test_rlike_snowflake_to_duckdb_2arg() {
    let sql = "SELECT RLIKE(a, b)";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b)";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39824
/// Snowflake `RLIKE` with a flags argument is expected to pass the flags through
/// as the third argument of DuckDB `REGEXP_FULL_MATCH`.
#[test]
fn test_rlike_snowflake_to_duckdb_3arg_flags() {
    let sql = "SELECT RLIKE(a, b, 'i')";
    let expected = "SELECT REGEXP_FULL_MATCH(a, b, 'i')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    assert_eq!(output[0], expected);
}
39833
/// BigQuery `REGEXP_EXTRACT_ALL` with a pattern that has no capture group is
/// expected to map to two-argument Snowflake `REGEXP_SUBSTR_ALL`.
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')";
    let bigquery = Dialect::get(DialectType::BigQuery);
    let output = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(output[0], expected);
}
39845
/// BigQuery `REGEXP_EXTRACT_ALL` with a capture group in the pattern is expected
/// to produce the six-argument Snowflake form selecting group 1.
#[test]
fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
    let sql = "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')";
    let expected = "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)";
    let bigquery = Dialect::get(DialectType::BigQuery);
    let output = bigquery.transpile(sql, DialectType::Snowflake).unwrap();
    assert_eq!(output[0], expected);
}
39860
/// Two-argument Snowflake `REGEXP_INSTR` transpiled to DuckDB must contain both
/// a `CASE WHEN` and a `LIST_SUM` in the generated SQL.
#[test]
fn test_regexp_instr_snowflake_to_duckdb_2arg() {
    let sql = "SELECT REGEXP_INSTR(s, 'pattern')";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();

    // Checked in this order; the first missing fragment fails the test.
    for fragment in ["CASE WHEN", "LIST_SUM"] {
        assert!(
            output[0].contains(fragment),
            "Expected {} in result: {}",
            fragment,
            output[0]
        );
    }
}
39878
/// Generic `ARRAY_EXCEPT` transpiled to DuckDB must contain a set of expected
/// building blocks (`CASE WHEN`, `LIST_FILTER`, `LIST_DISTINCT`,
/// `IS NOT DISTINCT FROM`, and an `= 0` filter); the exact SQL is not pinned.
#[test]
fn test_array_except_generic_to_duckdb() {
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::DuckDB).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", output[0]);

    // Checked in this order; the first missing fragment fails the test.
    for fragment in [
        "CASE WHEN",
        "LIST_FILTER",
        "LIST_DISTINCT",
        "IS NOT DISTINCT FROM",
    ] {
        assert!(
            output[0].contains(fragment),
            "Expected {}: {}",
            fragment,
            output[0]
        );
    }

    // The last check has its own message wording, so it stays separate.
    assert!(
        output[0].contains("= 0"),
        "Expected = 0 filter: {}",
        output[0]
    );
}
39915
/// Generic `ARRAY_EXCEPT` is expected to stay `ARRAY_EXCEPT` in Snowflake, with
/// the `ARRAY(...)` arguments rendered as bracket literals.
#[test]
fn test_array_except_generic_to_snowflake() {
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", output[0]);
    assert_eq!(output[0], expected);
}
39928
/// Generic `ARRAY_EXCEPT` is expected to stay `ARRAY_EXCEPT` in Presto, with the
/// `ARRAY(...)` arguments rendered as `ARRAY[...]` literals.
#[test]
fn test_array_except_generic_to_presto() {
    let sql = "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))";
    let expected = "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])";
    let generic = Dialect::get(DialectType::Generic);
    let output = generic.transpile(sql, DialectType::Presto).unwrap();
    eprintln!("ARRAY_EXCEPT Generic->Presto: {}", output[0]);
    assert_eq!(output[0], expected);
}
39941
/// Snowflake `ARRAY_EXCEPT` transpiled to DuckDB must contain both a `CASE WHEN`
/// and a `LIST_TRANSFORM`; the exact SQL is not pinned.
#[test]
fn test_array_except_snowflake_to_duckdb() {
    let sql = "SELECT ARRAY_EXCEPT([1, 2, 3], [2])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", output[0]);

    // Checked in this order; the first missing fragment fails the test.
    for fragment in ["CASE WHEN", "LIST_TRANSFORM"] {
        assert!(
            output[0].contains(fragment),
            "Expected {}: {}",
            fragment,
            output[0]
        );
    }
}
39960
/// A Snowflake -> Snowflake round trip of `ARRAY_CONTAINS` is expected to leave
/// the statement unchanged.
#[test]
fn test_array_contains_snowflake_to_snowflake() {
    let sql = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::Snowflake).unwrap();
    eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", output[0]);
    assert_eq!(output[0], sql);
}
39973
/// Snowflake `ARRAY_CONTAINS` transpiled to DuckDB must contain `CASE WHEN`,
/// `NULLIF` and `ARRAY_CONTAINS`; the exact SQL is not pinned.
#[test]
fn test_array_contains_snowflake_to_duckdb() {
    let sql = "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", output[0]);

    // Checked in this order; the first missing fragment fails the test.
    for fragment in ["CASE WHEN", "NULLIF", "ARRAY_CONTAINS"] {
        assert!(
            output[0].contains(fragment),
            "Expected {}: {}",
            fragment,
            output[0]
        );
    }
}
40000
/// Snowflake `ARRAY_DISTINCT` transpiled to DuckDB must contain `CASE WHEN`,
/// `LIST_DISTINCT`, `LIST_APPEND` and `LIST_FILTER`; the exact SQL is not pinned.
#[test]
fn test_array_distinct_snowflake_to_duckdb() {
    let sql = "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])";
    let snowflake = Dialect::get(DialectType::Snowflake);
    let output = snowflake.transpile(sql, DialectType::DuckDB).unwrap();
    eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", output[0]);

    // Checked in this order; the first missing fragment fails the test.
    for fragment in ["CASE WHEN", "LIST_DISTINCT", "LIST_APPEND", "LIST_FILTER"] {
        assert!(
            output[0].contains(fragment),
            "Expected {}: {}",
            fragment,
            output[0]
        );
    }
}
40032}