polyglot_sql/dialects/mod.rs
1//! SQL Dialect System
2//!
3//! This module implements the dialect abstraction layer that enables SQL transpilation
4//! between 30+ database engines. Each dialect encapsulates three concerns:
5//!
6//! - **Tokenization**: Dialect-specific lexing rules (e.g., BigQuery uses backtick quoting,
7//! MySQL uses backtick for identifiers, TSQL uses square brackets).
8//! - **Generation**: How AST nodes are rendered back to SQL text, including identifier quoting
9//! style, function name casing, and syntax variations.
10//! - **Transformation**: AST-level rewrites that convert dialect-specific constructs to/from
11//! a normalized form (e.g., Snowflake `SQUARE(x)` becomes `POWER(x, 2)`).
12//!
13//! The primary entry point is [`Dialect::get`], which returns a configured [`Dialect`] instance
14//! for a given [`DialectType`]. From there, callers can [`parse`](Dialect::parse),
15//! [`generate`](Dialect::generate), [`transform`](Dialect::transform), or
16//! [`transpile`](Dialect::transpile) to another dialect in a single call.
17//!
18//! Each concrete dialect (e.g., `PostgresDialect`, `BigQueryDialect`) implements the
19//! [`DialectImpl`] trait, which provides configuration hooks and expression-level transforms.
20//! Dialect modules live in submodules of this module and are re-exported here.
21
22mod generic; // Always compiled
23
24#[cfg(feature = "dialect-athena")]
25mod athena;
26#[cfg(feature = "dialect-bigquery")]
27mod bigquery;
28#[cfg(feature = "dialect-clickhouse")]
29mod clickhouse;
30#[cfg(feature = "dialect-cockroachdb")]
31mod cockroachdb;
32#[cfg(feature = "dialect-databricks")]
33mod databricks;
34#[cfg(feature = "dialect-datafusion")]
35mod datafusion;
36#[cfg(feature = "dialect-doris")]
37mod doris;
38#[cfg(feature = "dialect-dremio")]
39mod dremio;
40#[cfg(feature = "dialect-drill")]
41mod drill;
42#[cfg(feature = "dialect-druid")]
43mod druid;
44#[cfg(feature = "dialect-duckdb")]
45mod duckdb;
46#[cfg(feature = "dialect-dune")]
47mod dune;
48#[cfg(feature = "dialect-exasol")]
49mod exasol;
50#[cfg(feature = "dialect-fabric")]
51mod fabric;
52#[cfg(feature = "dialect-hive")]
53mod hive;
54#[cfg(feature = "dialect-materialize")]
55mod materialize;
56#[cfg(feature = "dialect-mysql")]
57mod mysql;
58#[cfg(feature = "dialect-oracle")]
59mod oracle;
60#[cfg(feature = "dialect-postgresql")]
61mod postgres;
62#[cfg(feature = "dialect-presto")]
63mod presto;
64#[cfg(feature = "dialect-redshift")]
65mod redshift;
66#[cfg(feature = "dialect-risingwave")]
67mod risingwave;
68#[cfg(feature = "dialect-singlestore")]
69mod singlestore;
70#[cfg(feature = "dialect-snowflake")]
71mod snowflake;
72#[cfg(feature = "dialect-solr")]
73mod solr;
74#[cfg(feature = "dialect-spark")]
75mod spark;
76#[cfg(feature = "dialect-sqlite")]
77mod sqlite;
78#[cfg(feature = "dialect-starrocks")]
79mod starrocks;
80#[cfg(feature = "dialect-tableau")]
81mod tableau;
82#[cfg(feature = "dialect-teradata")]
83mod teradata;
84#[cfg(feature = "dialect-tidb")]
85mod tidb;
86#[cfg(feature = "dialect-trino")]
87mod trino;
88#[cfg(feature = "dialect-tsql")]
89mod tsql;
90
91pub use generic::GenericDialect; // Always available
92
93#[cfg(feature = "dialect-athena")]
94pub use athena::AthenaDialect;
95#[cfg(feature = "dialect-bigquery")]
96pub use bigquery::BigQueryDialect;
97#[cfg(feature = "dialect-clickhouse")]
98pub use clickhouse::ClickHouseDialect;
99#[cfg(feature = "dialect-cockroachdb")]
100pub use cockroachdb::CockroachDBDialect;
101#[cfg(feature = "dialect-databricks")]
102pub use databricks::DatabricksDialect;
103#[cfg(feature = "dialect-datafusion")]
104pub use datafusion::DataFusionDialect;
105#[cfg(feature = "dialect-doris")]
106pub use doris::DorisDialect;
107#[cfg(feature = "dialect-dremio")]
108pub use dremio::DremioDialect;
109#[cfg(feature = "dialect-drill")]
110pub use drill::DrillDialect;
111#[cfg(feature = "dialect-druid")]
112pub use druid::DruidDialect;
113#[cfg(feature = "dialect-duckdb")]
114pub use duckdb::DuckDBDialect;
115#[cfg(feature = "dialect-dune")]
116pub use dune::DuneDialect;
117#[cfg(feature = "dialect-exasol")]
118pub use exasol::ExasolDialect;
119#[cfg(feature = "dialect-fabric")]
120pub use fabric::FabricDialect;
121#[cfg(feature = "dialect-hive")]
122pub use hive::HiveDialect;
123#[cfg(feature = "dialect-materialize")]
124pub use materialize::MaterializeDialect;
125#[cfg(feature = "dialect-mysql")]
126pub use mysql::MySQLDialect;
127#[cfg(feature = "dialect-oracle")]
128pub use oracle::OracleDialect;
129#[cfg(feature = "dialect-postgresql")]
130pub use postgres::PostgresDialect;
131#[cfg(feature = "dialect-presto")]
132pub use presto::PrestoDialect;
133#[cfg(feature = "dialect-redshift")]
134pub use redshift::RedshiftDialect;
135#[cfg(feature = "dialect-risingwave")]
136pub use risingwave::RisingWaveDialect;
137#[cfg(feature = "dialect-singlestore")]
138pub use singlestore::SingleStoreDialect;
139#[cfg(feature = "dialect-snowflake")]
140pub use snowflake::SnowflakeDialect;
141#[cfg(feature = "dialect-solr")]
142pub use solr::SolrDialect;
143#[cfg(feature = "dialect-spark")]
144pub use spark::SparkDialect;
145#[cfg(feature = "dialect-sqlite")]
146pub use sqlite::SQLiteDialect;
147#[cfg(feature = "dialect-starrocks")]
148pub use starrocks::StarRocksDialect;
149#[cfg(feature = "dialect-tableau")]
150pub use tableau::TableauDialect;
151#[cfg(feature = "dialect-teradata")]
152pub use teradata::TeradataDialect;
153#[cfg(feature = "dialect-tidb")]
154pub use tidb::TiDBDialect;
155#[cfg(feature = "dialect-trino")]
156pub use trino::TrinoDialect;
157#[cfg(feature = "dialect-tsql")]
158pub use tsql::TSQLDialect;
159
160use crate::error::Result;
161use crate::expressions::{Expression, FunctionBody, Null};
162use crate::generator::{Generator, GeneratorConfig};
163use crate::parser::Parser;
164use crate::tokens::{Token, Tokenizer, TokenizerConfig};
165use serde::{Deserialize, Serialize};
166use std::collections::HashMap;
167use std::sync::{Arc, LazyLock, RwLock};
168
/// Enumeration of all supported SQL dialects.
///
/// Each variant corresponds to a specific SQL database engine or query language.
/// The `Generic` variant represents standard SQL with no dialect-specific behavior,
/// and is used as the default when no dialect is specified.
///
/// Dialect names are case-insensitive when parsed from strings via [`FromStr`].
/// Some dialects accept aliases (e.g., "mssql" and "sqlserver" both resolve to [`TSQL`](DialectType::TSQL)).
///
/// Serde serialization uses `rename_all = "lowercase"`, so the serialized names
/// ("postgresql", "cockroachdb", ...) match the strings produced by [`Display`](std::fmt::Display).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DialectType {
    /// Standard SQL with no dialect-specific behavior (default).
    Generic,
    /// PostgreSQL -- advanced open-source relational database (also accepts "postgres").
    PostgreSQL,
    /// MySQL -- widely-used open-source relational database.
    MySQL,
    /// Google BigQuery -- serverless cloud data warehouse with unique syntax (backtick quoting, STRUCT types, QUALIFY).
    BigQuery,
    /// Snowflake -- cloud data platform with QUALIFY clause, FLATTEN, and variant types.
    Snowflake,
    /// DuckDB -- in-process analytical database with modern SQL extensions.
    DuckDB,
    /// SQLite -- lightweight embedded relational database.
    SQLite,
    /// Apache Hive -- data warehouse on Hadoop with HiveQL syntax.
    Hive,
    /// Apache Spark SQL -- distributed query engine (also accepts "spark2").
    Spark,
    /// Trino -- distributed SQL query engine (formerly PrestoSQL).
    Trino,
    /// PrestoDB -- distributed SQL query engine for big data.
    Presto,
    /// Amazon Redshift -- cloud data warehouse based on PostgreSQL.
    Redshift,
    /// Transact-SQL (T-SQL) -- Microsoft SQL Server and Azure SQL (also accepts "mssql", "sqlserver").
    TSQL,
    /// Oracle Database -- commercial relational database with PL/SQL extensions.
    Oracle,
    /// ClickHouse -- column-oriented OLAP database for real-time analytics.
    ClickHouse,
    /// Databricks SQL -- Spark-based lakehouse platform with QUALIFY support.
    Databricks,
    /// Amazon Athena -- serverless query service (hybrid Trino/Hive engine).
    Athena,
    /// Teradata -- enterprise data warehouse with proprietary SQL extensions.
    Teradata,
    /// Apache Doris -- real-time analytical database (MySQL-compatible).
    Doris,
    /// StarRocks -- sub-second OLAP database (MySQL-compatible).
    StarRocks,
    /// Materialize -- streaming SQL database built on differential dataflow.
    Materialize,
    /// RisingWave -- distributed streaming database with PostgreSQL compatibility.
    RisingWave,
    /// SingleStore (formerly MemSQL) -- distributed SQL database (also accepts "memsql").
    SingleStore,
    /// CockroachDB -- distributed SQL database with PostgreSQL compatibility (also accepts "cockroach").
    CockroachDB,
    /// TiDB -- distributed HTAP database with MySQL compatibility.
    TiDB,
    /// Apache Druid -- real-time analytics database.
    Druid,
    /// Apache Solr -- search platform with SQL interface.
    Solr,
    /// Tableau -- data visualization platform with its own SQL dialect.
    Tableau,
    /// Dune Analytics -- blockchain analytics SQL engine.
    Dune,
    /// Microsoft Fabric -- unified analytics platform (T-SQL based).
    Fabric,
    /// Apache Drill -- schema-free SQL query engine for big data.
    Drill,
    /// Dremio -- data lakehouse platform with Arrow-based query engine.
    Dremio,
    /// Exasol -- in-memory analytic database.
    Exasol,
    /// Apache DataFusion -- Arrow-based query engine with modern SQL extensions (also accepts "arrow-datafusion").
    DataFusion,
}
249
250impl Default for DialectType {
251 fn default() -> Self {
252 DialectType::Generic
253 }
254}
255
256impl std::fmt::Display for DialectType {
257 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
258 match self {
259 DialectType::Generic => write!(f, "generic"),
260 DialectType::PostgreSQL => write!(f, "postgresql"),
261 DialectType::MySQL => write!(f, "mysql"),
262 DialectType::BigQuery => write!(f, "bigquery"),
263 DialectType::Snowflake => write!(f, "snowflake"),
264 DialectType::DuckDB => write!(f, "duckdb"),
265 DialectType::SQLite => write!(f, "sqlite"),
266 DialectType::Hive => write!(f, "hive"),
267 DialectType::Spark => write!(f, "spark"),
268 DialectType::Trino => write!(f, "trino"),
269 DialectType::Presto => write!(f, "presto"),
270 DialectType::Redshift => write!(f, "redshift"),
271 DialectType::TSQL => write!(f, "tsql"),
272 DialectType::Oracle => write!(f, "oracle"),
273 DialectType::ClickHouse => write!(f, "clickhouse"),
274 DialectType::Databricks => write!(f, "databricks"),
275 DialectType::Athena => write!(f, "athena"),
276 DialectType::Teradata => write!(f, "teradata"),
277 DialectType::Doris => write!(f, "doris"),
278 DialectType::StarRocks => write!(f, "starrocks"),
279 DialectType::Materialize => write!(f, "materialize"),
280 DialectType::RisingWave => write!(f, "risingwave"),
281 DialectType::SingleStore => write!(f, "singlestore"),
282 DialectType::CockroachDB => write!(f, "cockroachdb"),
283 DialectType::TiDB => write!(f, "tidb"),
284 DialectType::Druid => write!(f, "druid"),
285 DialectType::Solr => write!(f, "solr"),
286 DialectType::Tableau => write!(f, "tableau"),
287 DialectType::Dune => write!(f, "dune"),
288 DialectType::Fabric => write!(f, "fabric"),
289 DialectType::Drill => write!(f, "drill"),
290 DialectType::Dremio => write!(f, "dremio"),
291 DialectType::Exasol => write!(f, "exasol"),
292 DialectType::DataFusion => write!(f, "datafusion"),
293 }
294 }
295}
296
297impl std::str::FromStr for DialectType {
298 type Err = crate::error::Error;
299
300 fn from_str(s: &str) -> Result<Self> {
301 match s.to_ascii_lowercase().as_str() {
302 "generic" | "" => Ok(DialectType::Generic),
303 "postgres" | "postgresql" => Ok(DialectType::PostgreSQL),
304 "mysql" => Ok(DialectType::MySQL),
305 "bigquery" => Ok(DialectType::BigQuery),
306 "snowflake" => Ok(DialectType::Snowflake),
307 "duckdb" => Ok(DialectType::DuckDB),
308 "sqlite" => Ok(DialectType::SQLite),
309 "hive" => Ok(DialectType::Hive),
310 "spark" | "spark2" => Ok(DialectType::Spark),
311 "trino" => Ok(DialectType::Trino),
312 "presto" => Ok(DialectType::Presto),
313 "redshift" => Ok(DialectType::Redshift),
314 "tsql" | "mssql" | "sqlserver" => Ok(DialectType::TSQL),
315 "oracle" => Ok(DialectType::Oracle),
316 "clickhouse" => Ok(DialectType::ClickHouse),
317 "databricks" => Ok(DialectType::Databricks),
318 "athena" => Ok(DialectType::Athena),
319 "teradata" => Ok(DialectType::Teradata),
320 "doris" => Ok(DialectType::Doris),
321 "starrocks" => Ok(DialectType::StarRocks),
322 "materialize" => Ok(DialectType::Materialize),
323 "risingwave" => Ok(DialectType::RisingWave),
324 "singlestore" | "memsql" => Ok(DialectType::SingleStore),
325 "cockroachdb" | "cockroach" => Ok(DialectType::CockroachDB),
326 "tidb" => Ok(DialectType::TiDB),
327 "druid" => Ok(DialectType::Druid),
328 "solr" => Ok(DialectType::Solr),
329 "tableau" => Ok(DialectType::Tableau),
330 "dune" => Ok(DialectType::Dune),
331 "fabric" => Ok(DialectType::Fabric),
332 "drill" => Ok(DialectType::Drill),
333 "dremio" => Ok(DialectType::Dremio),
334 "exasol" => Ok(DialectType::Exasol),
335 "datafusion" | "arrow-datafusion" | "arrow_datafusion" => Ok(DialectType::DataFusion),
336 _ => Err(crate::error::Error::parse(
337 format!("Unknown dialect: {}", s),
338 0,
339 0,
340 0,
341 0,
342 )),
343 }
344 }
345}
346
/// Trait that each concrete SQL dialect must implement.
///
/// `DialectImpl` provides the configuration hooks and per-expression transform logic
/// that distinguish one dialect from another. Implementors supply:
///
/// - A [`DialectType`] identifier.
/// - Optional overrides for tokenizer and generator configuration (defaults to generic SQL).
/// - An expression-level transform function ([`transform_expr`](DialectImpl::transform_expr))
///   that rewrites individual AST nodes for this dialect (e.g., converting `NVL` to `COALESCE`).
/// - An optional preprocessing step ([`preprocess`](DialectImpl::preprocess)) for whole-tree
///   rewrites that must run before the recursive per-node transform (e.g., eliminating QUALIFY).
///
/// The default implementations are no-ops, so a minimal dialect only needs to provide
/// [`dialect_type`](DialectImpl::dialect_type) and override the methods that differ from
/// standard SQL.
pub trait DialectImpl {
    /// Returns the [`DialectType`] that identifies this dialect.
    fn dialect_type(&self) -> DialectType;

    /// Returns the tokenizer configuration for this dialect.
    ///
    /// Override to customize identifier quoting characters, string escape rules,
    /// comment styles, and other lexing behavior.
    fn tokenizer_config(&self) -> TokenizerConfig {
        // Default: generic SQL lexing rules.
        TokenizerConfig::default()
    }

    /// Returns the generator configuration for this dialect.
    ///
    /// Override to customize identifier quoting style, function name casing,
    /// keyword casing, and other SQL generation behavior.
    fn generator_config(&self) -> GeneratorConfig {
        // Default: generic SQL generation rules.
        GeneratorConfig::default()
    }

    /// Returns a generator configuration tailored to a specific expression.
    ///
    /// Override this for hybrid dialects like Athena that route to different SQL engines
    /// based on expression type (e.g., Hive-style generation for DDL, Trino-style for DML).
    /// The default delegates to [`generator_config`](DialectImpl::generator_config).
    fn generator_config_for_expr(&self, _expr: &Expression) -> GeneratorConfig {
        self.generator_config()
    }

    /// Transforms a single expression node for this dialect, without recursing into children.
    ///
    /// This is the per-node rewrite hook invoked by [`transform_recursive`]. Return the
    /// expression unchanged if no dialect-specific rewrite is needed. Transformations
    /// typically include function renaming, operator substitution, and type mapping.
    fn transform_expr(&self, expr: Expression) -> Result<Expression> {
        // Default: identity — no dialect-specific rewrite.
        Ok(expr)
    }

    /// Applies whole-tree preprocessing transforms before the recursive per-node pass.
    ///
    /// Override this to apply structural rewrites that must see the entire tree at once,
    /// such as `eliminate_qualify`, `eliminate_distinct_on`, `ensure_bools`, or
    /// `explode_projection_to_unnest`. The default is a no-op pass-through.
    fn preprocess(&self, expr: Expression) -> Result<Expression> {
        Ok(expr)
    }
}
409
410/// Recursively transforms a [`DataType`](crate::expressions::DataType), handling nested
411/// parametric types such as `ARRAY<INT>`, `STRUCT<a INT, b TEXT>`, and `MAP<STRING, INT>`.
412///
413/// The outer type is first passed through `transform_fn` as an `Expression::DataType`,
414/// and then nested element/field types are recursed into. This ensures that dialect-level
415/// type mappings (e.g., `INT` to `INTEGER`) propagate into complex nested types.
416fn transform_data_type_recursive<F>(
417 dt: crate::expressions::DataType,
418 transform_fn: &F,
419) -> Result<crate::expressions::DataType>
420where
421 F: Fn(Expression) -> Result<Expression>,
422{
423 use crate::expressions::DataType;
424 // First, transform the outermost type through the expression system
425 let dt_expr = transform_fn(Expression::DataType(dt))?;
426 let dt = match dt_expr {
427 Expression::DataType(d) => d,
428 _ => {
429 return Ok(match dt_expr {
430 _ => DataType::Custom {
431 name: "UNKNOWN".to_string(),
432 },
433 })
434 }
435 };
436 // Then recurse into nested types
437 match dt {
438 DataType::Array {
439 element_type,
440 dimension,
441 } => {
442 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
443 Ok(DataType::Array {
444 element_type: Box::new(inner),
445 dimension,
446 })
447 }
448 DataType::List { element_type } => {
449 let inner = transform_data_type_recursive(*element_type, transform_fn)?;
450 Ok(DataType::List {
451 element_type: Box::new(inner),
452 })
453 }
454 DataType::Struct { fields, nested } => {
455 let mut new_fields = Vec::new();
456 for mut field in fields {
457 field.data_type = transform_data_type_recursive(field.data_type, transform_fn)?;
458 new_fields.push(field);
459 }
460 Ok(DataType::Struct {
461 fields: new_fields,
462 nested,
463 })
464 }
465 DataType::Map {
466 key_type,
467 value_type,
468 } => {
469 let k = transform_data_type_recursive(*key_type, transform_fn)?;
470 let v = transform_data_type_recursive(*value_type, transform_fn)?;
471 Ok(DataType::Map {
472 key_type: Box::new(k),
473 value_type: Box::new(v),
474 })
475 }
476 other => Ok(other),
477 }
478}
479
480/// Convert DuckDB C-style format strings to Presto C-style format strings.
481/// DuckDB and Presto both use C-style % directives but with different specifiers for some cases.
482#[cfg(feature = "transpile")]
fn duckdb_to_presto_format(fmt: &str) -> String {
    // Pass 1: shield multi-character sequences behind \x01 sentinels so the
    // single-specifier replacements in pass 2 cannot corrupt them.
    const PROTECT: [(&str, &str); 6] = [
        ("%-m", "\x01NOPADM\x01"),
        ("%-d", "\x01NOPADD\x01"),
        ("%-I", "\x01NOPADI\x01"),
        ("%-H", "\x01NOPADH\x01"),
        ("%H:%M:%S", "\x01HMS\x01"),
        ("%Y-%m-%d", "\x01YMD\x01"),
    ];
    // Pass 3: swap each sentinel for its Presto-style equivalent.
    const RESTORE: [(&str, &str); 6] = [
        ("\x01NOPADM\x01", "%c"),
        ("\x01NOPADD\x01", "%e"),
        ("\x01NOPADI\x01", "%l"),
        ("\x01NOPADH\x01", "%k"),
        ("\x01HMS\x01", "%T"),
        ("\x01YMD\x01", "%Y-%m-%d"),
    ];

    let mut out = fmt.to_string();
    for (from, to) in PROTECT {
        out = out.replace(from, to);
    }
    // Pass 2: minute and second use different letters in Presto.
    out = out.replace("%M", "%i").replace("%S", "%s");
    for (from, to) in RESTORE {
        out = out.replace(from, to);
    }
    out
}
505
506/// Convert DuckDB C-style format strings to BigQuery format strings.
507/// BigQuery uses a mix of strftime-like directives.
508#[cfg(feature = "transpile")]
fn duckdb_to_bigquery_format(fmt: &str) -> String {
    // Apply replacements longest-first so composite patterns win over their
    // component parts; the order matches the documented conversion rules.
    [
        ("%-d", "%e"),
        ("%Y-%m-%d %H:%M:%S", "%F %T"),
        ("%Y-%m-%d", "%F"),
        ("%H:%M:%S", "%T"),
    ]
    .into_iter()
    .fold(fmt.to_string(), |acc, (from, to)| acc.replace(from, to))
}
518
/// Work item for the explicit task stack used by `transform_recursive_inner`.
#[derive(Debug)]
enum TransformTask {
    /// Visit an expression: transform leaves directly, or schedule children
    /// plus a matching `Finish` step for composite nodes.
    Visit(Expression),
    /// Reassemble a parent node from its already-transformed children.
    Finish(FinishTask),
}
524
/// Describes how a parent expression is rebuilt once its children have been
/// transformed and pushed onto the result stack. The carried `Expression`
/// holds the parent shell with its child slots temporarily set to `Null`.
#[derive(Debug)]
enum FinishTask {
    /// Parent with a single child (Alias, Paren, Not, Subquery, etc.).
    Unary(Expression),
    /// Parent with left/right children (And, Add, Eq, etc.); children are
    /// visited left-then-right, so results pop right-then-left.
    Binary(Expression),
    /// Cast-like parent -- NOTE(review): handled outside this chunk; presumably
    /// an expression plus a data type. Confirm against the finish handler.
    CastLike(Expression),
    /// Parent holding a child list; the count is presumably how many
    /// transformed children to pop from the result stack -- confirm.
    List(Expression, usize),
    /// FROM clause; the count presumably tracks transformed sources -- confirm.
    From(crate::expressions::From, usize),
    /// Full SELECT reassembly; component counts live in [`SelectFrame`].
    Select(SelectFrame),
    /// Set-operation parent (UNION/INTERSECT/EXCEPT style nodes).
    SetOp(Expression),
}
535
/// Bookkeeping for reassembling a SELECT after its components were transformed.
///
/// NOTE(review): the consuming finish handler lives outside this chunk; the
/// field meanings below follow from the names and should be confirmed there.
#[derive(Debug)]
struct SelectFrame {
    // The SELECT shell whose slots are refilled from the result stack.
    select: Box<crate::expressions::Select>,
    // Number of projection expressions that were pushed for transformation.
    expr_count: usize,
    // Whether a FROM clause was present (and thus one result to pop).
    from_present: bool,
    // Whether a WHERE predicate was present.
    where_present: bool,
    // Number of GROUP BY expressions that were pushed.
    group_by_count: usize,
    // Whether a HAVING predicate was present.
    having_present: bool,
    // Whether a QUALIFY predicate was present.
    qualify_present: bool,
}
546
547fn transform_pop_result(results: &mut Vec<Expression>) -> Result<Expression> {
548 results
549 .pop()
550 .ok_or_else(|| crate::error::Error::Internal("transform stack underflow".to_string()))
551}
552
553fn transform_pop_results(results: &mut Vec<Expression>, count: usize) -> Result<Vec<Expression>> {
554 if results.len() < count {
555 return Err(crate::error::Error::Internal(
556 "transform result stack underflow".to_string(),
557 ));
558 }
559 Ok(results.split_off(results.len() - count))
560}
561
/// Applies a transform function bottom-up through an entire expression tree.
///
/// The public entrypoint uses an explicit task stack for the recursion-heavy shapes
/// that dominate deeply nested SQL (nested SELECT/FROM/SUBQUERY chains, set-operation
/// trees, and common binary/unary expression chains). Less common shapes currently
/// reuse the reference recursive implementation so semantics stay identical while
/// the hot path avoids stack growth.
///
/// # Errors
///
/// Propagates any error returned by `transform_fn`.
pub fn transform_recursive<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
where
    F: Fn(Expression) -> Result<Expression>,
{
    #[cfg(feature = "stacker")]
    {
        // Debug builds use a larger red zone because unoptimized stack frames
        // are substantially bigger than release frames.
        let red_zone = if cfg!(debug_assertions) {
            4 * 1024 * 1024
        } else {
            1024 * 1024
        };
        // stacker grows the call stack (in 8 MiB segments) whenever less than
        // `red_zone` bytes of stack remain, guarding against overflow on very
        // deep trees that still hit native recursion inside the inner pass.
        stacker::maybe_grow(red_zone, 8 * 1024 * 1024, move || {
            transform_recursive_inner(expr, transform_fn)
        })
    }
    #[cfg(not(feature = "stacker"))]
    {
        transform_recursive_inner(expr, transform_fn)
    }
}
589
590fn transform_recursive_inner<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
591where
592 F: Fn(Expression) -> Result<Expression>,
593{
594 let mut tasks = vec![TransformTask::Visit(expr)];
595 let mut results = Vec::new();
596
597 while let Some(task) = tasks.pop() {
598 match task {
599 TransformTask::Visit(expr) => {
600 if matches!(
601 &expr,
602 Expression::Literal(_)
603 | Expression::Boolean(_)
604 | Expression::Null(_)
605 | Expression::Identifier(_)
606 | Expression::Star(_)
607 | Expression::Parameter(_)
608 | Expression::Placeholder(_)
609 | Expression::SessionParameter(_)
610 ) {
611 results.push(transform_fn(expr)?);
612 continue;
613 }
614
615 match expr {
616 Expression::Alias(mut alias) => {
617 let child = std::mem::replace(&mut alias.this, Expression::Null(Null));
618 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Alias(
619 alias,
620 ))));
621 tasks.push(TransformTask::Visit(child));
622 }
623 Expression::Paren(mut paren) => {
624 let child = std::mem::replace(&mut paren.this, Expression::Null(Null));
625 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Paren(
626 paren,
627 ))));
628 tasks.push(TransformTask::Visit(child));
629 }
630 Expression::Not(mut not) => {
631 let child = std::mem::replace(&mut not.this, Expression::Null(Null));
632 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Not(
633 not,
634 ))));
635 tasks.push(TransformTask::Visit(child));
636 }
637 Expression::Neg(mut neg) => {
638 let child = std::mem::replace(&mut neg.this, Expression::Null(Null));
639 tasks.push(TransformTask::Finish(FinishTask::Unary(Expression::Neg(
640 neg,
641 ))));
642 tasks.push(TransformTask::Visit(child));
643 }
644 Expression::IsNull(mut expr) => {
645 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
646 tasks.push(TransformTask::Finish(FinishTask::Unary(
647 Expression::IsNull(expr),
648 )));
649 tasks.push(TransformTask::Visit(child));
650 }
651 Expression::IsTrue(mut expr) => {
652 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
653 tasks.push(TransformTask::Finish(FinishTask::Unary(
654 Expression::IsTrue(expr),
655 )));
656 tasks.push(TransformTask::Visit(child));
657 }
658 Expression::IsFalse(mut expr) => {
659 let child = std::mem::replace(&mut expr.this, Expression::Null(Null));
660 tasks.push(TransformTask::Finish(FinishTask::Unary(
661 Expression::IsFalse(expr),
662 )));
663 tasks.push(TransformTask::Visit(child));
664 }
665 Expression::Subquery(mut subquery) => {
666 let child = std::mem::replace(&mut subquery.this, Expression::Null(Null));
667 tasks.push(TransformTask::Finish(FinishTask::Unary(
668 Expression::Subquery(subquery),
669 )));
670 tasks.push(TransformTask::Visit(child));
671 }
672 Expression::Exists(mut exists) => {
673 let child = std::mem::replace(&mut exists.this, Expression::Null(Null));
674 tasks.push(TransformTask::Finish(FinishTask::Unary(
675 Expression::Exists(exists),
676 )));
677 tasks.push(TransformTask::Visit(child));
678 }
679 Expression::TableArgument(mut arg) => {
680 let child = std::mem::replace(&mut arg.this, Expression::Null(Null));
681 tasks.push(TransformTask::Finish(FinishTask::Unary(
682 Expression::TableArgument(arg),
683 )));
684 tasks.push(TransformTask::Visit(child));
685 }
686 Expression::And(mut op) => {
687 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
688 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
689 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::And(
690 op,
691 ))));
692 tasks.push(TransformTask::Visit(right));
693 tasks.push(TransformTask::Visit(left));
694 }
695 Expression::Or(mut op) => {
696 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
697 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
698 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Or(
699 op,
700 ))));
701 tasks.push(TransformTask::Visit(right));
702 tasks.push(TransformTask::Visit(left));
703 }
704 Expression::Add(mut op) => {
705 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
706 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
707 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Add(
708 op,
709 ))));
710 tasks.push(TransformTask::Visit(right));
711 tasks.push(TransformTask::Visit(left));
712 }
713 Expression::Sub(mut op) => {
714 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
715 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
716 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Sub(
717 op,
718 ))));
719 tasks.push(TransformTask::Visit(right));
720 tasks.push(TransformTask::Visit(left));
721 }
722 Expression::Mul(mut op) => {
723 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
724 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
725 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mul(
726 op,
727 ))));
728 tasks.push(TransformTask::Visit(right));
729 tasks.push(TransformTask::Visit(left));
730 }
731 Expression::Div(mut op) => {
732 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
733 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
734 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Div(
735 op,
736 ))));
737 tasks.push(TransformTask::Visit(right));
738 tasks.push(TransformTask::Visit(left));
739 }
740 Expression::Eq(mut op) => {
741 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
742 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
743 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Eq(
744 op,
745 ))));
746 tasks.push(TransformTask::Visit(right));
747 tasks.push(TransformTask::Visit(left));
748 }
749 Expression::Lt(mut op) => {
750 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
751 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
752 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lt(
753 op,
754 ))));
755 tasks.push(TransformTask::Visit(right));
756 tasks.push(TransformTask::Visit(left));
757 }
758 Expression::Gt(mut op) => {
759 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
760 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
761 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gt(
762 op,
763 ))));
764 tasks.push(TransformTask::Visit(right));
765 tasks.push(TransformTask::Visit(left));
766 }
767 Expression::Neq(mut op) => {
768 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
769 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
770 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Neq(
771 op,
772 ))));
773 tasks.push(TransformTask::Visit(right));
774 tasks.push(TransformTask::Visit(left));
775 }
776 Expression::Lte(mut op) => {
777 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
778 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
779 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Lte(
780 op,
781 ))));
782 tasks.push(TransformTask::Visit(right));
783 tasks.push(TransformTask::Visit(left));
784 }
785 Expression::Gte(mut op) => {
786 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
787 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
788 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Gte(
789 op,
790 ))));
791 tasks.push(TransformTask::Visit(right));
792 tasks.push(TransformTask::Visit(left));
793 }
794 Expression::Mod(mut op) => {
795 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
796 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
797 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Mod(
798 op,
799 ))));
800 tasks.push(TransformTask::Visit(right));
801 tasks.push(TransformTask::Visit(left));
802 }
803 Expression::Concat(mut op) => {
804 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
805 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
806 tasks.push(TransformTask::Finish(FinishTask::Binary(
807 Expression::Concat(op),
808 )));
809 tasks.push(TransformTask::Visit(right));
810 tasks.push(TransformTask::Visit(left));
811 }
812 Expression::BitwiseAnd(mut op) => {
813 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
814 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
815 tasks.push(TransformTask::Finish(FinishTask::Binary(
816 Expression::BitwiseAnd(op),
817 )));
818 tasks.push(TransformTask::Visit(right));
819 tasks.push(TransformTask::Visit(left));
820 }
821 Expression::BitwiseOr(mut op) => {
822 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
823 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
824 tasks.push(TransformTask::Finish(FinishTask::Binary(
825 Expression::BitwiseOr(op),
826 )));
827 tasks.push(TransformTask::Visit(right));
828 tasks.push(TransformTask::Visit(left));
829 }
830 Expression::BitwiseXor(mut op) => {
831 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
832 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
833 tasks.push(TransformTask::Finish(FinishTask::Binary(
834 Expression::BitwiseXor(op),
835 )));
836 tasks.push(TransformTask::Visit(right));
837 tasks.push(TransformTask::Visit(left));
838 }
839 Expression::Is(mut op) => {
840 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
841 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
842 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Is(
843 op,
844 ))));
845 tasks.push(TransformTask::Visit(right));
846 tasks.push(TransformTask::Visit(left));
847 }
848 Expression::MemberOf(mut op) => {
849 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
850 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
851 tasks.push(TransformTask::Finish(FinishTask::Binary(
852 Expression::MemberOf(op),
853 )));
854 tasks.push(TransformTask::Visit(right));
855 tasks.push(TransformTask::Visit(left));
856 }
857 Expression::ArrayContainsAll(mut op) => {
858 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
859 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
860 tasks.push(TransformTask::Finish(FinishTask::Binary(
861 Expression::ArrayContainsAll(op),
862 )));
863 tasks.push(TransformTask::Visit(right));
864 tasks.push(TransformTask::Visit(left));
865 }
866 Expression::ArrayContainedBy(mut op) => {
867 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
868 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
869 tasks.push(TransformTask::Finish(FinishTask::Binary(
870 Expression::ArrayContainedBy(op),
871 )));
872 tasks.push(TransformTask::Visit(right));
873 tasks.push(TransformTask::Visit(left));
874 }
875 Expression::ArrayOverlaps(mut op) => {
876 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
877 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
878 tasks.push(TransformTask::Finish(FinishTask::Binary(
879 Expression::ArrayOverlaps(op),
880 )));
881 tasks.push(TransformTask::Visit(right));
882 tasks.push(TransformTask::Visit(left));
883 }
884 Expression::TsMatch(mut op) => {
885 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
886 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
887 tasks.push(TransformTask::Finish(FinishTask::Binary(
888 Expression::TsMatch(op),
889 )));
890 tasks.push(TransformTask::Visit(right));
891 tasks.push(TransformTask::Visit(left));
892 }
893 Expression::Adjacent(mut op) => {
894 let right = std::mem::replace(&mut op.right, Expression::Null(Null));
895 let left = std::mem::replace(&mut op.left, Expression::Null(Null));
896 tasks.push(TransformTask::Finish(FinishTask::Binary(
897 Expression::Adjacent(op),
898 )));
899 tasks.push(TransformTask::Visit(right));
900 tasks.push(TransformTask::Visit(left));
901 }
902 Expression::Like(mut like) => {
903 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
904 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
905 tasks.push(TransformTask::Finish(FinishTask::Binary(Expression::Like(
906 like,
907 ))));
908 tasks.push(TransformTask::Visit(right));
909 tasks.push(TransformTask::Visit(left));
910 }
911 Expression::ILike(mut like) => {
912 let right = std::mem::replace(&mut like.right, Expression::Null(Null));
913 let left = std::mem::replace(&mut like.left, Expression::Null(Null));
914 tasks.push(TransformTask::Finish(FinishTask::Binary(
915 Expression::ILike(like),
916 )));
917 tasks.push(TransformTask::Visit(right));
918 tasks.push(TransformTask::Visit(left));
919 }
                // Cast-like nodes have a single expression child; the target data
                // type is transformed later, in FinishTask::CastLike.
                Expression::Cast(mut cast) => {
                    let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
                    tasks.push(TransformTask::Finish(FinishTask::CastLike(
                        Expression::Cast(cast),
                    )));
                    tasks.push(TransformTask::Visit(child));
                }
927 Expression::TryCast(mut cast) => {
928 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
929 tasks.push(TransformTask::Finish(FinishTask::CastLike(
930 Expression::TryCast(cast),
931 )));
932 tasks.push(TransformTask::Visit(child));
933 }
934 Expression::SafeCast(mut cast) => {
935 let child = std::mem::replace(&mut cast.this, Expression::Null(Null));
936 tasks.push(TransformTask::Finish(FinishTask::CastLike(
937 Expression::SafeCast(cast),
938 )));
939 tasks.push(TransformTask::Visit(child));
940 }
                // Variadic nodes: take the whole child list, record its length in the
                // Finish task, and push visits in reverse so the children are
                // transformed (and their results stacked) in source order.
                Expression::Function(mut function) => {
                    let args = std::mem::take(&mut function.args);
                    let count = args.len();
                    tasks.push(TransformTask::Finish(FinishTask::List(
                        Expression::Function(function),
                        count,
                    )));
                    for child in args.into_iter().rev() {
                        tasks.push(TransformTask::Visit(child));
                    }
                }
952 Expression::Array(mut array) => {
953 let expressions = std::mem::take(&mut array.expressions);
954 let count = expressions.len();
955 tasks.push(TransformTask::Finish(FinishTask::List(
956 Expression::Array(array),
957 count,
958 )));
959 for child in expressions.into_iter().rev() {
960 tasks.push(TransformTask::Visit(child));
961 }
962 }
963 Expression::Tuple(mut tuple) => {
964 let expressions = std::mem::take(&mut tuple.expressions);
965 let count = expressions.len();
966 tasks.push(TransformTask::Finish(FinishTask::List(
967 Expression::Tuple(tuple),
968 count,
969 )));
970 for child in expressions.into_iter().rev() {
971 tasks.push(TransformTask::Visit(child));
972 }
973 }
974 Expression::ArrayFunc(mut array) => {
975 let expressions = std::mem::take(&mut array.expressions);
976 let count = expressions.len();
977 tasks.push(TransformTask::Finish(FinishTask::List(
978 Expression::ArrayFunc(array),
979 count,
980 )));
981 for child in expressions.into_iter().rev() {
982 tasks.push(TransformTask::Visit(child));
983 }
984 }
985 Expression::Coalesce(mut func) => {
986 let expressions = std::mem::take(&mut func.expressions);
987 let count = expressions.len();
988 tasks.push(TransformTask::Finish(FinishTask::List(
989 Expression::Coalesce(func),
990 count,
991 )));
992 for child in expressions.into_iter().rev() {
993 tasks.push(TransformTask::Visit(child));
994 }
995 }
996 Expression::Greatest(mut func) => {
997 let expressions = std::mem::take(&mut func.expressions);
998 let count = expressions.len();
999 tasks.push(TransformTask::Finish(FinishTask::List(
1000 Expression::Greatest(func),
1001 count,
1002 )));
1003 for child in expressions.into_iter().rev() {
1004 tasks.push(TransformTask::Visit(child));
1005 }
1006 }
1007 Expression::Least(mut func) => {
1008 let expressions = std::mem::take(&mut func.expressions);
1009 let count = expressions.len();
1010 tasks.push(TransformTask::Finish(FinishTask::List(
1011 Expression::Least(func),
1012 count,
1013 )));
1014 for child in expressions.into_iter().rev() {
1015 tasks.push(TransformTask::Visit(child));
1016 }
1017 }
1018 Expression::ArrayConcat(mut func) => {
1019 let expressions = std::mem::take(&mut func.expressions);
1020 let count = expressions.len();
1021 tasks.push(TransformTask::Finish(FinishTask::List(
1022 Expression::ArrayConcat(func),
1023 count,
1024 )));
1025 for child in expressions.into_iter().rev() {
1026 tasks.push(TransformTask::Visit(child));
1027 }
1028 }
1029 Expression::ArrayIntersect(mut func) => {
1030 let expressions = std::mem::take(&mut func.expressions);
1031 let count = expressions.len();
1032 tasks.push(TransformTask::Finish(FinishTask::List(
1033 Expression::ArrayIntersect(func),
1034 count,
1035 )));
1036 for child in expressions.into_iter().rev() {
1037 tasks.push(TransformTask::Visit(child));
1038 }
1039 }
1040 Expression::ArrayZip(mut func) => {
1041 let expressions = std::mem::take(&mut func.expressions);
1042 let count = expressions.len();
1043 tasks.push(TransformTask::Finish(FinishTask::List(
1044 Expression::ArrayZip(func),
1045 count,
1046 )));
1047 for child in expressions.into_iter().rev() {
1048 tasks.push(TransformTask::Visit(child));
1049 }
1050 }
1051 Expression::MapConcat(mut func) => {
1052 let expressions = std::mem::take(&mut func.expressions);
1053 let count = expressions.len();
1054 tasks.push(TransformTask::Finish(FinishTask::List(
1055 Expression::MapConcat(func),
1056 count,
1057 )));
1058 for child in expressions.into_iter().rev() {
1059 tasks.push(TransformTask::Visit(child));
1060 }
1061 }
1062 Expression::JsonArray(mut func) => {
1063 let expressions = std::mem::take(&mut func.expressions);
1064 let count = expressions.len();
1065 tasks.push(TransformTask::Finish(FinishTask::List(
1066 Expression::JsonArray(func),
1067 count,
1068 )));
1069 for child in expressions.into_iter().rev() {
1070 tasks.push(TransformTask::Visit(child));
1071 }
1072 }
                // A standalone FROM clause: children go through the task stack and
                // the clause shell is rebuilt in FinishTask::From. `*from` unboxes,
                // since FinishTask::From stores the clause by value.
                Expression::From(mut from) => {
                    let expressions = std::mem::take(&mut from.expressions);
                    let count = expressions.len();
                    tasks.push(TransformTask::Finish(FinishTask::From(*from, count)));
                    for child in expressions.into_iter().rev() {
                        tasks.push(TransformTask::Visit(child));
                    }
                }
                // SELECT: detach every clause that participates in the iterative walk,
                // recording which clauses were present so the matching
                // FinishTask::Select can pop the transformed results back into place.
                Expression::Select(mut select) => {
                    let expressions = std::mem::take(&mut select.expressions);
                    let expr_count = expressions.len();

                    // FROM gets its own FinishTask so transform_fn also sees the
                    // rebuilt Expression::From node.
                    let from_info = select.from.take().map(|mut from| {
                        let children = std::mem::take(&mut from.expressions);
                        (from, children)
                    });
                    let from_present = from_info.is_some();

                    // WHERE / HAVING / QUALIFY children are swapped out for Null
                    // placeholders; the clause shells stay on `select` and are
                    // refilled at finish time.
                    let where_child = select.where_clause.as_mut().map(|where_clause| {
                        std::mem::replace(&mut where_clause.this, Expression::Null(Null))
                    });
                    let where_present = where_child.is_some();

                    let group_expressions = select
                        .group_by
                        .as_mut()
                        .map(|group_by| std::mem::take(&mut group_by.expressions))
                        .unwrap_or_default();
                    let group_by_count = group_expressions.len();

                    let having_child = select.having.as_mut().map(|having| {
                        std::mem::replace(&mut having.this, Expression::Null(Null))
                    });
                    let having_present = having_child.is_some();

                    let qualify_child = select.qualify.as_mut().map(|qualify| {
                        std::mem::replace(&mut qualify.this, Expression::Null(Null))
                    });
                    let qualify_present = qualify_child.is_some();

                    // Push the finish frame first so it runs after all child visits
                    // (the task list is a LIFO stack).
                    tasks.push(TransformTask::Finish(FinishTask::Select(SelectFrame {
                        select,
                        expr_count,
                        from_present,
                        where_present,
                        group_by_count,
                        having_present,
                        qualify_present,
                    })));

                    // Visits are pushed in reverse clause order so results are
                    // produced as: projections, FROM, WHERE, GROUP BY, HAVING,
                    // QUALIFY — exactly the pop order FinishTask::Select expects
                    // (it pops QUALIFY first).
                    if let Some(child) = qualify_child {
                        tasks.push(TransformTask::Visit(child));
                    }
                    if let Some(child) = having_child {
                        tasks.push(TransformTask::Visit(child));
                    }
                    for child in group_expressions.into_iter().rev() {
                        tasks.push(TransformTask::Visit(child));
                    }
                    if let Some(child) = where_child {
                        tasks.push(TransformTask::Visit(child));
                    }
                    if let Some((from, children)) = from_info {
                        tasks.push(TransformTask::Finish(FinishTask::From(
                            from,
                            children.len(),
                        )));
                        for child in children.into_iter().rev() {
                            tasks.push(TransformTask::Visit(child));
                        }
                    }
                    for child in expressions.into_iter().rev() {
                        tasks.push(TransformTask::Visit(child));
                    }
                }
1148 Expression::Union(mut union) => {
1149 let right = std::mem::replace(&mut union.right, Expression::Null(Null));
1150 let left = std::mem::replace(&mut union.left, Expression::Null(Null));
1151 tasks.push(TransformTask::Finish(FinishTask::SetOp(Expression::Union(
1152 union,
1153 ))));
1154 tasks.push(TransformTask::Visit(right));
1155 tasks.push(TransformTask::Visit(left));
1156 }
1157 Expression::Intersect(mut intersect) => {
1158 let right = std::mem::replace(&mut intersect.right, Expression::Null(Null));
1159 let left = std::mem::replace(&mut intersect.left, Expression::Null(Null));
1160 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1161 Expression::Intersect(intersect),
1162 )));
1163 tasks.push(TransformTask::Visit(right));
1164 tasks.push(TransformTask::Visit(left));
1165 }
1166 Expression::Except(mut except) => {
1167 let right = std::mem::replace(&mut except.right, Expression::Null(Null));
1168 let left = std::mem::replace(&mut except.left, Expression::Null(Null));
1169 tasks.push(TransformTask::Finish(FinishTask::SetOp(
1170 Expression::Except(except),
1171 )));
1172 tasks.push(TransformTask::Visit(right));
1173 tasks.push(TransformTask::Visit(left));
1174 }
1175 other => {
1176 results.push(transform_recursive_reference(other, transform_fn)?);
1177 }
1178 }
1179 }
1180 TransformTask::Finish(finish) => match finish {
                FinishTask::Unary(expr) => {
                    // Exactly one child result is on the stack; reattach it to the
                    // single-child wrapper, then hand the rebuilt node to
                    // transform_fn so the dialect can rewrite the wrapper itself.
                    let child = transform_pop_result(&mut results)?;
                    let rebuilt = match expr {
                        Expression::Alias(mut alias) => {
                            alias.this = child;
                            Expression::Alias(alias)
                        }
                        Expression::Paren(mut paren) => {
                            paren.this = child;
                            Expression::Paren(paren)
                        }
                        Expression::Not(mut not) => {
                            not.this = child;
                            Expression::Not(not)
                        }
                        Expression::Neg(mut neg) => {
                            neg.this = child;
                            Expression::Neg(neg)
                        }
                        Expression::IsNull(mut expr) => {
                            expr.this = child;
                            Expression::IsNull(expr)
                        }
                        Expression::IsTrue(mut expr) => {
                            expr.this = child;
                            Expression::IsTrue(expr)
                        }
                        Expression::IsFalse(mut expr) => {
                            expr.this = child;
                            Expression::IsFalse(expr)
                        }
                        Expression::Subquery(mut subquery) => {
                            subquery.this = child;
                            Expression::Subquery(subquery)
                        }
                        Expression::Exists(mut exists) => {
                            exists.this = child;
                            Expression::Exists(exists)
                        }
                        Expression::TableArgument(mut arg) => {
                            arg.this = child;
                            Expression::TableArgument(arg)
                        }
                        _ => {
                            // The Visit phase only queues the variants above as
                            // Unary; anything else is a bookkeeping bug.
                            return Err(crate::error::Error::Internal(
                                "unexpected unary transform task".to_string(),
                            ));
                        }
                    };
                    results.push(transform_fn(rebuilt)?);
                }
1232 FinishTask::Binary(expr) => {
1233 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1234 let left = children.next().expect("left child");
1235 let right = children.next().expect("right child");
1236 let rebuilt = match expr {
1237 Expression::And(mut op) => {
1238 op.left = left;
1239 op.right = right;
1240 Expression::And(op)
1241 }
1242 Expression::Or(mut op) => {
1243 op.left = left;
1244 op.right = right;
1245 Expression::Or(op)
1246 }
1247 Expression::Add(mut op) => {
1248 op.left = left;
1249 op.right = right;
1250 Expression::Add(op)
1251 }
1252 Expression::Sub(mut op) => {
1253 op.left = left;
1254 op.right = right;
1255 Expression::Sub(op)
1256 }
1257 Expression::Mul(mut op) => {
1258 op.left = left;
1259 op.right = right;
1260 Expression::Mul(op)
1261 }
1262 Expression::Div(mut op) => {
1263 op.left = left;
1264 op.right = right;
1265 Expression::Div(op)
1266 }
1267 Expression::Eq(mut op) => {
1268 op.left = left;
1269 op.right = right;
1270 Expression::Eq(op)
1271 }
1272 Expression::Lt(mut op) => {
1273 op.left = left;
1274 op.right = right;
1275 Expression::Lt(op)
1276 }
1277 Expression::Gt(mut op) => {
1278 op.left = left;
1279 op.right = right;
1280 Expression::Gt(op)
1281 }
1282 Expression::Neq(mut op) => {
1283 op.left = left;
1284 op.right = right;
1285 Expression::Neq(op)
1286 }
1287 Expression::Lte(mut op) => {
1288 op.left = left;
1289 op.right = right;
1290 Expression::Lte(op)
1291 }
1292 Expression::Gte(mut op) => {
1293 op.left = left;
1294 op.right = right;
1295 Expression::Gte(op)
1296 }
1297 Expression::Mod(mut op) => {
1298 op.left = left;
1299 op.right = right;
1300 Expression::Mod(op)
1301 }
1302 Expression::Concat(mut op) => {
1303 op.left = left;
1304 op.right = right;
1305 Expression::Concat(op)
1306 }
1307 Expression::BitwiseAnd(mut op) => {
1308 op.left = left;
1309 op.right = right;
1310 Expression::BitwiseAnd(op)
1311 }
1312 Expression::BitwiseOr(mut op) => {
1313 op.left = left;
1314 op.right = right;
1315 Expression::BitwiseOr(op)
1316 }
1317 Expression::BitwiseXor(mut op) => {
1318 op.left = left;
1319 op.right = right;
1320 Expression::BitwiseXor(op)
1321 }
1322 Expression::Is(mut op) => {
1323 op.left = left;
1324 op.right = right;
1325 Expression::Is(op)
1326 }
1327 Expression::MemberOf(mut op) => {
1328 op.left = left;
1329 op.right = right;
1330 Expression::MemberOf(op)
1331 }
1332 Expression::ArrayContainsAll(mut op) => {
1333 op.left = left;
1334 op.right = right;
1335 Expression::ArrayContainsAll(op)
1336 }
1337 Expression::ArrayContainedBy(mut op) => {
1338 op.left = left;
1339 op.right = right;
1340 Expression::ArrayContainedBy(op)
1341 }
1342 Expression::ArrayOverlaps(mut op) => {
1343 op.left = left;
1344 op.right = right;
1345 Expression::ArrayOverlaps(op)
1346 }
1347 Expression::TsMatch(mut op) => {
1348 op.left = left;
1349 op.right = right;
1350 Expression::TsMatch(op)
1351 }
1352 Expression::Adjacent(mut op) => {
1353 op.left = left;
1354 op.right = right;
1355 Expression::Adjacent(op)
1356 }
1357 Expression::Like(mut like) => {
1358 like.left = left;
1359 like.right = right;
1360 Expression::Like(like)
1361 }
1362 Expression::ILike(mut like) => {
1363 like.left = left;
1364 like.right = right;
1365 Expression::ILike(like)
1366 }
1367 _ => {
1368 return Err(crate::error::Error::Internal(
1369 "unexpected binary transform task".to_string(),
1370 ));
1371 }
1372 };
1373 results.push(transform_fn(rebuilt)?);
1374 }
                FinishTask::CastLike(expr) => {
                    // One child (the casted expression) is on the stack. The target
                    // data type is also transformed — recursively, to cover nested
                    // types such as ARRAY<INT> or STRUCT<a INT> — before the node
                    // is handed back to transform_fn.
                    let child = transform_pop_result(&mut results)?;
                    let rebuilt = match expr {
                        Expression::Cast(mut cast) => {
                            cast.this = child;
                            cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
                            Expression::Cast(cast)
                        }
                        Expression::TryCast(mut cast) => {
                            cast.this = child;
                            cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
                            Expression::TryCast(cast)
                        }
                        Expression::SafeCast(mut cast) => {
                            cast.this = child;
                            cast.to = transform_data_type_recursive(cast.to, transform_fn)?;
                            Expression::SafeCast(cast)
                        }
                        _ => {
                            // Only Cast / TryCast / SafeCast are queued as CastLike.
                            return Err(crate::error::Error::Internal(
                                "unexpected cast transform task".to_string(),
                            ));
                        }
                    };
                    results.push(transform_fn(rebuilt)?);
                }
1401 FinishTask::List(expr, count) => {
1402 let children = transform_pop_results(&mut results, count)?;
1403 let rebuilt = match expr {
1404 Expression::Function(mut function) => {
1405 function.args = children;
1406 Expression::Function(function)
1407 }
1408 Expression::Array(mut array) => {
1409 array.expressions = children;
1410 Expression::Array(array)
1411 }
1412 Expression::Tuple(mut tuple) => {
1413 tuple.expressions = children;
1414 Expression::Tuple(tuple)
1415 }
1416 Expression::ArrayFunc(mut array) => {
1417 array.expressions = children;
1418 Expression::ArrayFunc(array)
1419 }
1420 Expression::Coalesce(mut func) => {
1421 func.expressions = children;
1422 Expression::Coalesce(func)
1423 }
1424 Expression::Greatest(mut func) => {
1425 func.expressions = children;
1426 Expression::Greatest(func)
1427 }
1428 Expression::Least(mut func) => {
1429 func.expressions = children;
1430 Expression::Least(func)
1431 }
1432 Expression::ArrayConcat(mut func) => {
1433 func.expressions = children;
1434 Expression::ArrayConcat(func)
1435 }
1436 Expression::ArrayIntersect(mut func) => {
1437 func.expressions = children;
1438 Expression::ArrayIntersect(func)
1439 }
1440 Expression::ArrayZip(mut func) => {
1441 func.expressions = children;
1442 Expression::ArrayZip(func)
1443 }
1444 Expression::MapConcat(mut func) => {
1445 func.expressions = children;
1446 Expression::MapConcat(func)
1447 }
1448 Expression::JsonArray(mut func) => {
1449 func.expressions = children;
1450 Expression::JsonArray(func)
1451 }
1452 _ => {
1453 return Err(crate::error::Error::Internal(
1454 "unexpected list transform task".to_string(),
1455 ));
1456 }
1457 };
1458 results.push(transform_fn(rebuilt)?);
1459 }
                FinishTask::From(mut from, count) => {
                    // Reattach the transformed FROM children (in source order) and
                    // surface the clause to transform_fn as a full Expression::From
                    // node; FinishTask::Select relies on getting a From back.
                    from.expressions = transform_pop_results(&mut results, count)?;
                    results.push(transform_fn(Expression::From(Box::new(from)))?);
                }
                FinishTask::Select(frame) => {
                    let mut select = *frame.select;

                    // Results are popped in reverse of the visit order recorded by
                    // the Visit arm: QUALIFY, HAVING, GROUP BY, WHERE, FROM, then
                    // the projection list.
                    if frame.qualify_present {
                        if let Some(ref mut qualify) = select.qualify {
                            qualify.this = transform_pop_result(&mut results)?;
                        }
                    }
                    if frame.having_present {
                        if let Some(ref mut having) = select.having {
                            having.this = transform_pop_result(&mut results)?;
                        }
                    }
                    if frame.group_by_count > 0 {
                        if let Some(ref mut group_by) = select.group_by {
                            group_by.expressions =
                                transform_pop_results(&mut results, frame.group_by_count)?;
                        }
                    }
                    if frame.where_present {
                        if let Some(ref mut where_clause) = select.where_clause {
                            where_clause.this = transform_pop_result(&mut results)?;
                        }
                    }
                    if frame.from_present {
                        // The FROM clause went through FinishTask::From, so its
                        // result must still be an Expression::From node.
                        match transform_pop_result(&mut results)? {
                            Expression::From(from) => {
                                select.from = Some(*from);
                            }
                            _ => {
                                return Err(crate::error::Error::Internal(
                                    "expected FROM expression result".to_string(),
                                ));
                            }
                        }
                    }
                    select.expressions = transform_pop_results(&mut results, frame.expr_count)?;

                    // JOINs, lateral views, CTEs, ORDER BY and window specs are not
                    // routed through the task stack; they are handled here with the
                    // recursive transform path.
                    select.joins = select
                        .joins
                        .into_iter()
                        .map(|mut join| {
                            join.this = transform_recursive(join.this, transform_fn)?;
                            if let Some(on) = join.on.take() {
                                join.on = Some(transform_recursive(on, transform_fn)?);
                            }
                            // Wrap in Expression::Join so transform_fn can rewrite
                            // the join itself; it must hand back a Join.
                            match transform_fn(Expression::Join(Box::new(join)))? {
                                Expression::Join(j) => Ok(*j),
                                _ => Err(crate::error::Error::parse(
                                    "Join transformation returned non-join expression",
                                    0,
                                    0,
                                    0,
                                    0,
                                )),
                            }
                        })
                        .collect::<Result<Vec<_>>>()?;

                    select.lateral_views = select
                        .lateral_views
                        .into_iter()
                        .map(|mut lv| {
                            lv.this = transform_recursive(lv.this, transform_fn)?;
                            Ok(lv)
                        })
                        .collect::<Result<Vec<_>>>()?;

                    // CTE bodies are transformed best-effort: the original body is
                    // cloned up front and restored on error, so a failing transform
                    // cannot drop or corrupt a CTE.
                    if let Some(mut with) = select.with.take() {
                        with.ctes = with
                            .ctes
                            .into_iter()
                            .map(|mut cte| {
                                let original = cte.this.clone();
                                cte.this =
                                    transform_recursive(cte.this, transform_fn).unwrap_or(original);
                                cte
                            })
                            .collect();
                        select.with = Some(with);
                    }

                    // ORDER BY items: transform the inner expression, then offer the
                    // whole Ordered wrapper to transform_fn (for NULLS FIRST/LAST
                    // style rewrites); fall back to the untransformed item if the
                    // transform errors or returns a different variant.
                    if let Some(mut order) = select.order_by.take() {
                        order.expressions = order
                            .expressions
                            .into_iter()
                            .map(|o| {
                                let mut o = o;
                                let original = o.this.clone();
                                o.this =
                                    transform_recursive(o.this, transform_fn).unwrap_or(original);
                                match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
                                    Ok(Expression::Ordered(transformed)) => *transformed,
                                    Ok(_) | Err(_) => o,
                                }
                            })
                            .collect();
                        select.order_by = Some(order);
                    }

                    // Named WINDOW clause specs get the same Ordered treatment.
                    if let Some(ref mut windows) = select.windows {
                        for nw in windows.iter_mut() {
                            nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
                                .into_iter()
                                .map(|o| {
                                    let mut o = o;
                                    let original = o.this.clone();
                                    o.this = transform_recursive(o.this, transform_fn)
                                        .unwrap_or(original);
                                    match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
                                        Ok(Expression::Ordered(transformed)) => *transformed,
                                        Ok(_) | Err(_) => o,
                                    }
                                })
                                .collect();
                        }
                    }

                    results.push(transform_fn(Expression::Select(Box::new(select)))?);
                }
1584 FinishTask::SetOp(expr) => {
1585 let mut children = transform_pop_results(&mut results, 2)?.into_iter();
1586 let left = children.next().expect("left child");
1587 let right = children.next().expect("right child");
1588
1589 let rebuilt = match expr {
1590 Expression::Union(mut union) => {
1591 union.left = left;
1592 union.right = right;
1593 if let Some(mut with) = union.with.take() {
1594 with.ctes = with
1595 .ctes
1596 .into_iter()
1597 .map(|mut cte| {
1598 let original = cte.this.clone();
1599 cte.this = transform_recursive(cte.this, transform_fn)
1600 .unwrap_or(original);
1601 cte
1602 })
1603 .collect();
1604 union.with = Some(with);
1605 }
1606 Expression::Union(union)
1607 }
1608 Expression::Intersect(mut intersect) => {
1609 intersect.left = left;
1610 intersect.right = right;
1611 if let Some(mut with) = intersect.with.take() {
1612 with.ctes = with
1613 .ctes
1614 .into_iter()
1615 .map(|mut cte| {
1616 let original = cte.this.clone();
1617 cte.this = transform_recursive(cte.this, transform_fn)
1618 .unwrap_or(original);
1619 cte
1620 })
1621 .collect();
1622 intersect.with = Some(with);
1623 }
1624 Expression::Intersect(intersect)
1625 }
1626 Expression::Except(mut except) => {
1627 except.left = left;
1628 except.right = right;
1629 if let Some(mut with) = except.with.take() {
1630 with.ctes = with
1631 .ctes
1632 .into_iter()
1633 .map(|mut cte| {
1634 let original = cte.this.clone();
1635 cte.this = transform_recursive(cte.this, transform_fn)
1636 .unwrap_or(original);
1637 cte
1638 })
1639 .collect();
1640 except.with = Some(with);
1641 }
1642 Expression::Except(except)
1643 }
1644 _ => {
1645 return Err(crate::error::Error::Internal(
1646 "unexpected set-op transform task".to_string(),
1647 ));
1648 }
1649 };
1650 results.push(transform_fn(rebuilt)?);
1651 }
1652 },
1653 }
1654 }
1655
    // The walk must leave exactly one rebuilt expression on the result stack;
    // any other count indicates a Visit/Finish bookkeeping bug.
    match results.len() {
        1 => Ok(results.pop().expect("single transform result")),
        _ => Err(crate::error::Error::Internal(
            "unexpected transform result stack size".to_string(),
        )),
    }
1662}
1663
1664fn transform_recursive_reference<F>(expr: Expression, transform_fn: &F) -> Result<Expression>
1665where
1666 F: Fn(Expression) -> Result<Expression>,
1667{
1668 use crate::expressions::BinaryOp;
1669
1670 // Helper macro to recurse into AggFunc-based expressions (this, filter, order_by, having_max, limit).
1671 macro_rules! recurse_agg {
1672 ($variant:ident, $f:expr) => {{
1673 let mut f = $f;
1674 f.this = transform_recursive(f.this, transform_fn)?;
1675 if let Some(filter) = f.filter.take() {
1676 f.filter = Some(transform_recursive(filter, transform_fn)?);
1677 }
1678 for ord in &mut f.order_by {
1679 ord.this = transform_recursive(
1680 std::mem::replace(&mut ord.this, Expression::Null(crate::expressions::Null)),
1681 transform_fn,
1682 )?;
1683 }
1684 if let Some((ref mut expr, _)) = f.having_max {
1685 *expr = Box::new(transform_recursive(
1686 std::mem::replace(expr.as_mut(), Expression::Null(crate::expressions::Null)),
1687 transform_fn,
1688 )?);
1689 }
1690 if let Some(limit) = f.limit.take() {
1691 f.limit = Some(Box::new(transform_recursive(*limit, transform_fn)?));
1692 }
1693 Expression::$variant(f)
1694 }};
1695 }
1696
1697 // Helper macro to transform binary ops with Box<BinaryOp>
1698 macro_rules! transform_binary {
1699 ($variant:ident, $op:expr) => {{
1700 let left = transform_recursive($op.left, transform_fn)?;
1701 let right = transform_recursive($op.right, transform_fn)?;
1702 Expression::$variant(Box::new(BinaryOp {
1703 left,
1704 right,
1705 left_comments: $op.left_comments,
1706 operator_comments: $op.operator_comments,
1707 trailing_comments: $op.trailing_comments,
1708 inferred_type: $op.inferred_type,
1709 }))
1710 }};
1711 }
1712
1713 // Fast path: leaf nodes never need child traversal, apply transform directly
1714 if matches!(
1715 &expr,
1716 Expression::Literal(_)
1717 | Expression::Boolean(_)
1718 | Expression::Null(_)
1719 | Expression::Identifier(_)
1720 | Expression::Star(_)
1721 | Expression::Parameter(_)
1722 | Expression::Placeholder(_)
1723 | Expression::SessionParameter(_)
1724 ) {
1725 return transform_fn(expr);
1726 }
1727
1728 // First recursively transform children, then apply the transform function
1729 let expr = match expr {
1730 Expression::Select(mut select) => {
1731 select.expressions = select
1732 .expressions
1733 .into_iter()
1734 .map(|e| transform_recursive(e, transform_fn))
1735 .collect::<Result<Vec<_>>>()?;
1736
1737 // Transform FROM clause
1738 if let Some(mut from) = select.from.take() {
1739 from.expressions = from
1740 .expressions
1741 .into_iter()
1742 .map(|e| transform_recursive(e, transform_fn))
1743 .collect::<Result<Vec<_>>>()?;
1744 select.from = Some(from);
1745 }
1746
1747 // Transform JOINs - important for CROSS APPLY / LATERAL transformations
1748 select.joins = select
1749 .joins
1750 .into_iter()
1751 .map(|mut join| {
1752 join.this = transform_recursive(join.this, transform_fn)?;
1753 if let Some(on) = join.on.take() {
1754 join.on = Some(transform_recursive(on, transform_fn)?);
1755 }
1756 // Wrap join in Expression::Join to allow transform_fn to transform it
1757 match transform_fn(Expression::Join(Box::new(join)))? {
1758 Expression::Join(j) => Ok(*j),
1759 _ => Err(crate::error::Error::parse(
1760 "Join transformation returned non-join expression",
1761 0,
1762 0,
1763 0,
1764 0,
1765 )),
1766 }
1767 })
1768 .collect::<Result<Vec<_>>>()?;
1769
1770 // Transform LATERAL VIEW expressions (Hive/Spark)
1771 select.lateral_views = select
1772 .lateral_views
1773 .into_iter()
1774 .map(|mut lv| {
1775 lv.this = transform_recursive(lv.this, transform_fn)?;
1776 Ok(lv)
1777 })
1778 .collect::<Result<Vec<_>>>()?;
1779
1780 // Transform WHERE clause
1781 if let Some(mut where_clause) = select.where_clause.take() {
1782 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
1783 select.where_clause = Some(where_clause);
1784 }
1785
1786 // Transform GROUP BY
1787 if let Some(mut group_by) = select.group_by.take() {
1788 group_by.expressions = group_by
1789 .expressions
1790 .into_iter()
1791 .map(|e| transform_recursive(e, transform_fn))
1792 .collect::<Result<Vec<_>>>()?;
1793 select.group_by = Some(group_by);
1794 }
1795
1796 // Transform HAVING
1797 if let Some(mut having) = select.having.take() {
1798 having.this = transform_recursive(having.this, transform_fn)?;
1799 select.having = Some(having);
1800 }
1801
1802 // Transform WITH (CTEs)
1803 if let Some(mut with) = select.with.take() {
1804 with.ctes = with
1805 .ctes
1806 .into_iter()
1807 .map(|mut cte| {
1808 let original = cte.this.clone();
1809 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
1810 cte
1811 })
1812 .collect();
1813 select.with = Some(with);
1814 }
1815
1816 // Transform ORDER BY
1817 if let Some(mut order) = select.order_by.take() {
1818 order.expressions = order
1819 .expressions
1820 .into_iter()
1821 .map(|o| {
1822 let mut o = o;
1823 let original = o.this.clone();
1824 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1825 // Also apply transform to the Ordered wrapper itself (for NULLS FIRST etc.)
1826 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1827 Ok(Expression::Ordered(transformed)) => *transformed,
1828 Ok(_) | Err(_) => o,
1829 }
1830 })
1831 .collect();
1832 select.order_by = Some(order);
1833 }
1834
1835 // Transform WINDOW clause order_by
1836 if let Some(ref mut windows) = select.windows {
1837 for nw in windows.iter_mut() {
1838 nw.spec.order_by = std::mem::take(&mut nw.spec.order_by)
1839 .into_iter()
1840 .map(|o| {
1841 let mut o = o;
1842 let original = o.this.clone();
1843 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
1844 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
1845 Ok(Expression::Ordered(transformed)) => *transformed,
1846 Ok(_) | Err(_) => o,
1847 }
1848 })
1849 .collect();
1850 }
1851 }
1852
1853 // Transform QUALIFY
1854 if let Some(mut qual) = select.qualify.take() {
1855 qual.this = transform_recursive(qual.this, transform_fn)?;
1856 select.qualify = Some(qual);
1857 }
1858
1859 Expression::Select(select)
1860 }
1861 Expression::Function(mut f) => {
1862 f.args = f
1863 .args
1864 .into_iter()
1865 .map(|e| transform_recursive(e, transform_fn))
1866 .collect::<Result<Vec<_>>>()?;
1867 Expression::Function(f)
1868 }
1869 Expression::AggregateFunction(mut f) => {
1870 f.args = f
1871 .args
1872 .into_iter()
1873 .map(|e| transform_recursive(e, transform_fn))
1874 .collect::<Result<Vec<_>>>()?;
1875 if let Some(filter) = f.filter {
1876 f.filter = Some(transform_recursive(filter, transform_fn)?);
1877 }
1878 Expression::AggregateFunction(f)
1879 }
1880 Expression::WindowFunction(mut wf) => {
1881 wf.this = transform_recursive(wf.this, transform_fn)?;
1882 wf.over.partition_by = wf
1883 .over
1884 .partition_by
1885 .into_iter()
1886 .map(|e| transform_recursive(e, transform_fn))
1887 .collect::<Result<Vec<_>>>()?;
1888 // Transform order_by items through Expression::Ordered wrapper
1889 wf.over.order_by = wf
1890 .over
1891 .order_by
1892 .into_iter()
1893 .map(|o| {
1894 let mut o = o;
1895 o.this = transform_recursive(o.this, transform_fn)?;
1896 match transform_fn(Expression::Ordered(Box::new(o)))? {
1897 Expression::Ordered(transformed) => Ok(*transformed),
1898 _ => Err(crate::error::Error::parse(
1899 "Ordered transformation returned non-Ordered expression",
1900 0,
1901 0,
1902 0,
1903 0,
1904 )),
1905 }
1906 })
1907 .collect::<Result<Vec<_>>>()?;
1908 Expression::WindowFunction(wf)
1909 }
1910 Expression::Alias(mut a) => {
1911 a.this = transform_recursive(a.this, transform_fn)?;
1912 Expression::Alias(a)
1913 }
1914 Expression::Cast(mut c) => {
1915 c.this = transform_recursive(c.this, transform_fn)?;
1916 // Also transform the target data type (recursively for nested types like ARRAY<INT>, STRUCT<a INT>)
1917 c.to = transform_data_type_recursive(c.to, transform_fn)?;
1918 Expression::Cast(c)
1919 }
1920 Expression::And(op) => transform_binary!(And, *op),
1921 Expression::Or(op) => transform_binary!(Or, *op),
1922 Expression::Add(op) => transform_binary!(Add, *op),
1923 Expression::Sub(op) => transform_binary!(Sub, *op),
1924 Expression::Mul(op) => transform_binary!(Mul, *op),
1925 Expression::Div(op) => transform_binary!(Div, *op),
1926 Expression::Eq(op) => transform_binary!(Eq, *op),
1927 Expression::Lt(op) => transform_binary!(Lt, *op),
1928 Expression::Gt(op) => transform_binary!(Gt, *op),
1929 Expression::Paren(mut p) => {
1930 p.this = transform_recursive(p.this, transform_fn)?;
1931 Expression::Paren(p)
1932 }
1933 Expression::Coalesce(mut f) => {
1934 f.expressions = f
1935 .expressions
1936 .into_iter()
1937 .map(|e| transform_recursive(e, transform_fn))
1938 .collect::<Result<Vec<_>>>()?;
1939 Expression::Coalesce(f)
1940 }
1941 Expression::IfNull(mut f) => {
1942 f.this = transform_recursive(f.this, transform_fn)?;
1943 f.expression = transform_recursive(f.expression, transform_fn)?;
1944 Expression::IfNull(f)
1945 }
1946 Expression::Nvl(mut f) => {
1947 f.this = transform_recursive(f.this, transform_fn)?;
1948 f.expression = transform_recursive(f.expression, transform_fn)?;
1949 Expression::Nvl(f)
1950 }
1951 Expression::In(mut i) => {
1952 i.this = transform_recursive(i.this, transform_fn)?;
1953 i.expressions = i
1954 .expressions
1955 .into_iter()
1956 .map(|e| transform_recursive(e, transform_fn))
1957 .collect::<Result<Vec<_>>>()?;
1958 if let Some(query) = i.query {
1959 i.query = Some(transform_recursive(query, transform_fn)?);
1960 }
1961 Expression::In(i)
1962 }
1963 Expression::Not(mut n) => {
1964 n.this = transform_recursive(n.this, transform_fn)?;
1965 Expression::Not(n)
1966 }
1967 Expression::ArraySlice(mut s) => {
1968 s.this = transform_recursive(s.this, transform_fn)?;
1969 if let Some(start) = s.start {
1970 s.start = Some(transform_recursive(start, transform_fn)?);
1971 }
1972 if let Some(end) = s.end {
1973 s.end = Some(transform_recursive(end, transform_fn)?);
1974 }
1975 Expression::ArraySlice(s)
1976 }
1977 Expression::Subscript(mut s) => {
1978 s.this = transform_recursive(s.this, transform_fn)?;
1979 s.index = transform_recursive(s.index, transform_fn)?;
1980 Expression::Subscript(s)
1981 }
1982 Expression::Array(mut a) => {
1983 a.expressions = a
1984 .expressions
1985 .into_iter()
1986 .map(|e| transform_recursive(e, transform_fn))
1987 .collect::<Result<Vec<_>>>()?;
1988 Expression::Array(a)
1989 }
1990 Expression::Struct(mut s) => {
1991 let mut new_fields = Vec::new();
1992 for (name, expr) in s.fields {
1993 let transformed = transform_recursive(expr, transform_fn)?;
1994 new_fields.push((name, transformed));
1995 }
1996 s.fields = new_fields;
1997 Expression::Struct(s)
1998 }
1999 Expression::NamedArgument(mut na) => {
2000 na.value = transform_recursive(na.value, transform_fn)?;
2001 Expression::NamedArgument(na)
2002 }
2003 Expression::MapFunc(mut m) => {
2004 m.keys = m
2005 .keys
2006 .into_iter()
2007 .map(|e| transform_recursive(e, transform_fn))
2008 .collect::<Result<Vec<_>>>()?;
2009 m.values = m
2010 .values
2011 .into_iter()
2012 .map(|e| transform_recursive(e, transform_fn))
2013 .collect::<Result<Vec<_>>>()?;
2014 Expression::MapFunc(m)
2015 }
2016 Expression::ArrayFunc(mut a) => {
2017 a.expressions = a
2018 .expressions
2019 .into_iter()
2020 .map(|e| transform_recursive(e, transform_fn))
2021 .collect::<Result<Vec<_>>>()?;
2022 Expression::ArrayFunc(a)
2023 }
2024 Expression::Lambda(mut l) => {
2025 l.body = transform_recursive(l.body, transform_fn)?;
2026 Expression::Lambda(l)
2027 }
2028 Expression::JsonExtract(mut f) => {
2029 f.this = transform_recursive(f.this, transform_fn)?;
2030 f.path = transform_recursive(f.path, transform_fn)?;
2031 Expression::JsonExtract(f)
2032 }
2033 Expression::JsonExtractScalar(mut f) => {
2034 f.this = transform_recursive(f.this, transform_fn)?;
2035 f.path = transform_recursive(f.path, transform_fn)?;
2036 Expression::JsonExtractScalar(f)
2037 }
2038
2039 // ===== UnaryFunc-based expressions =====
2040 // These all have a single `this: Expression` child
2041 Expression::Length(mut f) => {
2042 f.this = transform_recursive(f.this, transform_fn)?;
2043 Expression::Length(f)
2044 }
2045 Expression::Upper(mut f) => {
2046 f.this = transform_recursive(f.this, transform_fn)?;
2047 Expression::Upper(f)
2048 }
2049 Expression::Lower(mut f) => {
2050 f.this = transform_recursive(f.this, transform_fn)?;
2051 Expression::Lower(f)
2052 }
2053 Expression::LTrim(mut f) => {
2054 f.this = transform_recursive(f.this, transform_fn)?;
2055 Expression::LTrim(f)
2056 }
2057 Expression::RTrim(mut f) => {
2058 f.this = transform_recursive(f.this, transform_fn)?;
2059 Expression::RTrim(f)
2060 }
2061 Expression::Reverse(mut f) => {
2062 f.this = transform_recursive(f.this, transform_fn)?;
2063 Expression::Reverse(f)
2064 }
2065 Expression::Abs(mut f) => {
2066 f.this = transform_recursive(f.this, transform_fn)?;
2067 Expression::Abs(f)
2068 }
2069 Expression::Ceil(mut f) => {
2070 f.this = transform_recursive(f.this, transform_fn)?;
2071 Expression::Ceil(f)
2072 }
2073 Expression::Floor(mut f) => {
2074 f.this = transform_recursive(f.this, transform_fn)?;
2075 Expression::Floor(f)
2076 }
2077 Expression::Sign(mut f) => {
2078 f.this = transform_recursive(f.this, transform_fn)?;
2079 Expression::Sign(f)
2080 }
2081 Expression::Sqrt(mut f) => {
2082 f.this = transform_recursive(f.this, transform_fn)?;
2083 Expression::Sqrt(f)
2084 }
2085 Expression::Cbrt(mut f) => {
2086 f.this = transform_recursive(f.this, transform_fn)?;
2087 Expression::Cbrt(f)
2088 }
2089 Expression::Ln(mut f) => {
2090 f.this = transform_recursive(f.this, transform_fn)?;
2091 Expression::Ln(f)
2092 }
2093 Expression::Log(mut f) => {
2094 f.this = transform_recursive(f.this, transform_fn)?;
2095 if let Some(base) = f.base {
2096 f.base = Some(transform_recursive(base, transform_fn)?);
2097 }
2098 Expression::Log(f)
2099 }
2100 Expression::Exp(mut f) => {
2101 f.this = transform_recursive(f.this, transform_fn)?;
2102 Expression::Exp(f)
2103 }
2104 Expression::Date(mut f) => {
2105 f.this = transform_recursive(f.this, transform_fn)?;
2106 Expression::Date(f)
2107 }
2108 Expression::Stddev(f) => recurse_agg!(Stddev, f),
2109 Expression::StddevSamp(f) => recurse_agg!(StddevSamp, f),
2110 Expression::Variance(f) => recurse_agg!(Variance, f),
2111
2112 // ===== BinaryFunc-based expressions =====
2113 Expression::ModFunc(mut f) => {
2114 f.this = transform_recursive(f.this, transform_fn)?;
2115 f.expression = transform_recursive(f.expression, transform_fn)?;
2116 Expression::ModFunc(f)
2117 }
2118 Expression::Power(mut f) => {
2119 f.this = transform_recursive(f.this, transform_fn)?;
2120 f.expression = transform_recursive(f.expression, transform_fn)?;
2121 Expression::Power(f)
2122 }
2123 Expression::MapFromArrays(mut f) => {
2124 f.this = transform_recursive(f.this, transform_fn)?;
2125 f.expression = transform_recursive(f.expression, transform_fn)?;
2126 Expression::MapFromArrays(f)
2127 }
2128 Expression::ElementAt(mut f) => {
2129 f.this = transform_recursive(f.this, transform_fn)?;
2130 f.expression = transform_recursive(f.expression, transform_fn)?;
2131 Expression::ElementAt(f)
2132 }
2133 Expression::MapContainsKey(mut f) => {
2134 f.this = transform_recursive(f.this, transform_fn)?;
2135 f.expression = transform_recursive(f.expression, transform_fn)?;
2136 Expression::MapContainsKey(f)
2137 }
2138 Expression::Left(mut f) => {
2139 f.this = transform_recursive(f.this, transform_fn)?;
2140 f.length = transform_recursive(f.length, transform_fn)?;
2141 Expression::Left(f)
2142 }
2143 Expression::Right(mut f) => {
2144 f.this = transform_recursive(f.this, transform_fn)?;
2145 f.length = transform_recursive(f.length, transform_fn)?;
2146 Expression::Right(f)
2147 }
2148 Expression::Repeat(mut f) => {
2149 f.this = transform_recursive(f.this, transform_fn)?;
2150 f.times = transform_recursive(f.times, transform_fn)?;
2151 Expression::Repeat(f)
2152 }
2153
2154 // ===== Complex function expressions =====
2155 Expression::Substring(mut f) => {
2156 f.this = transform_recursive(f.this, transform_fn)?;
2157 f.start = transform_recursive(f.start, transform_fn)?;
2158 if let Some(len) = f.length {
2159 f.length = Some(transform_recursive(len, transform_fn)?);
2160 }
2161 Expression::Substring(f)
2162 }
2163 Expression::Replace(mut f) => {
2164 f.this = transform_recursive(f.this, transform_fn)?;
2165 f.old = transform_recursive(f.old, transform_fn)?;
2166 f.new = transform_recursive(f.new, transform_fn)?;
2167 Expression::Replace(f)
2168 }
2169 Expression::ConcatWs(mut f) => {
2170 f.separator = transform_recursive(f.separator, transform_fn)?;
2171 f.expressions = f
2172 .expressions
2173 .into_iter()
2174 .map(|e| transform_recursive(e, transform_fn))
2175 .collect::<Result<Vec<_>>>()?;
2176 Expression::ConcatWs(f)
2177 }
2178 Expression::Trim(mut f) => {
2179 f.this = transform_recursive(f.this, transform_fn)?;
2180 if let Some(chars) = f.characters {
2181 f.characters = Some(transform_recursive(chars, transform_fn)?);
2182 }
2183 Expression::Trim(f)
2184 }
2185 Expression::Split(mut f) => {
2186 f.this = transform_recursive(f.this, transform_fn)?;
2187 f.delimiter = transform_recursive(f.delimiter, transform_fn)?;
2188 Expression::Split(f)
2189 }
2190 Expression::Lpad(mut f) => {
2191 f.this = transform_recursive(f.this, transform_fn)?;
2192 f.length = transform_recursive(f.length, transform_fn)?;
2193 if let Some(fill) = f.fill {
2194 f.fill = Some(transform_recursive(fill, transform_fn)?);
2195 }
2196 Expression::Lpad(f)
2197 }
2198 Expression::Rpad(mut f) => {
2199 f.this = transform_recursive(f.this, transform_fn)?;
2200 f.length = transform_recursive(f.length, transform_fn)?;
2201 if let Some(fill) = f.fill {
2202 f.fill = Some(transform_recursive(fill, transform_fn)?);
2203 }
2204 Expression::Rpad(f)
2205 }
2206
2207 // ===== Conditional expressions =====
2208 Expression::Case(mut c) => {
2209 if let Some(operand) = c.operand {
2210 c.operand = Some(transform_recursive(operand, transform_fn)?);
2211 }
2212 c.whens = c
2213 .whens
2214 .into_iter()
2215 .map(|(cond, then)| {
2216 let new_cond = transform_recursive(cond.clone(), transform_fn).unwrap_or(cond);
2217 let new_then = transform_recursive(then.clone(), transform_fn).unwrap_or(then);
2218 (new_cond, new_then)
2219 })
2220 .collect();
2221 if let Some(else_expr) = c.else_ {
2222 c.else_ = Some(transform_recursive(else_expr, transform_fn)?);
2223 }
2224 Expression::Case(c)
2225 }
2226 Expression::IfFunc(mut f) => {
2227 f.condition = transform_recursive(f.condition, transform_fn)?;
2228 f.true_value = transform_recursive(f.true_value, transform_fn)?;
2229 if let Some(false_val) = f.false_value {
2230 f.false_value = Some(transform_recursive(false_val, transform_fn)?);
2231 }
2232 Expression::IfFunc(f)
2233 }
2234
2235 // ===== Date/Time expressions =====
2236 Expression::DateAdd(mut f) => {
2237 f.this = transform_recursive(f.this, transform_fn)?;
2238 f.interval = transform_recursive(f.interval, transform_fn)?;
2239 Expression::DateAdd(f)
2240 }
2241 Expression::DateSub(mut f) => {
2242 f.this = transform_recursive(f.this, transform_fn)?;
2243 f.interval = transform_recursive(f.interval, transform_fn)?;
2244 Expression::DateSub(f)
2245 }
2246 Expression::DateDiff(mut f) => {
2247 f.this = transform_recursive(f.this, transform_fn)?;
2248 f.expression = transform_recursive(f.expression, transform_fn)?;
2249 Expression::DateDiff(f)
2250 }
2251 Expression::DateTrunc(mut f) => {
2252 f.this = transform_recursive(f.this, transform_fn)?;
2253 Expression::DateTrunc(f)
2254 }
2255 Expression::Extract(mut f) => {
2256 f.this = transform_recursive(f.this, transform_fn)?;
2257 Expression::Extract(f)
2258 }
2259
2260 // ===== JSON expressions =====
2261 Expression::JsonObject(mut f) => {
2262 f.pairs = f
2263 .pairs
2264 .into_iter()
2265 .map(|(k, v)| {
2266 let new_k = transform_recursive(k, transform_fn)?;
2267 let new_v = transform_recursive(v, transform_fn)?;
2268 Ok((new_k, new_v))
2269 })
2270 .collect::<Result<Vec<_>>>()?;
2271 Expression::JsonObject(f)
2272 }
2273
2274 // ===== Subquery expressions =====
2275 Expression::Subquery(mut s) => {
2276 s.this = transform_recursive(s.this, transform_fn)?;
2277 Expression::Subquery(s)
2278 }
2279 Expression::Exists(mut e) => {
2280 e.this = transform_recursive(e.this, transform_fn)?;
2281 Expression::Exists(e)
2282 }
2283 Expression::Describe(mut d) => {
2284 d.target = transform_recursive(d.target, transform_fn)?;
2285 Expression::Describe(d)
2286 }
2287
2288 // ===== Set operations =====
2289 Expression::Union(mut u) => {
2290 let left = std::mem::replace(&mut u.left, Expression::Null(Null));
2291 u.left = transform_recursive(left, transform_fn)?;
2292 let right = std::mem::replace(&mut u.right, Expression::Null(Null));
2293 u.right = transform_recursive(right, transform_fn)?;
2294 if let Some(mut with) = u.with.take() {
2295 with.ctes = with
2296 .ctes
2297 .into_iter()
2298 .map(|mut cte| {
2299 let original = cte.this.clone();
2300 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2301 cte
2302 })
2303 .collect();
2304 u.with = Some(with);
2305 }
2306 Expression::Union(u)
2307 }
2308 Expression::Intersect(mut i) => {
2309 let left = std::mem::replace(&mut i.left, Expression::Null(Null));
2310 i.left = transform_recursive(left, transform_fn)?;
2311 let right = std::mem::replace(&mut i.right, Expression::Null(Null));
2312 i.right = transform_recursive(right, transform_fn)?;
2313 if let Some(mut with) = i.with.take() {
2314 with.ctes = with
2315 .ctes
2316 .into_iter()
2317 .map(|mut cte| {
2318 let original = cte.this.clone();
2319 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2320 cte
2321 })
2322 .collect();
2323 i.with = Some(with);
2324 }
2325 Expression::Intersect(i)
2326 }
2327 Expression::Except(mut e) => {
2328 let left = std::mem::replace(&mut e.left, Expression::Null(Null));
2329 e.left = transform_recursive(left, transform_fn)?;
2330 let right = std::mem::replace(&mut e.right, Expression::Null(Null));
2331 e.right = transform_recursive(right, transform_fn)?;
2332 if let Some(mut with) = e.with.take() {
2333 with.ctes = with
2334 .ctes
2335 .into_iter()
2336 .map(|mut cte| {
2337 let original = cte.this.clone();
2338 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2339 cte
2340 })
2341 .collect();
2342 e.with = Some(with);
2343 }
2344 Expression::Except(e)
2345 }
2346
2347 // ===== DML expressions =====
2348 Expression::Insert(mut ins) => {
2349 // Transform VALUES clause expressions
2350 let mut new_values = Vec::new();
2351 for row in ins.values {
2352 let mut new_row = Vec::new();
2353 for e in row {
2354 new_row.push(transform_recursive(e, transform_fn)?);
2355 }
2356 new_values.push(new_row);
2357 }
2358 ins.values = new_values;
2359
2360 // Transform query (for INSERT ... SELECT)
2361 if let Some(query) = ins.query {
2362 ins.query = Some(transform_recursive(query, transform_fn)?);
2363 }
2364
2365 // Transform RETURNING clause
2366 let mut new_returning = Vec::new();
2367 for e in ins.returning {
2368 new_returning.push(transform_recursive(e, transform_fn)?);
2369 }
2370 ins.returning = new_returning;
2371
2372 // Transform ON CONFLICT clause
2373 if let Some(on_conflict) = ins.on_conflict {
2374 ins.on_conflict = Some(Box::new(transform_recursive(*on_conflict, transform_fn)?));
2375 }
2376
2377 Expression::Insert(ins)
2378 }
2379 Expression::Update(mut upd) => {
2380 upd.set = upd
2381 .set
2382 .into_iter()
2383 .map(|(id, val)| {
2384 let new_val = transform_recursive(val.clone(), transform_fn).unwrap_or(val);
2385 (id, new_val)
2386 })
2387 .collect();
2388 if let Some(mut where_clause) = upd.where_clause.take() {
2389 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2390 upd.where_clause = Some(where_clause);
2391 }
2392 Expression::Update(upd)
2393 }
2394 Expression::Delete(mut del) => {
2395 if let Some(mut where_clause) = del.where_clause.take() {
2396 where_clause.this = transform_recursive(where_clause.this, transform_fn)?;
2397 del.where_clause = Some(where_clause);
2398 }
2399 Expression::Delete(del)
2400 }
2401
2402 // ===== CTE expressions =====
2403 Expression::With(mut w) => {
2404 w.ctes = w
2405 .ctes
2406 .into_iter()
2407 .map(|mut cte| {
2408 let original = cte.this.clone();
2409 cte.this = transform_recursive(cte.this, transform_fn).unwrap_or(original);
2410 cte
2411 })
2412 .collect();
2413 Expression::With(w)
2414 }
2415 Expression::Cte(mut c) => {
2416 c.this = transform_recursive(c.this, transform_fn)?;
2417 Expression::Cte(c)
2418 }
2419
2420 // ===== Order expressions =====
2421 Expression::Ordered(mut o) => {
2422 o.this = transform_recursive(o.this, transform_fn)?;
2423 Expression::Ordered(o)
2424 }
2425
2426 // ===== Negation =====
2427 Expression::Neg(mut n) => {
2428 n.this = transform_recursive(n.this, transform_fn)?;
2429 Expression::Neg(n)
2430 }
2431
2432 // ===== Between =====
2433 Expression::Between(mut b) => {
2434 b.this = transform_recursive(b.this, transform_fn)?;
2435 b.low = transform_recursive(b.low, transform_fn)?;
2436 b.high = transform_recursive(b.high, transform_fn)?;
2437 Expression::Between(b)
2438 }
2439 Expression::IsNull(mut i) => {
2440 i.this = transform_recursive(i.this, transform_fn)?;
2441 Expression::IsNull(i)
2442 }
2443 Expression::IsTrue(mut i) => {
2444 i.this = transform_recursive(i.this, transform_fn)?;
2445 Expression::IsTrue(i)
2446 }
2447 Expression::IsFalse(mut i) => {
2448 i.this = transform_recursive(i.this, transform_fn)?;
2449 Expression::IsFalse(i)
2450 }
2451
2452 // ===== Like expressions =====
2453 Expression::Like(mut l) => {
2454 l.left = transform_recursive(l.left, transform_fn)?;
2455 l.right = transform_recursive(l.right, transform_fn)?;
2456 Expression::Like(l)
2457 }
2458 Expression::ILike(mut l) => {
2459 l.left = transform_recursive(l.left, transform_fn)?;
2460 l.right = transform_recursive(l.right, transform_fn)?;
2461 Expression::ILike(l)
2462 }
2463
2464 // ===== Additional binary ops not covered by macro =====
2465 Expression::Neq(op) => transform_binary!(Neq, *op),
2466 Expression::Lte(op) => transform_binary!(Lte, *op),
2467 Expression::Gte(op) => transform_binary!(Gte, *op),
2468 Expression::Mod(op) => transform_binary!(Mod, *op),
2469 Expression::Concat(op) => transform_binary!(Concat, *op),
2470 Expression::BitwiseAnd(op) => transform_binary!(BitwiseAnd, *op),
2471 Expression::BitwiseOr(op) => transform_binary!(BitwiseOr, *op),
2472 Expression::BitwiseXor(op) => transform_binary!(BitwiseXor, *op),
2473 Expression::Is(op) => transform_binary!(Is, *op),
2474
2475 // ===== TryCast / SafeCast =====
2476 Expression::TryCast(mut c) => {
2477 c.this = transform_recursive(c.this, transform_fn)?;
2478 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2479 Expression::TryCast(c)
2480 }
2481 Expression::SafeCast(mut c) => {
2482 c.this = transform_recursive(c.this, transform_fn)?;
2483 c.to = transform_data_type_recursive(c.to, transform_fn)?;
2484 Expression::SafeCast(c)
2485 }
2486
2487 // ===== Misc =====
2488 Expression::Unnest(mut f) => {
2489 f.this = transform_recursive(f.this, transform_fn)?;
2490 f.expressions = f
2491 .expressions
2492 .into_iter()
2493 .map(|e| transform_recursive(e, transform_fn))
2494 .collect::<Result<Vec<_>>>()?;
2495 Expression::Unnest(f)
2496 }
2497 Expression::Explode(mut f) => {
2498 f.this = transform_recursive(f.this, transform_fn)?;
2499 Expression::Explode(f)
2500 }
2501 Expression::GroupConcat(mut f) => {
2502 f.this = transform_recursive(f.this, transform_fn)?;
2503 Expression::GroupConcat(f)
2504 }
2505 Expression::StringAgg(mut f) => {
2506 f.this = transform_recursive(f.this, transform_fn)?;
2507 Expression::StringAgg(f)
2508 }
2509 Expression::ListAgg(mut f) => {
2510 f.this = transform_recursive(f.this, transform_fn)?;
2511 Expression::ListAgg(f)
2512 }
2513 Expression::ArrayAgg(mut f) => {
2514 f.this = transform_recursive(f.this, transform_fn)?;
2515 Expression::ArrayAgg(f)
2516 }
2517 Expression::ParseJson(mut f) => {
2518 f.this = transform_recursive(f.this, transform_fn)?;
2519 Expression::ParseJson(f)
2520 }
2521 Expression::ToJson(mut f) => {
2522 f.this = transform_recursive(f.this, transform_fn)?;
2523 Expression::ToJson(f)
2524 }
2525 Expression::JSONExtract(mut e) => {
2526 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2527 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2528 Expression::JSONExtract(e)
2529 }
2530 Expression::JSONExtractScalar(mut e) => {
2531 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2532 e.expression = Box::new(transform_recursive(*e.expression, transform_fn)?);
2533 Expression::JSONExtractScalar(e)
2534 }
2535
2536 // StrToTime: recurse into this
2537 Expression::StrToTime(mut e) => {
2538 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2539 Expression::StrToTime(e)
2540 }
2541
2542 // UnixToTime: recurse into this
2543 Expression::UnixToTime(mut e) => {
2544 e.this = Box::new(transform_recursive(*e.this, transform_fn)?);
2545 Expression::UnixToTime(e)
2546 }
2547
2548 // CreateTable: recurse into column defaults, on_update expressions, and data types
2549 Expression::CreateTable(mut ct) => {
2550 for col in &mut ct.columns {
2551 if let Some(default_expr) = col.default.take() {
2552 col.default = Some(transform_recursive(default_expr, transform_fn)?);
2553 }
2554 if let Some(on_update_expr) = col.on_update.take() {
2555 col.on_update = Some(transform_recursive(on_update_expr, transform_fn)?);
2556 }
2557 // Note: Column data type transformations (INT -> INT64 for BigQuery, etc.)
2558 // are NOT applied here because per-dialect transforms are designed for CAST/expression
2559 // contexts and may not produce correct results for DDL column definitions.
2560 // The DDL type mappings would need dedicated handling per source/target pair.
2561 }
2562 if let Some(as_select) = ct.as_select.take() {
2563 ct.as_select = Some(transform_recursive(as_select, transform_fn)?);
2564 }
2565 Expression::CreateTable(ct)
2566 }
2567
2568 // CreateView: recurse into the view body query
2569 Expression::CreateView(mut cv) => {
2570 cv.query = transform_recursive(cv.query, transform_fn)?;
2571 Expression::CreateView(cv)
2572 }
2573
2574 // CreateTask: recurse into the task body
2575 Expression::CreateTask(mut ct) => {
2576 ct.body = transform_recursive(ct.body, transform_fn)?;
2577 Expression::CreateTask(ct)
2578 }
2579
2580 // CreateProcedure: recurse into body expressions
2581 Expression::CreateProcedure(mut cp) => {
2582 if let Some(body) = cp.body.take() {
2583 cp.body = Some(match body {
2584 FunctionBody::Expression(expr) => {
2585 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2586 }
2587 FunctionBody::Return(expr) => {
2588 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2589 }
2590 FunctionBody::Statements(stmts) => {
2591 let transformed_stmts = stmts
2592 .into_iter()
2593 .map(|s| transform_recursive(s, transform_fn))
2594 .collect::<Result<Vec<_>>>()?;
2595 FunctionBody::Statements(transformed_stmts)
2596 }
2597 other => other,
2598 });
2599 }
2600 Expression::CreateProcedure(cp)
2601 }
2602
2603 // CreateFunction: recurse into body expressions
2604 Expression::CreateFunction(mut cf) => {
2605 if let Some(body) = cf.body.take() {
2606 cf.body = Some(match body {
2607 FunctionBody::Expression(expr) => {
2608 FunctionBody::Expression(transform_recursive(expr, transform_fn)?)
2609 }
2610 FunctionBody::Return(expr) => {
2611 FunctionBody::Return(transform_recursive(expr, transform_fn)?)
2612 }
2613 FunctionBody::Statements(stmts) => {
2614 let transformed_stmts = stmts
2615 .into_iter()
2616 .map(|s| transform_recursive(s, transform_fn))
2617 .collect::<Result<Vec<_>>>()?;
2618 FunctionBody::Statements(transformed_stmts)
2619 }
2620 other => other,
2621 });
2622 }
2623 Expression::CreateFunction(cf)
2624 }
2625
2626 // MemberOf: recurse into left and right operands
2627 Expression::MemberOf(op) => transform_binary!(MemberOf, *op),
2628 // ArrayContainsAll (@>): recurse into left and right operands
2629 Expression::ArrayContainsAll(op) => transform_binary!(ArrayContainsAll, *op),
2630 // ArrayContainedBy (<@): recurse into left and right operands
2631 Expression::ArrayContainedBy(op) => transform_binary!(ArrayContainedBy, *op),
2632 // ArrayOverlaps (&&): recurse into left and right operands
2633 Expression::ArrayOverlaps(op) => transform_binary!(ArrayOverlaps, *op),
2634 // TsMatch (@@): recurse into left and right operands
2635 Expression::TsMatch(op) => transform_binary!(TsMatch, *op),
2636 // Adjacent (-|-): recurse into left and right operands
2637 Expression::Adjacent(op) => transform_binary!(Adjacent, *op),
2638
2639 // Table: recurse into when (HistoricalData) and changes fields
2640 Expression::Table(mut t) => {
2641 if let Some(when) = t.when.take() {
2642 let transformed =
2643 transform_recursive(Expression::HistoricalData(when), transform_fn)?;
2644 if let Expression::HistoricalData(hd) = transformed {
2645 t.when = Some(hd);
2646 }
2647 }
2648 if let Some(changes) = t.changes.take() {
2649 let transformed = transform_recursive(Expression::Changes(changes), transform_fn)?;
2650 if let Expression::Changes(c) = transformed {
2651 t.changes = Some(c);
2652 }
2653 }
2654 Expression::Table(t)
2655 }
2656
2657 // HistoricalData (Snowflake time travel): recurse into expression
2658 Expression::HistoricalData(mut hd) => {
2659 *hd.expression = transform_recursive(*hd.expression, transform_fn)?;
2660 Expression::HistoricalData(hd)
2661 }
2662
2663 // Changes (Snowflake CHANGES clause): recurse into at_before and end
2664 Expression::Changes(mut c) => {
2665 if let Some(at_before) = c.at_before.take() {
2666 c.at_before = Some(Box::new(transform_recursive(*at_before, transform_fn)?));
2667 }
2668 if let Some(end) = c.end.take() {
2669 c.end = Some(Box::new(transform_recursive(*end, transform_fn)?));
2670 }
2671 Expression::Changes(c)
2672 }
2673
2674 // TableArgument: TABLE(expr) or MODEL(expr)
2675 Expression::TableArgument(mut ta) => {
2676 ta.this = transform_recursive(ta.this, transform_fn)?;
2677 Expression::TableArgument(ta)
2678 }
2679
2680 // JoinedTable: (tbl1 JOIN tbl2 ON ...) - recurse into left and join tables
2681 Expression::JoinedTable(mut jt) => {
2682 jt.left = transform_recursive(jt.left, transform_fn)?;
2683 for join in &mut jt.joins {
2684 join.this = transform_recursive(
2685 std::mem::replace(&mut join.this, Expression::Null(crate::expressions::Null)),
2686 transform_fn,
2687 )?;
2688 if let Some(on) = join.on.take() {
2689 join.on = Some(transform_recursive(on, transform_fn)?);
2690 }
2691 }
2692 jt.lateral_views = jt
2693 .lateral_views
2694 .into_iter()
2695 .map(|mut lv| {
2696 lv.this = transform_recursive(lv.this, transform_fn)?;
2697 Ok(lv)
2698 })
2699 .collect::<Result<Vec<_>>>()?;
2700 Expression::JoinedTable(jt)
2701 }
2702
2703 // Lateral: LATERAL func() - recurse into the function expression
2704 Expression::Lateral(mut lat) => {
2705 *lat.this = transform_recursive(*lat.this, transform_fn)?;
2706 Expression::Lateral(lat)
2707 }
2708
2709 // WithinGroup: recurse into order_by items (for NULLS FIRST/LAST etc.)
2710 // but NOT into wg.this - the inner function is handled by StringAggConvert/GroupConcatConvert
2711 // as a unit together with the WithinGroup wrapper
2712 Expression::WithinGroup(mut wg) => {
2713 wg.order_by = wg
2714 .order_by
2715 .into_iter()
2716 .map(|mut o| {
2717 let original = o.this.clone();
2718 o.this = transform_recursive(o.this, transform_fn).unwrap_or(original);
2719 match transform_fn(Expression::Ordered(Box::new(o.clone()))) {
2720 Ok(Expression::Ordered(transformed)) => *transformed,
2721 Ok(_) | Err(_) => o,
2722 }
2723 })
2724 .collect();
2725 Expression::WithinGroup(wg)
2726 }
2727
2728 // Filter: recurse into both the aggregate and the filter condition
2729 Expression::Filter(mut f) => {
2730 f.this = Box::new(transform_recursive(*f.this, transform_fn)?);
2731 f.expression = Box::new(transform_recursive(*f.expression, transform_fn)?);
2732 Expression::Filter(f)
2733 }
2734
2735 // Aggregate functions (AggFunc-based): recurse into the aggregate argument,
2736 // filter, order_by, having_max, and limit.
2737 // Stddev, StddevSamp, Variance, and ArrayAgg are handled earlier in this match.
2738 Expression::Sum(f) => recurse_agg!(Sum, f),
2739 Expression::Avg(f) => recurse_agg!(Avg, f),
2740 Expression::Min(f) => recurse_agg!(Min, f),
2741 Expression::Max(f) => recurse_agg!(Max, f),
2742 Expression::CountIf(f) => recurse_agg!(CountIf, f),
2743 Expression::StddevPop(f) => recurse_agg!(StddevPop, f),
2744 Expression::VarPop(f) => recurse_agg!(VarPop, f),
2745 Expression::VarSamp(f) => recurse_agg!(VarSamp, f),
2746 Expression::Median(f) => recurse_agg!(Median, f),
2747 Expression::Mode(f) => recurse_agg!(Mode, f),
2748 Expression::First(f) => recurse_agg!(First, f),
2749 Expression::Last(f) => recurse_agg!(Last, f),
2750 Expression::AnyValue(f) => recurse_agg!(AnyValue, f),
2751 Expression::ApproxDistinct(f) => recurse_agg!(ApproxDistinct, f),
2752 Expression::ApproxCountDistinct(f) => recurse_agg!(ApproxCountDistinct, f),
2753 Expression::LogicalAnd(f) => recurse_agg!(LogicalAnd, f),
2754 Expression::LogicalOr(f) => recurse_agg!(LogicalOr, f),
2755 Expression::Skewness(f) => recurse_agg!(Skewness, f),
2756 Expression::ArrayConcatAgg(f) => recurse_agg!(ArrayConcatAgg, f),
2757 Expression::ArrayUniqueAgg(f) => recurse_agg!(ArrayUniqueAgg, f),
2758 Expression::BoolXorAgg(f) => recurse_agg!(BoolXorAgg, f),
2759 Expression::BitwiseOrAgg(f) => recurse_agg!(BitwiseOrAgg, f),
2760 Expression::BitwiseAndAgg(f) => recurse_agg!(BitwiseAndAgg, f),
2761 Expression::BitwiseXorAgg(f) => recurse_agg!(BitwiseXorAgg, f),
2762
2763 // Count has its own struct with an Option<Expression> `this` field
2764 Expression::Count(mut c) => {
2765 if let Some(this) = c.this.take() {
2766 c.this = Some(transform_recursive(this, transform_fn)?);
2767 }
2768 if let Some(filter) = c.filter.take() {
2769 c.filter = Some(transform_recursive(filter, transform_fn)?);
2770 }
2771 Expression::Count(c)
2772 }
2773
2774 Expression::PipeOperator(mut pipe) => {
2775 pipe.this = transform_recursive(pipe.this, transform_fn)?;
2776 pipe.expression = transform_recursive(pipe.expression, transform_fn)?;
2777 Expression::PipeOperator(pipe)
2778 }
2779
2780 // ArrayExcept/ArrayContains/ArrayDistinct: recurse into children
2781 Expression::ArrayExcept(mut f) => {
2782 f.this = transform_recursive(f.this, transform_fn)?;
2783 f.expression = transform_recursive(f.expression, transform_fn)?;
2784 Expression::ArrayExcept(f)
2785 }
2786 Expression::ArrayContains(mut f) => {
2787 f.this = transform_recursive(f.this, transform_fn)?;
2788 f.expression = transform_recursive(f.expression, transform_fn)?;
2789 Expression::ArrayContains(f)
2790 }
2791 Expression::ArrayDistinct(mut f) => {
2792 f.this = transform_recursive(f.this, transform_fn)?;
2793 Expression::ArrayDistinct(f)
2794 }
2795 Expression::ArrayPosition(mut f) => {
2796 f.this = transform_recursive(f.this, transform_fn)?;
2797 f.expression = transform_recursive(f.expression, transform_fn)?;
2798 Expression::ArrayPosition(f)
2799 }
2800
2801 // Pass through leaf nodes unchanged
2802 other => other,
2803 };
2804
2805 // Then apply the transform function
2806 transform_fn(expr)
2807}
2808
// ---------------------------------------------------------------------------
// Cached dialect configurations
// ---------------------------------------------------------------------------
// NOTE: `configs_for_dialect_type` (defined below) returns the tokenizer
// config, generator config, and expression transform closure for a built-in
// dialect type. It is the shared implementation used by both `Dialect::get()`
// and custom dialect construction. (This note is a plain comment rather than
// a `///` doc comment so it does not attach to the next item.)
2815
/// Pre-computed tokenizer + generator configs for a dialect, cached via `LazyLock`.
/// Transform closures are cheap (unit-struct method calls) and created fresh each time.
struct CachedDialectConfig {
    /// Tokenizer settings; cloned out of the cache for each `Dialect` handle.
    tokenizer_config: TokenizerConfig,
    /// Generator settings; shared via `Arc` so handing one out is a refcount bump.
    generator_config: Arc<GeneratorConfig>,
}
2822
/// Declare a per-dialect `LazyLock<CachedDialectConfig>` static.
///
/// The static is compiled only when the named cargo feature is enabled.
macro_rules! cached_dialect {
    ($name:ident, $dialect:expr, $feat:literal) => {
        #[cfg(feature = $feat)]
        static $name: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
            let dialect = $dialect;
            CachedDialectConfig {
                tokenizer_config: dialect.tokenizer_config(),
                generator_config: Arc::new(dialect.generator_config()),
            }
        });
    };
}
2836
2837static CACHED_GENERIC: LazyLock<CachedDialectConfig> = LazyLock::new(|| {
2838 let d = GenericDialect;
2839 CachedDialectConfig {
2840 tokenizer_config: d.tokenizer_config(),
2841 generator_config: Arc::new(d.generator_config()),
2842 }
2843});
2844
// One cached-config static per built-in dialect; each is compiled only when
// its cargo feature is enabled (see `cached_dialect!` above).
cached_dialect!(CACHED_POSTGRESQL, PostgresDialect, "dialect-postgresql");
cached_dialect!(CACHED_MYSQL, MySQLDialect, "dialect-mysql");
cached_dialect!(CACHED_BIGQUERY, BigQueryDialect, "dialect-bigquery");
cached_dialect!(CACHED_SNOWFLAKE, SnowflakeDialect, "dialect-snowflake");
cached_dialect!(CACHED_DUCKDB, DuckDBDialect, "dialect-duckdb");
cached_dialect!(CACHED_TSQL, TSQLDialect, "dialect-tsql");
cached_dialect!(CACHED_ORACLE, OracleDialect, "dialect-oracle");
cached_dialect!(CACHED_HIVE, HiveDialect, "dialect-hive");
cached_dialect!(CACHED_SPARK, SparkDialect, "dialect-spark");
cached_dialect!(CACHED_SQLITE, SQLiteDialect, "dialect-sqlite");
cached_dialect!(CACHED_PRESTO, PrestoDialect, "dialect-presto");
cached_dialect!(CACHED_TRINO, TrinoDialect, "dialect-trino");
cached_dialect!(CACHED_REDSHIFT, RedshiftDialect, "dialect-redshift");
cached_dialect!(CACHED_CLICKHOUSE, ClickHouseDialect, "dialect-clickhouse");
cached_dialect!(CACHED_DATABRICKS, DatabricksDialect, "dialect-databricks");
cached_dialect!(CACHED_ATHENA, AthenaDialect, "dialect-athena");
cached_dialect!(CACHED_TERADATA, TeradataDialect, "dialect-teradata");
cached_dialect!(CACHED_DORIS, DorisDialect, "dialect-doris");
cached_dialect!(CACHED_STARROCKS, StarRocksDialect, "dialect-starrocks");
cached_dialect!(
    CACHED_MATERIALIZE,
    MaterializeDialect,
    "dialect-materialize"
);
cached_dialect!(CACHED_RISINGWAVE, RisingWaveDialect, "dialect-risingwave");
cached_dialect!(
    CACHED_SINGLESTORE,
    SingleStoreDialect,
    "dialect-singlestore"
);
cached_dialect!(
    CACHED_COCKROACHDB,
    CockroachDBDialect,
    "dialect-cockroachdb"
);
cached_dialect!(CACHED_TIDB, TiDBDialect, "dialect-tidb");
cached_dialect!(CACHED_DRUID, DruidDialect, "dialect-druid");
cached_dialect!(CACHED_SOLR, SolrDialect, "dialect-solr");
cached_dialect!(CACHED_TABLEAU, TableauDialect, "dialect-tableau");
cached_dialect!(CACHED_DUNE, DuneDialect, "dialect-dune");
cached_dialect!(CACHED_FABRIC, FabricDialect, "dialect-fabric");
cached_dialect!(CACHED_DRILL, DrillDialect, "dialect-drill");
cached_dialect!(CACHED_DREMIO, DremioDialect, "dialect-dremio");
cached_dialect!(CACHED_EXASOL, ExasolDialect, "dialect-exasol");
cached_dialect!(CACHED_DATAFUSION, DataFusionDialect, "dialect-datafusion");
2890
/// Returns the tokenizer config, generator config, and expression transform
/// closure for a built-in dialect type. This is the shared implementation used
/// by both `Dialect::get()` and custom dialect construction.
///
/// Tokenizer and generator configs come from the per-dialect `LazyLock` caches
/// above; the transform closure is boxed fresh on every call.
fn configs_for_dialect_type(
    dt: DialectType,
) -> (
    TokenizerConfig,
    Arc<GeneratorConfig>,
    Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
) {
    /// Clone configs from a cached static and pair with a fresh transform closure.
    macro_rules! from_cache {
        ($cache:expr, $dialect_struct:expr) => {{
            let c = &*$cache;
            (
                c.tokenizer_config.clone(),
                c.generator_config.clone(),
                Box::new(move |e| $dialect_struct.transform_expr(e)),
            )
        }};
    }
    match dt {
        #[cfg(feature = "dialect-postgresql")]
        DialectType::PostgreSQL => from_cache!(CACHED_POSTGRESQL, PostgresDialect),
        #[cfg(feature = "dialect-mysql")]
        DialectType::MySQL => from_cache!(CACHED_MYSQL, MySQLDialect),
        #[cfg(feature = "dialect-bigquery")]
        DialectType::BigQuery => from_cache!(CACHED_BIGQUERY, BigQueryDialect),
        #[cfg(feature = "dialect-snowflake")]
        DialectType::Snowflake => from_cache!(CACHED_SNOWFLAKE, SnowflakeDialect),
        #[cfg(feature = "dialect-duckdb")]
        DialectType::DuckDB => from_cache!(CACHED_DUCKDB, DuckDBDialect),
        #[cfg(feature = "dialect-tsql")]
        DialectType::TSQL => from_cache!(CACHED_TSQL, TSQLDialect),
        #[cfg(feature = "dialect-oracle")]
        DialectType::Oracle => from_cache!(CACHED_ORACLE, OracleDialect),
        #[cfg(feature = "dialect-hive")]
        DialectType::Hive => from_cache!(CACHED_HIVE, HiveDialect),
        #[cfg(feature = "dialect-spark")]
        DialectType::Spark => from_cache!(CACHED_SPARK, SparkDialect),
        #[cfg(feature = "dialect-sqlite")]
        DialectType::SQLite => from_cache!(CACHED_SQLITE, SQLiteDialect),
        #[cfg(feature = "dialect-presto")]
        DialectType::Presto => from_cache!(CACHED_PRESTO, PrestoDialect),
        #[cfg(feature = "dialect-trino")]
        DialectType::Trino => from_cache!(CACHED_TRINO, TrinoDialect),
        #[cfg(feature = "dialect-redshift")]
        DialectType::Redshift => from_cache!(CACHED_REDSHIFT, RedshiftDialect),
        #[cfg(feature = "dialect-clickhouse")]
        DialectType::ClickHouse => from_cache!(CACHED_CLICKHOUSE, ClickHouseDialect),
        #[cfg(feature = "dialect-databricks")]
        DialectType::Databricks => from_cache!(CACHED_DATABRICKS, DatabricksDialect),
        #[cfg(feature = "dialect-athena")]
        DialectType::Athena => from_cache!(CACHED_ATHENA, AthenaDialect),
        #[cfg(feature = "dialect-teradata")]
        DialectType::Teradata => from_cache!(CACHED_TERADATA, TeradataDialect),
        #[cfg(feature = "dialect-doris")]
        DialectType::Doris => from_cache!(CACHED_DORIS, DorisDialect),
        #[cfg(feature = "dialect-starrocks")]
        DialectType::StarRocks => from_cache!(CACHED_STARROCKS, StarRocksDialect),
        #[cfg(feature = "dialect-materialize")]
        DialectType::Materialize => from_cache!(CACHED_MATERIALIZE, MaterializeDialect),
        #[cfg(feature = "dialect-risingwave")]
        DialectType::RisingWave => from_cache!(CACHED_RISINGWAVE, RisingWaveDialect),
        #[cfg(feature = "dialect-singlestore")]
        DialectType::SingleStore => from_cache!(CACHED_SINGLESTORE, SingleStoreDialect),
        #[cfg(feature = "dialect-cockroachdb")]
        DialectType::CockroachDB => from_cache!(CACHED_COCKROACHDB, CockroachDBDialect),
        #[cfg(feature = "dialect-tidb")]
        DialectType::TiDB => from_cache!(CACHED_TIDB, TiDBDialect),
        #[cfg(feature = "dialect-druid")]
        DialectType::Druid => from_cache!(CACHED_DRUID, DruidDialect),
        #[cfg(feature = "dialect-solr")]
        DialectType::Solr => from_cache!(CACHED_SOLR, SolrDialect),
        #[cfg(feature = "dialect-tableau")]
        DialectType::Tableau => from_cache!(CACHED_TABLEAU, TableauDialect),
        #[cfg(feature = "dialect-dune")]
        DialectType::Dune => from_cache!(CACHED_DUNE, DuneDialect),
        #[cfg(feature = "dialect-fabric")]
        DialectType::Fabric => from_cache!(CACHED_FABRIC, FabricDialect),
        #[cfg(feature = "dialect-drill")]
        DialectType::Drill => from_cache!(CACHED_DRILL, DrillDialect),
        #[cfg(feature = "dialect-dremio")]
        DialectType::Dremio => from_cache!(CACHED_DREMIO, DremioDialect),
        #[cfg(feature = "dialect-exasol")]
        DialectType::Exasol => from_cache!(CACHED_EXASOL, ExasolDialect),
        #[cfg(feature = "dialect-datafusion")]
        DialectType::DataFusion => from_cache!(CACHED_DATAFUSION, DataFusionDialect),
        // `Generic`, plus any dialect whose cargo feature is disabled at
        // compile time, falls back to the generic configs.
        _ => from_cache!(CACHED_GENERIC, GenericDialect),
    }
}
2979
2980// ---------------------------------------------------------------------------
2981// Custom dialect registry
2982// ---------------------------------------------------------------------------
2983
/// Global name -> config map for user-registered custom dialects. Lookups take
/// a read lock; register/unregister take a write lock.
static CUSTOM_DIALECT_REGISTRY: LazyLock<RwLock<HashMap<String, Arc<CustomDialectConfig>>>> =
    LazyLock::new(|| RwLock::new(HashMap::new()));
2986
/// Immutable configuration for a registered custom dialect, stored in
/// `CUSTOM_DIALECT_REGISTRY` behind an `Arc`.
struct CustomDialectConfig {
    /// Registry key; also used in error messages.
    name: String,
    /// Built-in dialect the custom one inherits behavior from.
    base_dialect: DialectType,
    /// Tokenizer config after the builder's modifier (if any) was applied.
    tokenizer_config: TokenizerConfig,
    /// Generator config after the builder's modifier (if any) was applied.
    generator_config: GeneratorConfig,
    /// Optional per-node transform; when `None`, the base dialect's transform is used.
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Optional whole-tree preprocess; when `None`, built-in preprocessing runs.
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
2995
2996/// Fluent builder for creating and registering custom SQL dialects.
2997///
2998/// A custom dialect is based on an existing built-in dialect and allows selective
2999/// overrides of tokenizer configuration, generator configuration, and expression
3000/// transforms.
3001///
3002/// # Example
3003///
3004/// ```rust,ignore
3005/// use polyglot_sql::dialects::{CustomDialectBuilder, DialectType, Dialect};
3006/// use polyglot_sql::generator::NormalizeFunctions;
3007///
3008/// CustomDialectBuilder::new("my_postgres")
3009/// .based_on(DialectType::PostgreSQL)
3010/// .generator_config_modifier(|gc| {
3011/// gc.normalize_functions = NormalizeFunctions::Lower;
3012/// })
3013/// .register()
3014/// .unwrap();
3015///
3016/// let d = Dialect::get_by_name("my_postgres").unwrap();
3017/// let exprs = d.parse("SELECT COUNT(*)").unwrap();
3018/// let sql = d.generate(&exprs[0]).unwrap();
3019/// assert_eq!(sql, "select count(*)");
3020///
3021/// polyglot_sql::unregister_custom_dialect("my_postgres");
3022/// ```
pub struct CustomDialectBuilder {
    /// Name under which the dialect will be registered.
    name: String,
    /// Built-in dialect whose configuration is inherited.
    base_dialect: DialectType,
    /// One-shot tweak applied to the inherited tokenizer config at `register()` time.
    tokenizer_modifier: Option<Box<dyn FnOnce(&mut TokenizerConfig)>>,
    /// One-shot tweak applied to the inherited generator config at `register()` time.
    generator_modifier: Option<Box<dyn FnOnce(&mut GeneratorConfig)>>,
    /// Replacement per-node expression transform (overrides the base dialect's).
    transform: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
    /// Replacement whole-tree preprocessing pass (overrides the built-in one).
    preprocess: Option<Arc<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3031
3032impl CustomDialectBuilder {
3033 /// Create a new builder with the given name. Defaults to `Generic` as the base dialect.
3034 pub fn new(name: impl Into<String>) -> Self {
3035 Self {
3036 name: name.into(),
3037 base_dialect: DialectType::Generic,
3038 tokenizer_modifier: None,
3039 generator_modifier: None,
3040 transform: None,
3041 preprocess: None,
3042 }
3043 }
3044
3045 /// Set the base built-in dialect to inherit configuration from.
3046 pub fn based_on(mut self, dialect: DialectType) -> Self {
3047 self.base_dialect = dialect;
3048 self
3049 }
3050
3051 /// Provide a closure that modifies the tokenizer configuration inherited from the base dialect.
3052 pub fn tokenizer_config_modifier<F>(mut self, f: F) -> Self
3053 where
3054 F: FnOnce(&mut TokenizerConfig) + 'static,
3055 {
3056 self.tokenizer_modifier = Some(Box::new(f));
3057 self
3058 }
3059
3060 /// Provide a closure that modifies the generator configuration inherited from the base dialect.
3061 pub fn generator_config_modifier<F>(mut self, f: F) -> Self
3062 where
3063 F: FnOnce(&mut GeneratorConfig) + 'static,
3064 {
3065 self.generator_modifier = Some(Box::new(f));
3066 self
3067 }
3068
3069 /// Set a custom per-node expression transform function.
3070 ///
3071 /// This replaces the base dialect's transform. It is called on every expression
3072 /// node during the recursive transform pass.
3073 pub fn transform_fn<F>(mut self, f: F) -> Self
3074 where
3075 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3076 {
3077 self.transform = Some(Arc::new(f));
3078 self
3079 }
3080
3081 /// Set a custom whole-tree preprocessing function.
3082 ///
3083 /// This replaces the base dialect's built-in preprocessing. It is called once
3084 /// on the entire expression tree before the recursive per-node transform.
3085 pub fn preprocess_fn<F>(mut self, f: F) -> Self
3086 where
3087 F: Fn(Expression) -> Result<Expression> + Send + Sync + 'static,
3088 {
3089 self.preprocess = Some(Arc::new(f));
3090 self
3091 }
3092
3093 /// Build the custom dialect configuration and register it in the global registry.
3094 ///
3095 /// Returns an error if:
3096 /// - The name collides with a built-in dialect name
3097 /// - A custom dialect with the same name is already registered
3098 pub fn register(self) -> Result<()> {
3099 // Reject names that collide with built-in dialects
3100 if DialectType::from_str(&self.name).is_ok() {
3101 return Err(crate::error::Error::parse(
3102 format!(
3103 "Cannot register custom dialect '{}': name collides with built-in dialect",
3104 self.name
3105 ),
3106 0,
3107 0,
3108 0,
3109 0,
3110 ));
3111 }
3112
3113 // Get base configs
3114 let (mut tok_config, arc_gen_config, _base_transform) =
3115 configs_for_dialect_type(self.base_dialect);
3116 let mut gen_config = (*arc_gen_config).clone();
3117
3118 // Apply modifiers
3119 if let Some(tok_mod) = self.tokenizer_modifier {
3120 tok_mod(&mut tok_config);
3121 }
3122 if let Some(gen_mod) = self.generator_modifier {
3123 gen_mod(&mut gen_config);
3124 }
3125
3126 let config = CustomDialectConfig {
3127 name: self.name.clone(),
3128 base_dialect: self.base_dialect,
3129 tokenizer_config: tok_config,
3130 generator_config: gen_config,
3131 transform: self.transform,
3132 preprocess: self.preprocess,
3133 };
3134
3135 register_custom_dialect(config)
3136 }
3137}
3138
3139use std::str::FromStr;
3140
3141fn register_custom_dialect(config: CustomDialectConfig) -> Result<()> {
3142 let mut registry = CUSTOM_DIALECT_REGISTRY.write().map_err(|e| {
3143 crate::error::Error::parse(format!("Registry lock poisoned: {}", e), 0, 0, 0, 0)
3144 })?;
3145
3146 if registry.contains_key(&config.name) {
3147 return Err(crate::error::Error::parse(
3148 format!("Custom dialect '{}' is already registered", config.name),
3149 0,
3150 0,
3151 0,
3152 0,
3153 ));
3154 }
3155
3156 registry.insert(config.name.clone(), Arc::new(config));
3157 Ok(())
3158}
3159
3160/// Remove a custom dialect from the global registry.
3161///
3162/// Returns `true` if a dialect with that name was found and removed,
3163/// `false` if no such custom dialect existed.
3164pub fn unregister_custom_dialect(name: &str) -> bool {
3165 if let Ok(mut registry) = CUSTOM_DIALECT_REGISTRY.write() {
3166 registry.remove(name).is_some()
3167 } else {
3168 false
3169 }
3170}
3171
3172fn get_custom_dialect_config(name: &str) -> Option<Arc<CustomDialectConfig>> {
3173 CUSTOM_DIALECT_REGISTRY
3174 .read()
3175 .ok()
3176 .and_then(|registry| registry.get(name).cloned())
3177}
3178
3179/// Main entry point for dialect-specific SQL operations.
3180///
3181/// A `Dialect` bundles together a tokenizer, generator configuration, and expression
3182/// transformer for a specific SQL database engine. It is the high-level API through
3183/// which callers parse, generate, transform, and transpile SQL.
3184///
3185/// # Usage
3186///
3187/// ```rust,ignore
3188/// use polyglot_sql::dialects::{Dialect, DialectType};
3189///
3190/// // Parse PostgreSQL SQL into an AST
3191/// let pg = Dialect::get(DialectType::PostgreSQL);
3192/// let exprs = pg.parse("SELECT id, name FROM users WHERE active")?;
3193///
3194/// // Transpile from PostgreSQL to BigQuery
3195/// let results = pg.transpile("SELECT NOW()", DialectType::BigQuery)?;
3196/// assert_eq!(results[0], "SELECT CURRENT_TIMESTAMP()");
3197/// ```
3198///
3199/// Obtain an instance via [`Dialect::get`] or [`Dialect::get_by_name`].
3200/// The struct is `Send + Sync` safe so it can be shared across threads.
pub struct Dialect {
    /// Which built-in dialect this handle behaves as. Custom dialects report
    /// their base dialect here (see `from_custom_config`).
    dialect_type: DialectType,
    /// Tokenizer preconfigured with this dialect's lexing rules.
    tokenizer: Tokenizer,
    /// Generator configuration, shared cheaply via `Arc`.
    generator_config: Arc<GeneratorConfig>,
    /// Per-node expression transform applied by `transform`.
    transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>,
    /// Optional function to get expression-specific generator config (for hybrid dialects like Athena).
    generator_config_for_expr: Option<Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>>,
    /// Optional custom preprocessing function (overrides built-in preprocess for custom dialects).
    custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>>,
}
3211
3212/// Options for [`Dialect::transpile_with`].
3213///
3214/// Use [`TranspileOptions::default`] for defaults, then tweak the fields you need.
3215/// The struct is marked `#[non_exhaustive]` so new fields can be added without
3216/// breaking the API.
3217///
3218/// The struct derives `Serialize`/`Deserialize` using camelCase field names so
3219/// it can be round-tripped over JSON bridges (C FFI, WASM) without mapping.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase", default)]
#[non_exhaustive]
pub struct TranspileOptions {
    /// Whether to pretty-print the output SQL. Defaults to `false`.
    pub pretty: bool,
}
3227
3228impl TranspileOptions {
3229 /// Construct options with pretty-printing enabled.
3230 pub fn pretty() -> Self {
3231 Self { pretty: true }
3232 }
3233}
3234
3235/// A value that can be used as the target dialect in [`Dialect::transpile`] /
3236/// [`Dialect::transpile_with`].
3237///
3238/// Implemented for [`DialectType`] (built-in dialect enum) and `&Dialect` (any
3239/// dialect handle, including custom ones). End users do not normally need to
3240/// implement this trait themselves.
pub trait TranspileTarget {
    /// Invoke `f` with a reference to the resolved target dialect.
    ///
    /// Takes `self` by value so both `DialectType` and `&Dialect` can
    /// implement it (the latter without cloning a `Dialect`).
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R;
}
3245
3246impl TranspileTarget for DialectType {
3247 fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
3248 f(&Dialect::get(self))
3249 }
3250}
3251
impl TranspileTarget for &Dialect {
    fn with_dialect<R>(self, f: impl FnOnce(&Dialect) -> R) -> R {
        // Already a dialect handle; pass it straight through.
        f(self)
    }
}
3257
3258impl Dialect {
    /// Creates a fully configured [`Dialect`] instance for the given [`DialectType`].
    ///
    /// This is the primary constructor. It initializes the tokenizer, generator config,
    /// and expression transformer based on the dialect's [`DialectImpl`] implementation.
    /// For hybrid dialects like Athena, it also sets up expression-specific generator
    /// config routing.
    ///
    /// Configs come from per-dialect `LazyLock` caches, so repeated calls are
    /// cheap (a config clone and an `Arc` bump plus a fresh boxed closure).
    pub fn get(dialect_type: DialectType) -> Self {
        let (tokenizer_config, generator_config, transformer) =
            configs_for_dialect_type(dialect_type);

        // Set up expression-specific generator config for hybrid dialects
        // (currently only Athena routes generator config per expression).
        let generator_config_for_expr: Option<
            Box<dyn Fn(&Expression) -> GeneratorConfig + Send + Sync>,
        > = match dialect_type {
            #[cfg(feature = "dialect-athena")]
            DialectType::Athena => Some(Box::new(|expr| {
                AthenaDialect.generator_config_for_expr(expr)
            })),
            _ => None,
        };

        Self {
            dialect_type,
            tokenizer: Tokenizer::new(tokenizer_config),
            generator_config,
            transformer,
            generator_config_for_expr,
            // Built-in dialects use the built-in preprocessing logic.
            custom_preprocess: None,
        }
    }
3289
3290 /// Look up a dialect by string name.
3291 ///
3292 /// Checks built-in dialect names first (via [`DialectType::from_str`]), then
3293 /// falls back to the custom dialect registry. Returns `None` if no dialect
3294 /// with the given name exists.
3295 pub fn get_by_name(name: &str) -> Option<Self> {
3296 // Try built-in first
3297 if let Ok(dt) = DialectType::from_str(name) {
3298 return Some(Self::get(dt));
3299 }
3300
3301 // Try custom registry
3302 let config = get_custom_dialect_config(name)?;
3303 Some(Self::from_custom_config(&config))
3304 }
3305
3306 /// Construct a `Dialect` from a custom dialect configuration.
3307 fn from_custom_config(config: &CustomDialectConfig) -> Self {
3308 // Build the transformer: use custom if provided, else use base dialect's
3309 let transformer: Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync> =
3310 if let Some(ref custom_transform) = config.transform {
3311 let t = Arc::clone(custom_transform);
3312 Box::new(move |e| t(e))
3313 } else {
3314 let (_, _, base_transform) = configs_for_dialect_type(config.base_dialect);
3315 base_transform
3316 };
3317
3318 // Build the custom preprocess: use custom if provided
3319 let custom_preprocess: Option<Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>> =
3320 config.preprocess.as_ref().map(|p| {
3321 let p = Arc::clone(p);
3322 Box::new(move |e: Expression| p(e))
3323 as Box<dyn Fn(Expression) -> Result<Expression> + Send + Sync>
3324 });
3325
3326 Self {
3327 dialect_type: config.base_dialect,
3328 tokenizer: Tokenizer::new(config.tokenizer_config.clone()),
3329 generator_config: Arc::new(config.generator_config.clone()),
3330 transformer,
3331 generator_config_for_expr: None,
3332 custom_preprocess,
3333 }
3334 }
3335
    /// Get the dialect type.
    ///
    /// For custom dialects this reports the *base* built-in dialect, since the
    /// handle is constructed from the base dialect's configuration.
    pub fn dialect_type(&self) -> DialectType {
        self.dialect_type
    }

    /// Get the generator configuration used by this dialect's `generate*` methods.
    pub fn generator_config(&self) -> &GeneratorConfig {
        &self.generator_config
    }
3345
3346 /// Parses a SQL string into a list of [`Expression`] AST nodes.
3347 ///
3348 /// The input may contain multiple semicolon-separated statements; each one
3349 /// produces a separate element in the returned vector. Tokenization uses
3350 /// this dialect's configured tokenizer, and parsing uses the dialect-aware parser.
3351 pub fn parse(&self, sql: &str) -> Result<Vec<Expression>> {
3352 let tokens = self.tokenizer.tokenize(sql)?;
3353 let config = crate::parser::ParserConfig {
3354 dialect: Some(self.dialect_type),
3355 ..Default::default()
3356 };
3357 let mut parser = Parser::with_source(tokens, config, sql.to_string());
3358 parser.parse()
3359 }
3360
    /// Tokenize SQL using this dialect's tokenizer configuration.
    ///
    /// Exposed separately from [`parse`](Self::parse) for callers that only
    /// need the raw token stream.
    pub fn tokenize(&self, sql: &str) -> Result<Vec<Token>> {
        self.tokenizer.tokenize(sql)
    }
3365
3366 /// Get the generator config for a specific expression (supports hybrid dialects).
3367 /// Returns an owned `GeneratorConfig` suitable for mutation before generation.
3368 fn get_config_for_expr(&self, expr: &Expression) -> GeneratorConfig {
3369 if let Some(ref config_fn) = self.generator_config_for_expr {
3370 config_fn(expr)
3371 } else {
3372 (*self.generator_config).clone()
3373 }
3374 }
3375
3376 /// Generates a SQL string from an [`Expression`] AST node.
3377 ///
3378 /// The output uses this dialect's generator configuration for identifier quoting,
3379 /// keyword casing, function name normalization, and syntax style. The result is
3380 /// a single-line (non-pretty) SQL string.
3381 pub fn generate(&self, expr: &Expression) -> Result<String> {
3382 // Fast path: when no per-expression config override, share the Arc cheaply.
3383 if self.generator_config_for_expr.is_none() {
3384 let mut generator = Generator::with_arc_config(self.generator_config.clone());
3385 return generator.generate(expr);
3386 }
3387 let config = self.get_config_for_expr(expr);
3388 let mut generator = Generator::with_config(config);
3389 generator.generate(expr)
3390 }
3391
3392 /// Generate SQL from an expression with pretty printing enabled
3393 pub fn generate_pretty(&self, expr: &Expression) -> Result<String> {
3394 let mut config = self.get_config_for_expr(expr);
3395 config.pretty = true;
3396 let mut generator = Generator::with_config(config);
3397 generator.generate(expr)
3398 }
3399
3400 /// Generate SQL from an expression with source dialect info (for transpilation)
3401 pub fn generate_with_source(&self, expr: &Expression, source: DialectType) -> Result<String> {
3402 let mut config = self.get_config_for_expr(expr);
3403 config.source_dialect = Some(source);
3404 let mut generator = Generator::with_config(config);
3405 generator.generate(expr)
3406 }
3407
3408 /// Generate SQL from an expression with pretty printing and source dialect info
3409 pub fn generate_pretty_with_source(
3410 &self,
3411 expr: &Expression,
3412 source: DialectType,
3413 ) -> Result<String> {
3414 let mut config = self.get_config_for_expr(expr);
3415 config.pretty = true;
3416 config.source_dialect = Some(source);
3417 let mut generator = Generator::with_config(config);
3418 generator.generate(expr)
3419 }
3420
3421 /// Generate SQL from an expression with forced identifier quoting (identify=True)
3422 pub fn generate_with_identify(&self, expr: &Expression) -> Result<String> {
3423 let mut config = self.get_config_for_expr(expr);
3424 config.always_quote_identifiers = true;
3425 let mut generator = Generator::with_config(config);
3426 generator.generate(expr)
3427 }
3428
3429 /// Generate SQL from an expression with pretty printing and forced identifier quoting
3430 pub fn generate_pretty_with_identify(&self, expr: &Expression) -> Result<String> {
3431 let mut config = (*self.generator_config).clone();
3432 config.pretty = true;
3433 config.always_quote_identifiers = true;
3434 let mut generator = Generator::with_config(config);
3435 generator.generate(expr)
3436 }
3437
3438 /// Generate SQL from an expression with caller-specified config overrides
3439 pub fn generate_with_overrides(
3440 &self,
3441 expr: &Expression,
3442 overrides: impl FnOnce(&mut GeneratorConfig),
3443 ) -> Result<String> {
3444 let mut config = self.get_config_for_expr(expr);
3445 overrides(&mut config);
3446 let mut generator = Generator::with_config(config);
3447 generator.generate(expr)
3448 }
3449
3450 /// Transforms an expression tree to conform to this dialect's syntax and semantics.
3451 ///
3452 /// The transformation proceeds in two phases:
3453 /// 1. **Preprocessing** -- whole-tree structural rewrites such as eliminating QUALIFY,
3454 /// ensuring boolean predicates, or converting DISTINCT ON to a window-function pattern.
3455 /// 2. **Recursive per-node transform** -- a bottom-up pass via [`transform_recursive`]
3456 /// that applies this dialect's [`DialectImpl::transform_expr`] to every node.
3457 ///
3458 /// This method is used both during transpilation (to rewrite an AST for a target dialect)
3459 /// and for identity transforms (normalizing SQL within the same dialect).
3460 pub fn transform(&self, expr: Expression) -> Result<Expression> {
3461 // Apply preprocessing transforms based on dialect
3462 let preprocessed = self.preprocess(expr)?;
3463 // Then apply recursive transformation
3464 transform_recursive(preprocessed, &self.transformer)
3465 }
3466
3467 /// Apply dialect-specific preprocessing transforms
3468 fn preprocess(&self, expr: Expression) -> Result<Expression> {
3469 // If a custom preprocess function is set, use it instead of the built-in logic
3470 if let Some(ref custom_preprocess) = self.custom_preprocess {
3471 return custom_preprocess(expr);
3472 }
3473
3474 #[cfg(any(
3475 feature = "dialect-mysql",
3476 feature = "dialect-postgresql",
3477 feature = "dialect-bigquery",
3478 feature = "dialect-snowflake",
3479 feature = "dialect-tsql",
3480 feature = "dialect-spark",
3481 feature = "dialect-databricks",
3482 feature = "dialect-hive",
3483 feature = "dialect-sqlite",
3484 feature = "dialect-trino",
3485 feature = "dialect-presto",
3486 feature = "dialect-duckdb",
3487 feature = "dialect-redshift",
3488 feature = "dialect-starrocks",
3489 feature = "dialect-oracle",
3490 feature = "dialect-clickhouse",
3491 ))]
3492 use crate::transforms;
3493
3494 match self.dialect_type {
3495 // MySQL doesn't support QUALIFY, DISTINCT ON, FULL OUTER JOIN
3496 // MySQL doesn't natively support GENERATE_DATE_ARRAY (expand to recursive CTE)
3497 #[cfg(feature = "dialect-mysql")]
3498 DialectType::MySQL => {
3499 let expr = transforms::eliminate_qualify(expr)?;
3500 let expr = transforms::eliminate_full_outer_join(expr)?;
3501 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3502 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3503 Ok(expr)
3504 }
3505 // PostgreSQL doesn't support QUALIFY
3506 // PostgreSQL: UNNEST(GENERATE_SERIES) -> subquery wrapping
3507 // PostgreSQL: Normalize SET ... TO to SET ... = in CREATE FUNCTION
3508 #[cfg(feature = "dialect-postgresql")]
3509 DialectType::PostgreSQL => {
3510 let expr = transforms::eliminate_qualify(expr)?;
3511 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3512 let expr = transforms::unwrap_unnest_generate_series_for_postgres(expr)?;
3513 // Normalize SET ... TO to SET ... = in CREATE FUNCTION
3514 // Only normalize when sqlglot would fully parse (no body) —
3515 // sqlglot falls back to Command for complex function bodies,
3516 // preserving the original text including TO.
3517 let expr = if let Expression::CreateFunction(mut cf) = expr {
3518 if cf.body.is_none() {
3519 for opt in &mut cf.set_options {
3520 if let crate::expressions::FunctionSetValue::Value { use_to, .. } =
3521 &mut opt.value
3522 {
3523 *use_to = false;
3524 }
3525 }
3526 }
3527 Expression::CreateFunction(cf)
3528 } else {
3529 expr
3530 };
3531 Ok(expr)
3532 }
3533 // BigQuery doesn't support DISTINCT ON or CTE column aliases
3534 #[cfg(feature = "dialect-bigquery")]
3535 DialectType::BigQuery => {
3536 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3537 let expr = transforms::pushdown_cte_column_names(expr)?;
3538 let expr = transforms::explode_projection_to_unnest(expr, DialectType::BigQuery)?;
3539 Ok(expr)
3540 }
3541 // Snowflake
3542 #[cfg(feature = "dialect-snowflake")]
3543 DialectType::Snowflake => {
3544 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3545 let expr = transforms::eliminate_window_clause(expr)?;
3546 let expr = transforms::snowflake_flatten_projection_to_unnest(expr)?;
3547 Ok(expr)
3548 }
3549 // TSQL doesn't support QUALIFY
3550 // TSQL requires boolean expressions in WHERE/HAVING (no implicit truthiness)
3551 // TSQL doesn't support CTEs in subqueries (hoist to top level)
3552 // NOTE: no_limit_order_by_union is handled in cross_dialect_normalize (not preprocess)
3553 // to avoid breaking TSQL identity tests where ORDER BY on UNION is valid
3554 #[cfg(feature = "dialect-tsql")]
3555 DialectType::TSQL => {
3556 let expr = transforms::eliminate_qualify(expr)?;
3557 let expr = transforms::eliminate_semi_and_anti_joins(expr)?;
3558 let expr = transforms::ensure_bools(expr)?;
3559 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3560 let expr = transforms::move_ctes_to_top_level(expr)?;
3561 let expr = transforms::qualify_derived_table_outputs(expr)?;
3562 Ok(expr)
3563 }
3564 // Spark doesn't support QUALIFY (but Databricks does)
3565 // Spark doesn't support CTEs in subqueries (hoist to top level)
3566 #[cfg(feature = "dialect-spark")]
3567 DialectType::Spark => {
3568 let expr = transforms::eliminate_qualify(expr)?;
3569 let expr = transforms::add_auto_table_alias(expr)?;
3570 let expr = transforms::simplify_nested_paren_values(expr)?;
3571 let expr = transforms::move_ctes_to_top_level(expr)?;
3572 Ok(expr)
3573 }
3574 // Databricks supports QUALIFY natively
3575 // Databricks doesn't support CTEs in subqueries (hoist to top level)
3576 #[cfg(feature = "dialect-databricks")]
3577 DialectType::Databricks => {
3578 let expr = transforms::add_auto_table_alias(expr)?;
3579 let expr = transforms::simplify_nested_paren_values(expr)?;
3580 let expr = transforms::move_ctes_to_top_level(expr)?;
3581 Ok(expr)
3582 }
3583 // Hive doesn't support QUALIFY or CTEs in subqueries
3584 #[cfg(feature = "dialect-hive")]
3585 DialectType::Hive => {
3586 let expr = transforms::eliminate_qualify(expr)?;
3587 let expr = transforms::move_ctes_to_top_level(expr)?;
3588 Ok(expr)
3589 }
3590 // SQLite doesn't support QUALIFY
3591 #[cfg(feature = "dialect-sqlite")]
3592 DialectType::SQLite => {
3593 let expr = transforms::eliminate_qualify(expr)?;
3594 Ok(expr)
3595 }
3596 // Trino doesn't support QUALIFY
3597 #[cfg(feature = "dialect-trino")]
3598 DialectType::Trino => {
3599 let expr = transforms::eliminate_qualify(expr)?;
3600 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Trino)?;
3601 Ok(expr)
3602 }
3603 // Presto doesn't support QUALIFY or WINDOW clause
3604 #[cfg(feature = "dialect-presto")]
3605 DialectType::Presto => {
3606 let expr = transforms::eliminate_qualify(expr)?;
3607 let expr = transforms::eliminate_window_clause(expr)?;
3608 let expr = transforms::explode_projection_to_unnest(expr, DialectType::Presto)?;
3609 Ok(expr)
3610 }
3611 // DuckDB supports QUALIFY - no elimination needed
3612 // Expand POSEXPLODE to GENERATE_SUBSCRIPTS + UNNEST
3613 // Expand LIKE ANY / ILIKE ANY to OR chains (DuckDB doesn't support quantifiers)
3614 #[cfg(feature = "dialect-duckdb")]
3615 DialectType::DuckDB => {
3616 let expr = transforms::expand_posexplode_duckdb(expr)?;
3617 let expr = transforms::expand_like_any(expr)?;
3618 Ok(expr)
3619 }
3620 // Redshift doesn't support QUALIFY, WINDOW clause, or GENERATE_DATE_ARRAY
3621 #[cfg(feature = "dialect-redshift")]
3622 DialectType::Redshift => {
3623 let expr = transforms::eliminate_qualify(expr)?;
3624 let expr = transforms::eliminate_window_clause(expr)?;
3625 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3626 Ok(expr)
3627 }
3628 // StarRocks doesn't support BETWEEN in DELETE statements or QUALIFY
3629 #[cfg(feature = "dialect-starrocks")]
3630 DialectType::StarRocks => {
3631 let expr = transforms::eliminate_qualify(expr)?;
3632 let expr = transforms::expand_between_in_delete(expr)?;
3633 let expr = transforms::eliminate_distinct_on_for_dialect(
3634 expr,
3635 Some(DialectType::StarRocks),
3636 Some(DialectType::StarRocks),
3637 )?;
3638 let expr = transforms::unnest_generate_date_array_using_recursive_cte(expr)?;
3639 Ok(expr)
3640 }
3641 // DataFusion supports QUALIFY and semi/anti joins natively
3642 #[cfg(feature = "dialect-datafusion")]
3643 DialectType::DataFusion => Ok(expr),
3644 // Oracle doesn't support QUALIFY
3645 #[cfg(feature = "dialect-oracle")]
3646 DialectType::Oracle => {
3647 let expr = transforms::eliminate_qualify(expr)?;
3648 Ok(expr)
3649 }
3650 // Drill - no special preprocessing needed
3651 #[cfg(feature = "dialect-drill")]
3652 DialectType::Drill => Ok(expr),
3653 // Teradata - no special preprocessing needed
3654 #[cfg(feature = "dialect-teradata")]
3655 DialectType::Teradata => Ok(expr),
3656 // ClickHouse doesn't support ORDER BY/LIMIT directly on UNION
3657 #[cfg(feature = "dialect-clickhouse")]
3658 DialectType::ClickHouse => {
3659 let expr = transforms::no_limit_order_by_union(expr)?;
3660 Ok(expr)
3661 }
3662 // Other dialects - no preprocessing
3663 _ => Ok(expr),
3664 }
3665 }
3666
3667 /// Transpile SQL from this dialect to the given target dialect.
3668 ///
3669 /// The target may be specified as either a built-in [`DialectType`] enum variant
3670 /// or as a reference to a [`Dialect`] handle (built-in or custom). Both work:
3671 ///
3672 /// ```rust,ignore
3673 /// let pg = Dialect::get(DialectType::PostgreSQL);
3674 /// pg.transpile("SELECT NOW()", DialectType::BigQuery)?; // enum
3675 /// pg.transpile("SELECT NOW()", &custom_dialect)?; // handle
3676 /// ```
3677 ///
3678 /// For pretty-printing or other options, use [`transpile_with`](Self::transpile_with).
3679 pub fn transpile<T: TranspileTarget>(&self, sql: &str, target: T) -> Result<Vec<String>> {
3680 self.transpile_with(sql, target, TranspileOptions::default())
3681 }
3682
3683 /// Transpile SQL with configurable [`TranspileOptions`] (e.g. pretty-printing).
3684 pub fn transpile_with<T: TranspileTarget>(
3685 &self,
3686 sql: &str,
3687 target: T,
3688 opts: TranspileOptions,
3689 ) -> Result<Vec<String>> {
3690 target.with_dialect(|td| self.transpile_inner(sql, td, opts.pretty))
3691 }
3692
3693 #[cfg(not(feature = "transpile"))]
3694 fn transpile_inner(
3695 &self,
3696 sql: &str,
3697 target_dialect: &Dialect,
3698 pretty: bool,
3699 ) -> Result<Vec<String>> {
3700 let target = target_dialect.dialect_type;
3701 // Without the transpile feature, only same-dialect or to/from generic is supported
3702 if self.dialect_type != target
3703 && self.dialect_type != DialectType::Generic
3704 && target != DialectType::Generic
3705 {
3706 return Err(crate::error::Error::parse(
3707 "Cross-dialect transpilation not available in this build",
3708 0,
3709 0,
3710 0,
3711 0,
3712 ));
3713 }
3714
3715 let expressions = self.parse(sql)?;
3716 let generic_identity =
3717 self.dialect_type == DialectType::Generic && target == DialectType::Generic;
3718
3719 if generic_identity {
3720 return expressions
3721 .into_iter()
3722 .map(|expr| {
3723 if pretty {
3724 target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
3725 } else {
3726 target_dialect.generate_with_source(&expr, self.dialect_type)
3727 }
3728 })
3729 .collect();
3730 }
3731
3732 expressions
3733 .into_iter()
3734 .map(|expr| {
3735 let transformed = target_dialect.transform(expr)?;
3736 if pretty {
3737 target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)
3738 } else {
3739 target_dialect.generate_with_source(&transformed, self.dialect_type)
3740 }
3741 })
3742 .collect()
3743 }
3744
    /// Full cross-dialect transpilation pipeline (compiled with the `transpile` feature).
    ///
    /// Parses `sql` with the source (`self`) dialect and, for each parsed statement,
    /// runs an ordered sequence of AST rewrites before generating SQL with
    /// `target_dialect`:
    ///
    /// 1. Source-specific pre-normalization (DuckDB VARCHAR/CHAR -> TEXT).
    /// 2. The source dialect's `transform` (only when source != target and source
    ///    is not Generic).
    /// 3. Source/target-pair fixups (TSQL ISNULL/JSON unwrapping, Snowflake
    ///    CURRENT_TIME / REPEAT / RANDOM handling, BigQuery struct-field and
    ///    UNNEST alias conversions, ...).
    /// 4. `cross_dialect_normalize` plus target-aware rewrites (DISTINCT ON
    ///    elimination, LATERAL VIEW EXPLODE, UNION ORDER BY/LIMIT wrapping,
    ///    COUNT(*) -> COUNT_BIG(*)).
    /// 5. The target dialect's `transform`, then SQL generation (pretty or compact).
    ///
    /// NOTE(review): the rewrite order below is load-bearing — several steps state
    /// that they must run before/after others; do not reorder casually.
    #[cfg(feature = "transpile")]
    fn transpile_inner(
        &self,
        sql: &str,
        target_dialect: &Dialect,
        pretty: bool,
    ) -> Result<Vec<String>> {
        let target = target_dialect.dialect_type;
        let expressions = self.parse(sql)?;
        // Generic -> Generic is an identity round-trip: skip every rewrite stage and
        // only re-generate the parsed AST.
        let generic_identity =
            self.dialect_type == DialectType::Generic && target == DialectType::Generic;

        if generic_identity {
            return expressions
                .into_iter()
                .map(|expr| {
                    if pretty {
                        target_dialect.generate_pretty_with_source(&expr, self.dialect_type)
                    } else {
                        target_dialect.generate_with_source(&expr, self.dialect_type)
                    }
                })
                .collect();
        }

        expressions
            .into_iter()
            .map(|expr| {
                // DuckDB source: normalize VARCHAR/CHAR to TEXT (DuckDB doesn't support
                // VARCHAR length constraints). This emulates Python sqlglot's DuckDB parser
                // where VARCHAR_LENGTH = None and VARCHAR maps to TEXT.
                let expr = if matches!(self.dialect_type, DialectType::DuckDB) {
                    use crate::expressions::DataType as DT;
                    transform_recursive(expr, &|e| match e {
                        Expression::DataType(DT::VarChar { .. }) => {
                            Ok(Expression::DataType(DT::Text))
                        }
                        Expression::DataType(DT::Char { .. }) => Ok(Expression::DataType(DT::Text)),
                        _ => Ok(e),
                    })?
                } else {
                    expr
                };

                // When source and target differ, first normalize the source dialect's
                // AST constructs to standard SQL, so that the target dialect can handle them.
                // This handles cases like Snowflake's SQUARE -> POWER, DIV0 -> CASE, etc.
                let normalized =
                    if self.dialect_type != target && self.dialect_type != DialectType::Generic {
                        self.transform(expr)?
                    } else {
                        expr
                    };

                // For TSQL source targeting non-TSQL: unwrap ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                // to just JSON_QUERY(...) so cross_dialect_normalize can convert it cleanly.
                // The TSQL read transform wraps JsonQuery in ISNULL for identity, but for
                // cross-dialect transpilation we need the unwrapped JSON_QUERY.
                let normalized =
                    if matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                        && !matches!(target, DialectType::TSQL | DialectType::Fabric)
                    {
                        transform_recursive(normalized, &|e| {
                            if let Expression::Function(ref f) = e {
                                if f.name.eq_ignore_ascii_case("ISNULL") && f.args.len() == 2 {
                                    // Check if first arg is JSON_QUERY and second is JSON_VALUE
                                    if let (
                                        Expression::Function(ref jq),
                                        Expression::Function(ref jv),
                                    ) = (&f.args[0], &f.args[1])
                                    {
                                        if jq.name.eq_ignore_ascii_case("JSON_QUERY")
                                            && jv.name.eq_ignore_ascii_case("JSON_VALUE")
                                        {
                                            // Unwrap: return just JSON_QUERY(...)
                                            return Ok(f.args[0].clone());
                                        }
                                    }
                                }
                            }
                            Ok(e)
                        })?
                    } else {
                        normalized
                    };

                // Snowflake source to non-Snowflake target: CURRENT_TIME -> LOCALTIME
                // Snowflake's CURRENT_TIME is equivalent to LOCALTIME in other dialects.
                // Python sqlglot parses Snowflake's CURRENT_TIME as Localtime expression.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && !matches!(target, DialectType::Snowflake)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("CURRENT_TIME") {
                                return Ok(Expression::Localtime(Box::new(
                                    crate::expressions::Localtime { this: None },
                                )));
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: REPEAT(' ', n) -> REPEAT(' ', CAST(n AS BIGINT))
                // Snowflake's SPACE(n) is converted to REPEAT(' ', n) by the Snowflake source
                // transform. DuckDB requires the count argument to be BIGINT.
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            if f.name.eq_ignore_ascii_case("REPEAT") && f.args.len() == 2 {
                                // Check if first arg is space string literal
                                if let Expression::Literal(ref lit) = f.args[0] {
                                    if let crate::expressions::Literal::String(ref s) = lit.as_ref()
                                    {
                                        if s == " " {
                                            // Wrap second arg in CAST(... AS BIGINT) if not already
                                            if !matches!(f.args[1], Expression::Cast(_)) {
                                                let mut new_args = f.args.clone();
                                                new_args[1] = Expression::Cast(Box::new(
                                                    crate::expressions::Cast {
                                                        this: new_args[1].clone(),
                                                        to: crate::expressions::DataType::BigInt {
                                                            length: None,
                                                        },
                                                        trailing_comments: Vec::new(),
                                                        double_colon_syntax: false,
                                                        format: None,
                                                        default: None,
                                                        inferred_type: None,
                                                    },
                                                ));
                                                return Ok(Expression::Function(Box::new(
                                                    crate::expressions::Function {
                                                        name: f.name.clone(),
                                                        args: new_args,
                                                        distinct: f.distinct,
                                                        trailing_comments: f
                                                            .trailing_comments
                                                            .clone(),
                                                        use_bracket_syntax: f.use_bracket_syntax,
                                                        no_parens: f.no_parens,
                                                        quoted: f.quoted,
                                                        span: None,
                                                        inferred_type: None,
                                                    },
                                                )));
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        Ok(e)
                    })?
                } else {
                    normalized
                };

                // Propagate struct field names in arrays (for BigQuery source to non-BigQuery target)
                // BigQuery->BigQuery should NOT propagate names (BigQuery handles implicit inheritance)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && !matches!(target, DialectType::BigQuery)
                {
                    crate::transforms::propagate_struct_field_names(normalized)?
                } else {
                    normalized
                };

                // Snowflake source to DuckDB target: RANDOM()/RANDOM(seed) -> scaled RANDOM()
                // Snowflake RANDOM() returns integer in [-2^63, 2^63-1], DuckDB RANDOM() returns float [0, 1)
                // Skip RANDOM inside UNIFORM/NORMAL/ZIPF/RANDSTR generator args since those
                // functions handle their generator args differently (as float seeds).
                let normalized = if matches!(self.dialect_type, DialectType::Snowflake)
                    && matches!(target, DialectType::DuckDB)
                {
                    // Builds CAST(-2^63 + RANDOM() * (2^63 - (-2^63)) AS BIGINT), i.e. a
                    // DuckDB float RANDOM() rescaled to Snowflake's signed 64-bit range.
                    fn make_scaled_random() -> Expression {
                        let lower =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "-9.223372036854776E+18".to_string(),
                            )));
                        let upper =
                            Expression::Literal(Box::new(crate::expressions::Literal::Number(
                                "9.223372036854776e+18".to_string(),
                            )));
                        let random_call = Expression::Random(crate::expressions::Random);
                        let range_size = Expression::Paren(Box::new(crate::expressions::Paren {
                            this: Expression::Sub(Box::new(crate::expressions::BinaryOp {
                                left: upper,
                                right: lower.clone(),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            trailing_comments: vec![],
                        }));
                        let scaled = Expression::Mul(Box::new(crate::expressions::BinaryOp {
                            left: random_call,
                            right: range_size,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        let shifted = Expression::Add(Box::new(crate::expressions::BinaryOp {
                            left: lower,
                            right: scaled,
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        }));
                        Expression::Cast(Box::new(crate::expressions::Cast {
                            this: shifted,
                            to: crate::expressions::DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }))
                    }

                    // Pre-process: protect seeded RANDOM(seed) inside UNIFORM/NORMAL/ZIPF/RANDSTR
                    // by converting Rand{seed: Some(s)} to Function{name:"RANDOM", args:[s]}.
                    // This prevents transform_recursive (which is bottom-up) from expanding
                    // seeded RANDOM into make_scaled_random() and losing the seed value.
                    // Unseeded RANDOM()/Rand{seed:None} is left as-is so it gets expanded
                    // and then un-expanded back to Expression::Random by the code below.
                    let normalized = transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" || n == "RANDSTR" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        if let Expression::Rand(ref r) = arg {
                                            if r.lower.is_none() && r.upper.is_none() {
                                                if let Some(ref seed) = r.seed {
                                                    // Convert Rand{seed: Some(s)} to Function("RANDOM", [s])
                                                    // so it won't be expanded by the RANDOM expansion below
                                                    *arg = Expression::Function(Box::new(
                                                        crate::expressions::Function::new(
                                                            "RANDOM".to_string(),
                                                            vec![*seed.clone()],
                                                        ),
                                                    ));
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                            }
                        }
                        Ok(e)
                    })?;

                    // transform_recursive processes bottom-up, so RANDOM() (unseeded) inside
                    // generator functions (UNIFORM, NORMAL, ZIPF) gets expanded before
                    // we see the parent. We detect this and undo the expansion by replacing
                    // the expanded pattern back with Expression::Random.
                    // Seeded RANDOM(seed) was already protected above as Function("RANDOM", [seed]).
                    // Note: RANDSTR is NOT included here — it needs the expanded form for unseeded
                    // RANDOM() since the DuckDB handler uses the expanded SQL as-is in the hash.
                    transform_recursive(normalized, &|e| {
                        if let Expression::Function(ref f) = e {
                            let n = f.name.to_ascii_uppercase();
                            if n == "UNIFORM" || n == "NORMAL" || n == "ZIPF" {
                                if let Expression::Function(mut f) = e {
                                    for arg in f.args.iter_mut() {
                                        // Detect expanded RANDOM pattern: CAST(-9.22... + RANDOM() * (...) AS BIGINT)
                                        if let Expression::Cast(ref cast) = arg {
                                            if matches!(
                                                cast.to,
                                                crate::expressions::DataType::BigInt { .. }
                                            ) {
                                                if let Expression::Add(ref add) = cast.this {
                                                    if let Expression::Literal(ref lit) = add.left {
                                                        if let crate::expressions::Literal::Number(
                                                            ref num,
                                                        ) = lit.as_ref()
                                                        {
                                                            if num == "-9.223372036854776E+18" {
                                                                *arg = Expression::Random(
                                                                    crate::expressions::Random,
                                                                );
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    return Ok(Expression::Function(f));
                                }
                                return Ok(e);
                            }
                        }
                        match e {
                            Expression::Random(_) => Ok(make_scaled_random()),
                            // Rand(seed) with no bounds: drop seed and expand
                            // (DuckDB RANDOM doesn't support seeds)
                            Expression::Rand(ref r) if r.lower.is_none() && r.upper.is_none() => {
                                Ok(make_scaled_random())
                            }
                            _ => Ok(e),
                        }
                    })?
                } else {
                    normalized
                };

                // Apply cross-dialect semantic normalizations
                let normalized =
                    Self::cross_dialect_normalize(normalized, self.dialect_type, target)?;

                // For DuckDB target from BigQuery source: wrap UNNEST of struct arrays in
                // (SELECT UNNEST(..., max_depth => 2)) subquery
                // Must run BEFORE unnest_alias_to_column_alias since it changes alias structure
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::DuckDB)
                {
                    crate::transforms::wrap_duckdb_unnest_struct(normalized)?
                } else {
                    normalized
                };

                // Convert BigQuery UNNEST aliases to column-alias format for DuckDB/Presto/Spark
                // UNNEST(arr) AS x -> UNNEST(arr) AS _t0(x)
                let normalized = if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(
                        target,
                        DialectType::DuckDB
                            | DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) {
                    crate::transforms::unnest_alias_to_column_alias(normalized)?
                } else if matches!(self.dialect_type, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery | DialectType::Redshift)
                {
                    // For BigQuery/Redshift targets: move UNNEST FROM items to CROSS JOINs
                    // but don't convert alias format (no _t0 wrapper)
                    let result = crate::transforms::unnest_from_to_cross_join(normalized)?;
                    // For Redshift: strip UNNEST when arg is a column reference path
                    if matches!(target, DialectType::Redshift) {
                        crate::transforms::strip_unnest_column_refs(result)?
                    } else {
                        result
                    }
                } else {
                    normalized
                };

                // For Presto/Trino targets from PostgreSQL/Redshift source:
                // Wrap UNNEST aliases from GENERATE_SERIES conversion: AS s -> AS _u(s)
                let normalized = if matches!(
                    self.dialect_type,
                    DialectType::PostgreSQL | DialectType::Redshift
                ) && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    crate::transforms::wrap_unnest_join_aliases(normalized)?
                } else {
                    normalized
                };

                // Eliminate DISTINCT ON with target-dialect awareness
                // This must happen after source transform (which may produce DISTINCT ON)
                // and before target transform, with knowledge of the target dialect's NULL ordering behavior
                let normalized = crate::transforms::eliminate_distinct_on_for_dialect(
                    normalized,
                    Some(target),
                    Some(self.dialect_type),
                )?;

                // GENERATE_DATE_ARRAY in UNNEST -> Snowflake ARRAY_GENERATE_RANGE + DATEADD
                let normalized = if matches!(target, DialectType::Snowflake) {
                    Self::transform_generate_date_array_snowflake(normalized)?
                } else {
                    normalized
                };

                // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE/INLINE for Spark/Hive/Databricks
                let normalized = if matches!(
                    target,
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive
                ) {
                    crate::transforms::unnest_to_explode_select(normalized)?
                } else {
                    normalized
                };

                // Wrap UNION with ORDER BY/LIMIT in a subquery for dialects that require it
                let normalized = if matches!(target, DialectType::ClickHouse | DialectType::TSQL) {
                    crate::transforms::no_limit_order_by_union(normalized)?
                } else {
                    normalized
                };

                // TSQL: Convert COUNT(*) -> COUNT_BIG(*) when source is not TSQL/Fabric
                // Python sqlglot does this in the TSQL generator, but we can't do it there
                // because it would break TSQL -> TSQL identity
                let normalized = if matches!(target, DialectType::TSQL | DialectType::Fabric)
                    && !matches!(self.dialect_type, DialectType::TSQL | DialectType::Fabric)
                {
                    transform_recursive(normalized, &|e| {
                        if let Expression::Count(ref c) = e {
                            // Build COUNT_BIG(...) as an AggregateFunction
                            let args = if c.star {
                                vec![Expression::Star(crate::expressions::Star {
                                    table: None,
                                    except: None,
                                    replace: None,
                                    rename: None,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                })]
                            } else if let Some(ref this) = c.this {
                                vec![this.clone()]
                            } else {
                                vec![]
                            };
                            Ok(Expression::AggregateFunction(Box::new(
                                crate::expressions::AggregateFunction {
                                    name: "COUNT_BIG".to_string(),
                                    args,
                                    distinct: c.distinct,
                                    filter: c.filter.clone(),
                                    order_by: Vec::new(),
                                    limit: None,
                                    ignore_nulls: None,
                                    inferred_type: None,
                                },
                            )))
                        } else {
                            Ok(e)
                        }
                    })?
                } else {
                    normalized
                };

                // Finally apply the target dialect's own AST rewrites, after all
                // cross-dialect normalization is done.
                let transformed = target_dialect.transform(normalized)?;

                // DuckDB target: when FROM is RANGE(n), replace SEQ's ROW_NUMBER pattern with `range`
                let transformed = if matches!(target, DialectType::DuckDB) {
                    Self::seq_rownum_to_range(transformed)?
                } else {
                    transformed
                };

                let mut sql = if pretty {
                    target_dialect.generate_pretty_with_source(&transformed, self.dialect_type)?
                } else {
                    target_dialect.generate_with_source(&transformed, self.dialect_type)?
                };

                // Align a known Snowflake pretty-print edge case with Python sqlglot output.
                if pretty && target == DialectType::Snowflake {
                    sql = Self::normalize_snowflake_pretty(sql);
                }

                Ok(sql)
            })
            .collect()
    }
4220}
4221
4222// Transpile-only methods: cross-dialect normalization and helpers
4223#[cfg(feature = "transpile")]
4224impl Dialect {
4225 /// For DuckDB target: when FROM clause contains RANGE(n), replace
4226 /// `(ROW_NUMBER() OVER (ORDER BY 1 NULLS FIRST) - 1)` with `range` in select expressions.
4227 /// This handles SEQ1/2/4/8 → RANGE transpilation from Snowflake.
4228 fn seq_rownum_to_range(expr: Expression) -> Result<Expression> {
4229 if let Expression::Select(mut select) = expr {
4230 // Check if FROM contains a RANGE function
4231 let has_range_from = if let Some(ref from) = select.from {
4232 from.expressions.iter().any(|e| {
4233 // Check for direct RANGE(...) or aliased RANGE(...)
4234 match e {
4235 Expression::Function(f) => f.name.eq_ignore_ascii_case("RANGE"),
4236 Expression::Alias(a) => {
4237 matches!(&a.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("RANGE"))
4238 }
4239 _ => false,
4240 }
4241 })
4242 } else {
4243 false
4244 };
4245
4246 if has_range_from {
4247 // Replace the ROW_NUMBER pattern in select expressions
4248 select.expressions = select
4249 .expressions
4250 .into_iter()
4251 .map(|e| Self::replace_rownum_with_range(e))
4252 .collect();
4253 }
4254
4255 Ok(Expression::Select(select))
4256 } else {
4257 Ok(expr)
4258 }
4259 }
4260
4261 /// Replace `(ROW_NUMBER() OVER (...) - 1)` with `range` column reference
4262 fn replace_rownum_with_range(expr: Expression) -> Expression {
4263 match expr {
4264 // Match: (ROW_NUMBER() OVER (...) - 1) % N → range % N
4265 Expression::Mod(op) => {
4266 let new_left = Self::try_replace_rownum_paren(&op.left);
4267 Expression::Mod(Box::new(crate::expressions::BinaryOp {
4268 left: new_left,
4269 right: op.right,
4270 left_comments: op.left_comments,
4271 operator_comments: op.operator_comments,
4272 trailing_comments: op.trailing_comments,
4273 inferred_type: op.inferred_type,
4274 }))
4275 }
4276 // Match: (CASE WHEN (ROW...) % N >= ... THEN ... ELSE ... END)
4277 Expression::Paren(p) => {
4278 let inner = Self::replace_rownum_with_range(p.this);
4279 Expression::Paren(Box::new(crate::expressions::Paren {
4280 this: inner,
4281 trailing_comments: p.trailing_comments,
4282 }))
4283 }
4284 Expression::Case(mut c) => {
4285 // Replace ROW_NUMBER in WHEN conditions and THEN expressions
4286 c.whens = c
4287 .whens
4288 .into_iter()
4289 .map(|(cond, then)| {
4290 (
4291 Self::replace_rownum_with_range(cond),
4292 Self::replace_rownum_with_range(then),
4293 )
4294 })
4295 .collect();
4296 if let Some(else_) = c.else_ {
4297 c.else_ = Some(Self::replace_rownum_with_range(else_));
4298 }
4299 Expression::Case(c)
4300 }
4301 Expression::Gte(op) => Expression::Gte(Box::new(crate::expressions::BinaryOp {
4302 left: Self::replace_rownum_with_range(op.left),
4303 right: op.right,
4304 left_comments: op.left_comments,
4305 operator_comments: op.operator_comments,
4306 trailing_comments: op.trailing_comments,
4307 inferred_type: op.inferred_type,
4308 })),
4309 Expression::Sub(op) => Expression::Sub(Box::new(crate::expressions::BinaryOp {
4310 left: Self::replace_rownum_with_range(op.left),
4311 right: op.right,
4312 left_comments: op.left_comments,
4313 operator_comments: op.operator_comments,
4314 trailing_comments: op.trailing_comments,
4315 inferred_type: op.inferred_type,
4316 })),
4317 Expression::Alias(mut a) => {
4318 a.this = Self::replace_rownum_with_range(a.this);
4319 Expression::Alias(a)
4320 }
4321 other => other,
4322 }
4323 }
4324
4325 /// Check if an expression is `(ROW_NUMBER() OVER (...) - 1)` and replace with `range`
4326 fn try_replace_rownum_paren(expr: &Expression) -> Expression {
4327 if let Expression::Paren(ref p) = expr {
4328 if let Expression::Sub(ref sub) = p.this {
4329 if let Expression::WindowFunction(ref wf) = sub.left {
4330 if let Expression::Function(ref f) = wf.this {
4331 if f.name.eq_ignore_ascii_case("ROW_NUMBER") {
4332 if let Expression::Literal(ref lit) = sub.right {
4333 if let crate::expressions::Literal::Number(ref n) = lit.as_ref() {
4334 if n == "1" {
4335 return Expression::column("range");
4336 }
4337 }
4338 }
4339 }
4340 }
4341 }
4342 }
4343 }
4344 expr.clone()
4345 }
4346
4347 /// Transform BigQuery GENERATE_DATE_ARRAY in UNNEST for Snowflake target.
4348 /// Converts:
4349 /// SELECT ..., alias, ... FROM t CROSS JOIN UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL '1' unit)) AS alias
4350 /// To:
4351 /// SELECT ..., DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE)) AS alias, ...
4352 /// FROM t, LATERAL FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)) AS _t0(seq, key, path, index, alias, this)
4353 fn transform_generate_date_array_snowflake(expr: Expression) -> Result<Expression> {
4354 use crate::expressions::*;
4355 transform_recursive(expr, &|e| {
4356 // Handle ARRAY_SIZE(GENERATE_DATE_ARRAY(...)) -> ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM subquery))
4357 if let Expression::ArraySize(ref af) = e {
4358 if let Expression::Function(ref f) = af.this {
4359 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4360 let result = Self::convert_array_size_gda_snowflake(f)?;
4361 return Ok(result);
4362 }
4363 }
4364 }
4365
4366 let Expression::Select(mut sel) = e else {
4367 return Ok(e);
4368 };
4369
4370 // Find joins with UNNEST containing GenerateSeries (from GENERATE_DATE_ARRAY conversion)
4371 let mut gda_info: Option<(String, Expression, Expression, String)> = None; // (alias_name, start_expr, end_expr, unit)
4372 let mut gda_join_idx: Option<usize> = None;
4373
4374 for (idx, join) in sel.joins.iter().enumerate() {
4375 // The join.this may be:
4376 // 1. Unnest(UnnestFunc { alias: Some("mnth"), ... })
4377 // 2. Alias(Alias { this: Unnest(UnnestFunc { alias: None, ... }), alias: "mnth", ... })
4378 let (unnest_ref, alias_name) = match &join.this {
4379 Expression::Unnest(ref unnest) => {
4380 let alias = unnest.alias.as_ref().map(|id| id.name.clone());
4381 (Some(unnest.as_ref()), alias)
4382 }
4383 Expression::Alias(ref a) => {
4384 if let Expression::Unnest(ref unnest) = a.this {
4385 (Some(unnest.as_ref()), Some(a.alias.name.clone()))
4386 } else {
4387 (None, None)
4388 }
4389 }
4390 _ => (None, None),
4391 };
4392
4393 if let (Some(unnest), Some(alias)) = (unnest_ref, alias_name) {
4394 // Check the main expression (this) of the UNNEST for GENERATE_DATE_ARRAY function
4395 if let Expression::Function(ref f) = unnest.this {
4396 if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY") && f.args.len() >= 2 {
4397 let start_expr = f.args[0].clone();
4398 let end_expr = f.args[1].clone();
4399 let step = f.args.get(2).cloned();
4400
4401 // Extract unit from step interval
4402 let unit = if let Some(Expression::Interval(ref iv)) = step {
4403 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
4404 Some(format!("{:?}", unit).to_ascii_uppercase())
4405 } else if let Some(ref this) = iv.this {
4406 // The interval may be stored as a string like "1 MONTH"
4407 if let Expression::Literal(lit) = this {
4408 if let Literal::String(ref s) = lit.as_ref() {
4409 let parts: Vec<&str> = s.split_whitespace().collect();
4410 if parts.len() == 2 {
4411 Some(parts[1].to_ascii_uppercase())
4412 } else if parts.len() == 1 {
4413 // Single word like "MONTH" or just "1"
4414 let upper = parts[0].to_ascii_uppercase();
4415 if matches!(
4416 upper.as_str(),
4417 "YEAR"
4418 | "QUARTER"
4419 | "MONTH"
4420 | "WEEK"
4421 | "DAY"
4422 | "HOUR"
4423 | "MINUTE"
4424 | "SECOND"
4425 ) {
4426 Some(upper)
4427 } else {
4428 None
4429 }
4430 } else {
4431 None
4432 }
4433 } else {
4434 None
4435 }
4436 } else {
4437 None
4438 }
4439 } else {
4440 None
4441 }
4442 } else {
4443 None
4444 };
4445
4446 if let Some(unit_str) = unit {
4447 gda_info = Some((alias, start_expr, end_expr, unit_str));
4448 gda_join_idx = Some(idx);
4449 }
4450 }
4451 }
4452 }
4453 if gda_info.is_some() {
4454 break;
4455 }
4456 }
4457
4458 let Some((alias_name, start_expr, end_expr, unit_str)) = gda_info else {
4459 // Also check FROM clause for UNNEST(GENERATE_DATE_ARRAY(...)) patterns
4460 // This handles Generic->Snowflake where GENERATE_DATE_ARRAY is in FROM, not in JOIN
4461 let result = Self::try_transform_from_gda_snowflake(sel);
4462 return result;
4463 };
4464 let join_idx = gda_join_idx.unwrap();
4465
4466 // Build ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1)
4467 // ARRAY_GENERATE_RANGE uses exclusive end, and we need DATEDIFF + 1 values
4468 // (inclusive date range), so the exclusive end is DATEDIFF + 1.
4469 let datediff = Expression::Function(Box::new(Function::new(
4470 "DATEDIFF".to_string(),
4471 vec![
4472 Expression::boxed_column(Column {
4473 name: Identifier::new(&unit_str),
4474 table: None,
4475 join_mark: false,
4476 trailing_comments: vec![],
4477 span: None,
4478 inferred_type: None,
4479 }),
4480 start_expr.clone(),
4481 end_expr.clone(),
4482 ],
4483 )));
4484 let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
4485 left: datediff,
4486 right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
4487 left_comments: vec![],
4488 operator_comments: vec![],
4489 trailing_comments: vec![],
4490 inferred_type: None,
4491 }));
4492
4493 let array_gen_range = Expression::Function(Box::new(Function::new(
4494 "ARRAY_GENERATE_RANGE".to_string(),
4495 vec![
4496 Expression::Literal(Box::new(Literal::Number("0".to_string()))),
4497 datediff_plus_one,
4498 ],
4499 )));
4500
4501 // Build FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...))
4502 let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
4503 name: Identifier::new("INPUT"),
4504 value: array_gen_range,
4505 separator: crate::expressions::NamedArgSeparator::DArrow,
4506 }));
4507 let flatten = Expression::Function(Box::new(Function::new(
4508 "FLATTEN".to_string(),
4509 vec![flatten_input],
4510 )));
4511
4512 // Build LATERAL FLATTEN(...) AS _t0(seq, key, path, index, alias, this)
4513 let alias_table = Alias {
4514 this: flatten,
4515 alias: Identifier::new("_t0"),
4516 column_aliases: vec![
4517 Identifier::new("seq"),
4518 Identifier::new("key"),
4519 Identifier::new("path"),
4520 Identifier::new("index"),
4521 Identifier::new(&alias_name),
4522 Identifier::new("this"),
4523 ],
4524 pre_alias_comments: vec![],
4525 trailing_comments: vec![],
4526 inferred_type: None,
4527 };
4528 let lateral_expr = Expression::Lateral(Box::new(Lateral {
4529 this: Box::new(Expression::Alias(Box::new(alias_table))),
4530 view: None,
4531 outer: None,
4532 alias: None,
4533 alias_quoted: false,
4534 cross_apply: None,
4535 ordinality: None,
4536 column_aliases: vec![],
4537 }));
4538
4539 // Remove the original join and add to FROM expressions
4540 sel.joins.remove(join_idx);
4541 if let Some(ref mut from) = sel.from {
4542 from.expressions.push(lateral_expr);
4543 }
4544
4545 // Build DATEADD(unit, CAST(alias AS INT), CAST(start AS DATE))
4546 let dateadd_expr = Expression::Function(Box::new(Function::new(
4547 "DATEADD".to_string(),
4548 vec![
4549 Expression::boxed_column(Column {
4550 name: Identifier::new(&unit_str),
4551 table: None,
4552 join_mark: false,
4553 trailing_comments: vec![],
4554 span: None,
4555 inferred_type: None,
4556 }),
4557 Expression::Cast(Box::new(Cast {
4558 this: Expression::boxed_column(Column {
4559 name: Identifier::new(&alias_name),
4560 table: None,
4561 join_mark: false,
4562 trailing_comments: vec![],
4563 span: None,
4564 inferred_type: None,
4565 }),
4566 to: DataType::Int {
4567 length: None,
4568 integer_spelling: false,
4569 },
4570 trailing_comments: vec![],
4571 double_colon_syntax: false,
4572 format: None,
4573 default: None,
4574 inferred_type: None,
4575 })),
4576 Expression::Cast(Box::new(Cast {
4577 this: start_expr.clone(),
4578 to: DataType::Date,
4579 trailing_comments: vec![],
4580 double_colon_syntax: false,
4581 format: None,
4582 default: None,
4583 inferred_type: None,
4584 })),
4585 ],
4586 )));
4587
4588 // Replace references to the alias in the SELECT list
4589 let new_exprs: Vec<Expression> = sel
4590 .expressions
4591 .iter()
4592 .map(|expr| Self::replace_column_ref_with_dateadd(expr, &alias_name, &dateadd_expr))
4593 .collect();
4594 sel.expressions = new_exprs;
4595
4596 Ok(Expression::Select(sel))
4597 })
4598 }
4599
4600 /// Helper: replace column references to `alias_name` with dateadd expression
4601 fn replace_column_ref_with_dateadd(
4602 expr: &Expression,
4603 alias_name: &str,
4604 dateadd: &Expression,
4605 ) -> Expression {
4606 use crate::expressions::*;
4607 match expr {
4608 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4609 // Plain column reference -> DATEADD(...) AS alias_name
4610 Expression::Alias(Box::new(Alias {
4611 this: dateadd.clone(),
4612 alias: Identifier::new(alias_name),
4613 column_aliases: vec![],
4614 pre_alias_comments: vec![],
4615 trailing_comments: vec![],
4616 inferred_type: None,
4617 }))
4618 }
4619 Expression::Alias(a) => {
4620 // Check if the inner expression references the alias
4621 let new_this = Self::replace_column_ref_inner(&a.this, alias_name, dateadd);
4622 Expression::Alias(Box::new(Alias {
4623 this: new_this,
4624 alias: a.alias.clone(),
4625 column_aliases: a.column_aliases.clone(),
4626 pre_alias_comments: a.pre_alias_comments.clone(),
4627 trailing_comments: a.trailing_comments.clone(),
4628 inferred_type: None,
4629 }))
4630 }
4631 _ => expr.clone(),
4632 }
4633 }
4634
4635 /// Helper: replace column references in inner expression (not top-level)
4636 fn replace_column_ref_inner(
4637 expr: &Expression,
4638 alias_name: &str,
4639 dateadd: &Expression,
4640 ) -> Expression {
4641 use crate::expressions::*;
4642 match expr {
4643 Expression::Column(c) if c.name.name == alias_name && c.table.is_none() => {
4644 dateadd.clone()
4645 }
4646 Expression::Add(op) => {
4647 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4648 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4649 Expression::Add(Box::new(BinaryOp {
4650 left,
4651 right,
4652 left_comments: op.left_comments.clone(),
4653 operator_comments: op.operator_comments.clone(),
4654 trailing_comments: op.trailing_comments.clone(),
4655 inferred_type: None,
4656 }))
4657 }
4658 Expression::Sub(op) => {
4659 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4660 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4661 Expression::Sub(Box::new(BinaryOp {
4662 left,
4663 right,
4664 left_comments: op.left_comments.clone(),
4665 operator_comments: op.operator_comments.clone(),
4666 trailing_comments: op.trailing_comments.clone(),
4667 inferred_type: None,
4668 }))
4669 }
4670 Expression::Mul(op) => {
4671 let left = Self::replace_column_ref_inner(&op.left, alias_name, dateadd);
4672 let right = Self::replace_column_ref_inner(&op.right, alias_name, dateadd);
4673 Expression::Mul(Box::new(BinaryOp {
4674 left,
4675 right,
4676 left_comments: op.left_comments.clone(),
4677 operator_comments: op.operator_comments.clone(),
4678 trailing_comments: op.trailing_comments.clone(),
4679 inferred_type: None,
4680 }))
4681 }
4682 _ => expr.clone(),
4683 }
4684 }
4685
    /// Handle UNNEST(GENERATE_DATE_ARRAY(...)) in FROM clause for Snowflake target.
    /// Converts to a subquery with DATEADD + TABLE(FLATTEN(ARRAY_GENERATE_RANGE(...))).
    ///
    /// Only the first matching FROM item is rewritten (the scan breaks on the
    /// first hit). If no UNNEST(GENERATE_DATE_ARRAY(...)) pattern is found, or
    /// the step's interval unit cannot be determined, the SELECT is returned
    /// unchanged.
    fn try_transform_from_gda_snowflake(
        mut sel: Box<crate::expressions::Select>,
    ) -> Result<Expression> {
        use crate::expressions::*;

        // Extract GDA info from FROM clause.
        // Tuple layout: (from_idx, col_name, start, end, unit, outer_alias).
        let mut gda_info: Option<(
            usize,
            String,
            Expression,
            Expression,
            String,
            Option<(String, Vec<Identifier>)>,
        )> = None; // (from_idx, col_name, start, end, unit, outer_alias)

        if let Some(ref from) = sel.from {
            for (idx, table_expr) in from.expressions.iter().enumerate() {
                // Pattern 1: UNNEST(GENERATE_DATE_ARRAY(...))
                // Pattern 2: Alias(UNNEST(GENERATE_DATE_ARRAY(...))) AS _q(date_week)
                let (unnest_opt, outer_alias_info) = match table_expr {
                    Expression::Unnest(ref unnest) => (Some(unnest.as_ref()), None),
                    Expression::Alias(ref a) => {
                        if let Expression::Unnest(ref unnest) = a.this {
                            let alias_info = (a.alias.name.clone(), a.column_aliases.clone());
                            (Some(unnest.as_ref()), Some(alias_info))
                        } else {
                            (None, None)
                        }
                    }
                    _ => (None, None),
                };

                if let Some(unnest) = unnest_opt {
                    // Check for GENERATE_DATE_ARRAY function (name match is
                    // case-insensitive; at least start + end args required).
                    let func_opt = match &unnest.this {
                        Expression::Function(ref f)
                            if f.name.eq_ignore_ascii_case("GENERATE_DATE_ARRAY")
                                && f.args.len() >= 2 =>
                        {
                            Some(f)
                        }
                        // Also check for GenerateSeries (from earlier normalization)
                        _ => None,
                    };

                    if let Some(f) = func_opt {
                        // Safe: the match arm above guarantees f.args.len() >= 2.
                        let start_expr = f.args[0].clone();
                        let end_expr = f.args[1].clone();
                        let step = f.args.get(2).cloned();

                        // Extract unit and column name. The produced column is
                        // named after the first outer column alias when one
                        // exists, else "value".
                        let unit = Self::extract_interval_unit_str(&step);
                        let col_name = outer_alias_info
                            .as_ref()
                            .and_then(|(_, cols)| cols.first().map(|id| id.name.clone()))
                            .unwrap_or_else(|| "value".to_string());

                        if let Some(unit_str) = unit {
                            gda_info = Some((
                                idx,
                                col_name,
                                start_expr,
                                end_expr,
                                unit_str,
                                outer_alias_info,
                            ));
                            // Only the first matching FROM item is rewritten.
                            break;
                        }
                    }
                }
            }
        }

        let Some((from_idx, col_name, start_expr, end_expr, unit_str, outer_alias_info)) = gda_info
        else {
            return Ok(Expression::Select(sel));
        };

        // Build the Snowflake subquery:
        // (SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        //  FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(seq, key, path, index, col_name, this))

        // DATEDIFF(unit, start, end) — the unit is passed as a bare
        // identifier (Column node), matching Snowflake's unquoted unit syntax.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // DATEDIFF(...) + 1 — ARRAY_GENERATE_RANGE has an exclusive upper
        // bound, while the date range is inclusive.
        let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                datediff_plus_one,
            ],
        )));

        // TABLE(FLATTEN(INPUT => ...)) — INPUT is a named argument using the
        // `=>` separator.
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // Determine alias name for the table: use outer alias or _t0.
        let table_alias_name = outer_alias_info
            .as_ref()
            .map(|(name, _)| name.clone())
            .unwrap_or_else(|| "_t0".to_string());

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, col_name, this)
        // The six column aliases mirror FLATTEN's fixed output columns; the
        // fifth (VALUE position) is renamed to the target column name.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new(&table_alias_name),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(&col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // SELECT DATEADD(unit, CAST(col_name AS INT), CAST(start AS DATE)) AS col_name
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(&col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                // Use start_expr directly - it's already been normalized (DATE literal -> CAST)
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(&col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Build inner SELECT.
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        let inner_select_expr = Expression::Select(Box::new(inner_select));
        let subquery = Expression::Subquery(Box::new(Subquery {
            this: inner_select_expr,
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // If there was an outer alias (e.g., AS _q(date_week)), wrap with
        // alias so references like `_q.date_week` keep working.
        let replacement = if let Some((alias_name, col_aliases)) = outer_alias_info {
            Expression::Alias(Box::new(Alias {
                this: subquery,
                alias: Identifier::new(&alias_name),
                column_aliases: col_aliases,
                pre_alias_comments: vec![],
                trailing_comments: vec![],
                inferred_type: None,
            }))
        } else {
            subquery
        };

        // Replace the FROM expression in place at the index found above.
        if let Some(ref mut from) = sel.from {
            from.expressions[from_idx] = replacement;
        }

        Ok(Expression::Select(sel))
    }
4929
    /// Convert ARRAY_SIZE(GENERATE_DATE_ARRAY(start, end, step)) for Snowflake.
    /// Produces: ARRAY_SIZE((SELECT ARRAY_AGG(*) FROM (SELECT DATEADD(unit, CAST(value AS INT), start) AS value
    /// FROM TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, DATEDIFF(unit, start, end) + 1))) AS _t0(...))))
    ///
    /// `f` is the GENERATE_DATE_ARRAY call taken from inside ARRAY_SIZE.
    ///
    /// NOTE(review): indexes `f.args[0]` / `f.args[1]` directly and will panic
    /// if fewer than two arguments are supplied — presumably the caller has
    /// already validated the arity; confirm at the call site.
    fn convert_array_size_gda_snowflake(f: &crate::expressions::Function) -> Result<Expression> {
        use crate::expressions::*;

        let start_expr = f.args[0].clone();
        let end_expr = f.args[1].clone();
        // Optional third arg is the step interval; a missing/unknown unit
        // falls back to DAY.
        let step = f.args.get(2).cloned();
        let unit_str = Self::extract_interval_unit_str(&step).unwrap_or_else(|| "DAY".to_string());
        let col_name = "value";

        // Build the inner subquery: same as try_transform_from_gda_snowflake.
        // DATEDIFF(unit, start, end) — unit passed as a bare identifier.
        let datediff = Expression::Function(Box::new(Function::new(
            "DATEDIFF".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                start_expr.clone(),
                end_expr.clone(),
            ],
        )));
        // DATEDIFF(...) + 1 — inclusive range vs. exclusive upper bound.
        let datediff_plus_one = Expression::Add(Box::new(BinaryOp {
            left: datediff,
            right: Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        let array_gen_range = Expression::Function(Box::new(Function::new(
            "ARRAY_GENERATE_RANGE".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                datediff_plus_one,
            ],
        )));

        // FLATTEN(INPUT => ARRAY_GENERATE_RANGE(...)) with `=>` named-arg syntax.
        let flatten_input = Expression::NamedArgument(Box::new(NamedArgument {
            name: Identifier::new("INPUT"),
            value: array_gen_range,
            separator: crate::expressions::NamedArgSeparator::DArrow,
        }));
        let flatten = Expression::Function(Box::new(Function::new(
            "FLATTEN".to_string(),
            vec![flatten_input],
        )));

        // TABLE(FLATTEN(...)) AS _t0(seq, key, path, index, value, this) —
        // the six aliases mirror FLATTEN's fixed output columns.
        let table_func =
            Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])));
        let flatten_aliased = Expression::Alias(Box::new(Alias {
            this: table_func,
            alias: Identifier::new("_t0"),
            column_aliases: vec![
                Identifier::new("seq"),
                Identifier::new("key"),
                Identifier::new("path"),
                Identifier::new("index"),
                Identifier::new(col_name),
                Identifier::new("this"),
            ],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // DATEADD(unit, CAST(value AS INT), start)
        let dateadd_expr = Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![
                Expression::boxed_column(Column {
                    name: Identifier::new(&unit_str),
                    table: None,
                    join_mark: false,
                    trailing_comments: vec![],
                    span: None,
                    inferred_type: None,
                }),
                Expression::Cast(Box::new(Cast {
                    this: Expression::boxed_column(Column {
                        name: Identifier::new(col_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: vec![],
                        span: None,
                        inferred_type: None,
                    }),
                    to: DataType::Int {
                        length: None,
                        integer_spelling: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })),
                start_expr.clone(),
            ],
        )));
        let dateadd_aliased = Expression::Alias(Box::new(Alias {
            this: dateadd_expr,
            alias: Identifier::new(col_name),
            column_aliases: vec![],
            pre_alias_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Inner SELECT: SELECT DATEADD(...) AS value FROM TABLE(FLATTEN(...)) AS _t0(...)
        let mut inner_select = Select::new();
        inner_select.expressions = vec![dateadd_aliased];
        inner_select.from = Some(From {
            expressions: vec![flatten_aliased],
        });

        // Wrap in subquery for the inner part.
        let inner_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(inner_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // Outer: SELECT ARRAY_AGG(*) FROM (inner_subquery) — re-aggregates
        // the generated rows back into an array so ARRAY_SIZE can count them.
        let star = Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: vec![],
            span: None,
        });
        let array_agg = Expression::ArrayAgg(Box::new(AggFunc {
            this: star,
            distinct: false,
            filter: None,
            order_by: vec![],
            name: Some("ARRAY_AGG".to_string()),
            ignore_nulls: None,
            having_max: None,
            limit: None,
            inferred_type: None,
        }));

        let mut outer_select = Select::new();
        outer_select.expressions = vec![array_agg];
        outer_select.from = Some(From {
            expressions: vec![inner_subquery],
        });

        // Wrap in a subquery.
        let outer_subquery = Expression::Subquery(Box::new(Subquery {
            this: Expression::Select(Box::new(outer_select)),
            alias: None,
            column_aliases: vec![],
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: vec![],
            inferred_type: None,
        }));

        // ARRAY_SIZE(subquery)
        Ok(Expression::ArraySize(Box::new(UnaryFunc::new(
            outer_subquery,
        ))))
    }
5119
5120 /// Extract interval unit string from an optional step expression.
5121 fn extract_interval_unit_str(step: &Option<Expression>) -> Option<String> {
5122 use crate::expressions::*;
5123 if let Some(Expression::Interval(ref iv)) = step {
5124 if let Some(IntervalUnitSpec::Simple { ref unit, .. }) = iv.unit {
5125 return Some(format!("{:?}", unit).to_ascii_uppercase());
5126 }
5127 if let Some(ref this) = iv.this {
5128 if let Expression::Literal(lit) = this {
5129 if let Literal::String(ref s) = lit.as_ref() {
5130 let parts: Vec<&str> = s.split_whitespace().collect();
5131 if parts.len() == 2 {
5132 return Some(parts[1].to_ascii_uppercase());
5133 } else if parts.len() == 1 {
5134 let upper = parts[0].to_ascii_uppercase();
5135 if matches!(
5136 upper.as_str(),
5137 "YEAR"
5138 | "QUARTER"
5139 | "MONTH"
5140 | "WEEK"
5141 | "DAY"
5142 | "HOUR"
5143 | "MINUTE"
5144 | "SECOND"
5145 ) {
5146 return Some(upper);
5147 }
5148 }
5149 }
5150 }
5151 }
5152 }
5153 // Default to DAY if no step or no interval
5154 if step.is_none() {
5155 return Some("DAY".to_string());
5156 }
5157 None
5158 }
5159
5160 fn normalize_snowflake_pretty(mut sql: String) -> String {
5161 if sql.contains("LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)")
5162 && sql.contains("ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1)")
5163 {
5164 sql = sql.replace(
5165 "AND uc.user_id <> ALL (SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something')",
5166 "AND uc.user_id <> ALL (\n SELECT DISTINCT\n _id\n FROM users, LATERAL IFF(_u.pos = _u_2.pos_2, _u_2.entity, NULL) AS datasource(SEQ, KEY, PATH, INDEX, VALUE, THIS)\n WHERE\n GET_PATH(datasource.value, 'name') = 'something'\n )",
5167 );
5168
5169 sql = sql.replace(
5170 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1) + 1))) AS _u(seq, key, path, index, pos, this)",
5171 "CROSS JOIN TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (\n GREATEST(ARRAY_SIZE(INPUT => PARSE_JSON(flags))) - 1\n) + 1))) AS _u(seq, key, path, index, pos, this)",
5172 );
5173
5174 sql = sql.replace(
5175 "OR (_u.pos > (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1)\n AND _u_2.pos_2 = (ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1))",
5176 "OR (\n _u.pos > (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n AND _u_2.pos_2 = (\n ARRAY_SIZE(INPUT => PARSE_JSON(flags)) - 1\n )\n )",
5177 );
5178 }
5179
5180 sql
5181 }
5182
5183 /// Apply cross-dialect semantic normalizations that depend on knowing both source and target.
5184 /// This handles cases where the same syntax has different semantics across dialects.
5185 fn cross_dialect_normalize(
5186 expr: Expression,
5187 source: DialectType,
5188 target: DialectType,
5189 ) -> Result<Expression> {
5190 use crate::expressions::{
5191 AggFunc, BinaryOp, Case, Cast, ConvertTimezone, DataType, DateTimeField, DateTruncFunc,
5192 Function, Identifier, IsNull, Literal, Null, Paren,
5193 };
5194
5195 // Helper to tag which kind of transform to apply
5196 #[derive(Debug)]
5197 enum Action {
5198 None,
5199 GreatestLeastNull,
5200 ArrayGenerateRange,
5201 Div0TypedDivision,
5202 ArrayAggCollectList,
5203 ArrayAggWithinGroupFilter,
5204 ArrayAggFilter,
5205 CastTimestampToDatetime,
5206 DateTruncWrapCast,
5207 ToDateToCast,
5208 ConvertTimezoneToExpr,
5209 SetToVariable,
5210 RegexpReplaceSnowflakeToDuckDB,
5211 BigQueryFunctionNormalize,
5212 BigQuerySafeDivide,
5213 BigQueryCastType,
5214 BigQueryToHexBare, // _BQ_TO_HEX(x) with no LOWER/UPPER wrapper
5215 BigQueryToHexLower, // LOWER(_BQ_TO_HEX(x))
5216 BigQueryToHexUpper, // UPPER(_BQ_TO_HEX(x))
5217 BigQueryLastDayStripUnit, // LAST_DAY(date, MONTH) -> LAST_DAY(date)
5218 BigQueryCastFormat, // CAST(x AS type FORMAT 'fmt') -> PARSE_DATE/PARSE_TIMESTAMP etc.
5219 BigQueryAnyValueHaving, // ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
5220 BigQueryApproxQuantiles, // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
5221 GenericFunctionNormalize, // Cross-dialect function renaming (non-BigQuery sources)
5222 RegexpLikeToDuckDB, // RegexpLike -> REGEXP_MATCHES for DuckDB target
5223 EpochConvert, // Expression::Epoch -> target-specific epoch function
5224 EpochMsConvert, // Expression::EpochMs -> target-specific epoch ms function
5225 TSQLTypeNormalize, // TSQL types (MONEY, SMALLMONEY, REAL, DATETIME2) -> standard types
5226 MySQLSafeDivide, // MySQL a/b -> a / NULLIF(b, 0) with optional CAST
5227 NullsOrdering, // Add NULLS FIRST/LAST for ORDER BY
5228 AlterTableRenameStripSchema, // ALTER TABLE db.t1 RENAME TO db.t2 -> ALTER TABLE db.t1 RENAME TO t2
5229 StringAggConvert, // STRING_AGG/WITHIN GROUP -> target-specific aggregate
5230 GroupConcatConvert, // GROUP_CONCAT -> target-specific aggregate
5231 TempTableHash, // TSQL #table -> temp table normalization
5232 ArrayLengthConvert, // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific
5233 DatePartUnquote, // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
5234 NvlClearOriginal, // Clear NVL original_name for cross-dialect transpilation
5235 HiveCastToTryCast, // Hive/Spark CAST -> TRY_CAST for targets that support it
5236 XorExpand, // MySQL XOR -> (a AND NOT b) OR (NOT a AND b) for non-XOR targets
5237 CastTimestampStripTz, // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark
5238 JsonExtractToGetJsonObject, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5239 JsonExtractScalarToGetJsonObject, // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
5240 JsonQueryValueConvert, // JsonQuery/JsonValue -> target-specific (ISNULL wrapper for TSQL, GET_JSON_OBJECT for Spark, etc.)
5241 JsonLiteralToJsonParse, // JSON 'x' -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake; also DuckDB CAST(x AS JSON)
5242 DuckDBCastJsonToVariant, // DuckDB CAST(x AS JSON) -> CAST(x AS VARIANT) for Snowflake
5243 DuckDBTryCastJsonToTryJsonParse, // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
5244 DuckDBJsonFuncToJsonParse, // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
5245 DuckDBJsonValidToIsJson, // DuckDB json_valid(x) -> x IS JSON for Trino/Presto/Athena
5246 ArraySyntaxConvert, // ARRAY[x] -> ARRAY(x) for Spark, [x] for BigQuery/DuckDB
5247 AtTimeZoneConvert, // AT TIME ZONE -> AT_TIMEZONE (Presto) / FROM_UTC_TIMESTAMP (Spark)
5248 DayOfWeekConvert, // DAY_OF_WEEK -> dialect-specific
5249 MaxByMinByConvert, // MAX_BY/MIN_BY -> argMax/argMin for ClickHouse
5250 ArrayAggToCollectList, // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
5251 ArrayAggToGroupConcat, // ARRAY_AGG(x) -> GROUP_CONCAT(x) for MySQL-like targets
5252 ElementAtConvert, // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
5253 CurrentUserParens, // CURRENT_USER -> CURRENT_USER() for Snowflake
5254 CastToJsonForSpark, // CAST(x AS JSON) -> TO_JSON(x) for Spark
5255 CastJsonToFromJson, // CAST(JSON_PARSE(literal) AS ARRAY/MAP) -> FROM_JSON(literal, type_string)
5256 ToJsonConvert, // TO_JSON(x) -> JSON_FORMAT(CAST(x AS JSON)) for Presto etc.
5257 ArrayAggNullFilter, // ARRAY_AGG(x) FILTER(WHERE cond) -> add AND NOT x IS NULL for DuckDB
5258 ArrayAggIgnoreNullsDuckDB, // ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, ...) for DuckDB
5259 BigQueryPercentileContToDuckDB, // PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
5260 BigQueryArraySelectAsStructToSnowflake, // ARRAY(SELECT AS STRUCT ...) -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(...)))
5261 CountDistinctMultiArg, // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END)
5262 VarianceToClickHouse, // Expression::Variance -> varSamp for ClickHouse
5263 StddevToClickHouse, // Expression::Stddev -> stddevSamp for ClickHouse
5264 ApproxQuantileConvert, // Expression::ApproxQuantile -> APPROX_PERCENTILE for Snowflake
5265 ArrayIndexConvert, // array[1] -> array[0] for BigQuery (1-based to 0-based)
5266 DollarParamConvert, // $foo -> @foo for BigQuery
5267 TablesampleReservoir, // TABLESAMPLE (n ROWS) -> TABLESAMPLE RESERVOIR (n ROWS) for DuckDB
5268 BitAggFloatCast, // BIT_OR/BIT_AND/BIT_XOR float arg -> CAST(ROUND(CAST(arg)) AS INT) for DuckDB
5269 BitAggSnowflakeRename, // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG etc. for Snowflake
5270 StrftimeCastTimestamp, // CAST TIMESTAMP -> TIMESTAMP_NTZ for Spark in STRFTIME
5271 AnyValueIgnoreNulls, // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
5272 CreateTableStripComment, // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
5273 EscapeStringNormalize, // e'Hello\nworld' literal newline -> \n
5274 AnyToExists, // PostgreSQL x <op> ANY(array) -> EXISTS(array, x -> ...)
5275 ArrayConcatBracketConvert, // [1,2] -> ARRAY[1,2] for PostgreSQL in ARRAY_CAT
5276 SnowflakeIntervalFormat, // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake
5277 AlterTableToSpRename, // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
5278 StraightJoinCase, // STRAIGHT_JOIN -> straight_join for DuckDB
5279 RespectNullsConvert, // RESPECT NULLS window function handling
5280 MysqlNullsOrdering, // MySQL doesn't support NULLS ordering
5281 MysqlNullsLastRewrite, // Add CASE WHEN to ORDER BY for DuckDB -> MySQL (NULLS LAST simulation)
5282 BigQueryNullsOrdering, // BigQuery doesn't support NULLS FIRST/LAST - strip
5283 SnowflakeFloatProtect, // Protect FLOAT from being converted to DOUBLE by Snowflake target transform
5284 JsonToGetPath, // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
5285 FilterToIff, // FILTER(WHERE) -> IFF wrapping for Snowflake
5286 AggFilterToIff, // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
5287 StructToRow, // DuckDB struct -> Presto ROW / BigQuery STRUCT
5288 SparkStructConvert, // Spark STRUCT(x AS col1, ...) -> ROW/DuckDB struct
5289 DecimalDefaultPrecision, // DECIMAL -> DECIMAL(18, 3) for Snowflake in BIT agg
5290 ApproxCountDistinctToApproxDistinct, // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
5291 CollectListToArrayAgg, // COLLECT_LIST -> ARRAY_AGG for Presto/DuckDB
5292 CollectSetConvert, // COLLECT_SET -> SET_AGG/ARRAY_AGG(DISTINCT)/ARRAY_UNIQUE_AGG
5293 PercentileConvert, // PERCENTILE -> QUANTILE/APPROX_PERCENTILE
5294 CorrIsnanWrap, // CORR(a,b) -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END
5295 TruncToDateTrunc, // TRUNC(ts, unit) -> DATE_TRUNC(unit, ts)
5296 ArrayContainsConvert, // ARRAY_CONTAINS -> CONTAINS/target-specific
5297 StrPositionExpand, // StrPosition with position -> complex STRPOS expansion for Presto/DuckDB
5298 TablesampleSnowflakeStrip, // Strip method and PERCENT for Snowflake target
5299 FirstToAnyValue, // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
5300 MonthsBetweenConvert, // Expression::MonthsBetween -> target-specific
5301 CurrentUserSparkParens, // CURRENT_USER -> CURRENT_USER() for Spark
5302 SparkDateFuncCast, // MONTH/YEAR/DAY('str') -> MONTH/YEAR/DAY(CAST('str' AS DATE)) from Spark
5303 MapFromArraysConvert, // Expression::MapFromArrays -> MAP/OBJECT_CONSTRUCT/MAP_FROM_ARRAYS
5304 AddMonthsConvert, // Expression::AddMonths -> target-specific DATEADD/DATE_ADD
5305 PercentileContConvert, // PERCENTILE_CONT/DISC WITHIN GROUP -> APPROX_PERCENTILE/PERCENTILE_APPROX
5306 GenerateSeriesConvert, // GENERATE_SERIES -> SEQUENCE/UNNEST(SEQUENCE)/EXPLODE(SEQUENCE)
5307 ConcatCoalesceWrap, // CONCAT(a, b) -> CONCAT(COALESCE(CAST(a), ''), ...) for Presto/ClickHouse
5308 PipeConcatToConcat, // a || b -> CONCAT(CAST(a), CAST(b)) for Presto
5309 DivFuncConvert, // DIV(a, b) -> a // b for DuckDB, CAST for BigQuery
5310 JsonObjectAggConvert, // JSON_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
5311 JsonbExistsConvert, // JSONB_EXISTS -> JSON_EXISTS for DuckDB
5312 DateBinConvert, // DATE_BIN -> TIME_BUCKET for DuckDB
5313 MysqlCastCharToText, // MySQL CAST(x AS CHAR) -> CAST(x AS TEXT/VARCHAR/STRING) for targets
5314 SparkCastVarcharToString, // Spark CAST(x AS VARCHAR/CHAR) -> CAST(x AS STRING) for Spark targets
5315 JsonExtractToArrow, // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB
5316 JsonExtractToTsql, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
5317 JsonExtractToClickHouse, // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
5318 JsonExtractScalarConvert, // JSON_EXTRACT_SCALAR -> target-specific (PostgreSQL, Snowflake, SQLite)
5319 JsonPathNormalize, // Normalize JSON path format (brackets, wildcards, quotes) for various dialects
5320 MinMaxToLeastGreatest, // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
5321 ClickHouseUniqToApproxCountDistinct, // uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
5322 ClickHouseAnyToAnyValue, // any(x) -> ANY_VALUE(x) for non-ClickHouse targets
5323 OracleVarchar2ToVarchar, // VARCHAR2(N CHAR/BYTE) -> VARCHAR(N) for non-Oracle targets
5324 Nvl2Expand, // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END
5325 IfnullToCoalesce, // IFNULL(a, b) -> COALESCE(a, b)
5326 IsAsciiConvert, // IS_ASCII(x) -> dialect-specific ASCII check
5327 StrPositionConvert, // STR_POSITION(haystack, needle[, pos]) -> dialect-specific
5328 DecodeSimplify, // DECODE with null-safe -> simple = comparison
5329 ArraySumConvert, // ARRAY_SUM -> target-specific
5330 ArraySizeConvert, // ARRAY_SIZE -> target-specific
5331 ArrayAnyConvert, // ARRAY_ANY -> target-specific
5332 CastTimestamptzToFunc, // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) for MySQL/StarRocks
5333 TsOrDsToDateConvert, // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific
5334 TsOrDsToDateStrConvert, // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
5335 DateStrToDateConvert, // DATE_STR_TO_DATE(x) -> CAST(x AS DATE)
5336 TimeStrToDateConvert, // TIME_STR_TO_DATE(x) -> CAST(x AS DATE)
5337 TimeStrToTimeConvert, // TIME_STR_TO_TIME(x) -> CAST(x AS TIMESTAMP)
5338 DateToDateStrConvert, // DATE_TO_DATE_STR(x) -> CAST(x AS TEXT/VARCHAR/STRING)
5339 DateToDiConvert, // DATE_TO_DI(x) -> dialect-specific (CAST date to YYYYMMDD integer)
5340 DiToDateConvert, // DI_TO_DATE(x) -> dialect-specific (integer YYYYMMDD to date)
5341 TsOrDiToDiConvert, // TS_OR_DI_TO_DI(x) -> dialect-specific
5342 UnixToStrConvert, // UNIX_TO_STR(x, fmt) -> dialect-specific
5343 UnixToTimeConvert, // UNIX_TO_TIME(x) -> dialect-specific
5344 UnixToTimeStrConvert, // UNIX_TO_TIME_STR(x) -> dialect-specific
5345 TimeToUnixConvert, // TIME_TO_UNIX(x) -> dialect-specific
5346 TimeToStrConvert, // TIME_TO_STR(x, fmt) -> dialect-specific
5347 StrToUnixConvert, // STR_TO_UNIX(x, fmt) -> dialect-specific
5348 DateTruncSwapArgs, // DATE_TRUNC('unit', x) -> DATE_TRUNC(x, unit) / TRUNC(x, unit)
5349 TimestampTruncConvert, // TIMESTAMP_TRUNC(x, UNIT[, tz]) -> dialect-specific
5350 StrToDateConvert, // STR_TO_DATE(x, fmt) from Generic -> CAST(StrToTime(x,fmt) AS DATE)
5351 TsOrDsAddConvert, // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> DATE_ADD per dialect
5352 DateFromUnixDateConvert, // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
5353 TimeStrToUnixConvert, // TIME_STR_TO_UNIX(x) -> dialect-specific
5354 TimeToTimeStrConvert, // TIME_TO_TIME_STR(x) -> CAST(x AS type)
5355 CreateTableLikeToCtas, // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
5356 CreateTableLikeToSelectInto, // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
5357 CreateTableLikeToAs, // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
5358 ArrayRemoveConvert, // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
5359 ArrayReverseConvert, // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
5360 JsonKeysConvert, // JSON_KEYS -> JSON_OBJECT_KEYS/OBJECT_KEYS
5361 ParseJsonStrip, // PARSE_JSON(x) -> x (strip wrapper)
5362 ArraySizeDrill, // ARRAY_SIZE -> REPEATED_COUNT for Drill
5363 WeekOfYearToWeekIso, // WEEKOFYEAR -> WEEKISO for Snowflake cross-dialect
5364 RegexpSubstrSnowflakeToDuckDB, // REGEXP_SUBSTR(s, p, ...) -> REGEXP_EXTRACT variants for DuckDB
5365 RegexpSubstrSnowflakeIdentity, // REGEXP_SUBSTR/REGEXP_SUBSTR_ALL strip trailing group=0 for Snowflake identity
5366 RegexpSubstrAllSnowflakeToDuckDB, // REGEXP_SUBSTR_ALL(s, p, ...) -> REGEXP_EXTRACT_ALL variants for DuckDB
5367 RegexpCountSnowflakeToDuckDB, // REGEXP_COUNT(s, p, ...) -> LENGTH(REGEXP_EXTRACT_ALL(...)) for DuckDB
5368 RegexpInstrSnowflakeToDuckDB, // REGEXP_INSTR(s, p, ...) -> complex CASE expression for DuckDB
5369 RegexpReplacePositionSnowflakeToDuckDB, // REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
5370 RlikeSnowflakeToDuckDB, // RLIKE(a, b[, flags]) -> REGEXP_FULL_MATCH(a, b[, flags]) for DuckDB
5371 RegexpExtractAllToSnowflake, // BigQuery REGEXP_EXTRACT_ALL -> REGEXP_SUBSTR_ALL for Snowflake
5372 ArrayExceptConvert, // ARRAY_EXCEPT -> DuckDB complex CASE / Snowflake ARRAY_EXCEPT / Presto ARRAY_EXCEPT
5373 ArrayPositionSnowflakeSwap, // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
5374 RegexpLikeExasolAnchor, // RegexpLike -> Exasol REGEXP_LIKE with .*pattern.* anchoring
5375 ArrayDistinctConvert, // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
5376 ArrayDistinctClickHouse, // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
5377 ArrayContainsDuckDBConvert, // ARRAY_CONTAINS -> DuckDB CASE with NULL-aware check
5378 SnowflakeWindowFrameStrip, // Strip default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for Snowflake target
5379 SnowflakeWindowFrameAdd, // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING for non-Snowflake target
5380 SnowflakeArrayPositionToDuckDB, // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB
5381 }
5382
        // TSQL/Fabric `SELECT ... INTO tbl` has no direct equivalent in most
        // dialects; rewrite it (e.g. to CREATE TABLE AS) for DuckDB/Snowflake/etc.
        // Only the TSQL family can produce this construct, so other sources
        // skip the rewrite entirely.
        let expr = if matches!(source, DialectType::TSQL | DialectType::Fabric) {
            Self::transform_select_into(expr, source, target)
        } else {
            expr
        };
5389
        // `OFFSET n ROWS`: the trailing ROWS keyword is TSQL/Oracle/Fabric
        // syntax; clear it for every other target so a bare OFFSET is emitted.
        let expr = if !matches!(
            target,
            DialectType::TSQL | DialectType::Oracle | DialectType::Fabric
        ) {
            if let Expression::Select(mut select) = expr {
                if let Some(ref mut offset) = select.offset {
                    // NOTE(review): only the top-level SELECT's offset is
                    // touched here — nested offsets presumably handled elsewhere.
                    offset.rows = None;
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
5406
        // Oracle has no LIMIT clause: rewrite LIMIT n as FETCH FIRST n ROWS
        // ONLY, and make any OFFSET use the mandatory `OFFSET n ROWS` spelling.
        let expr = if matches!(target, DialectType::Oracle) {
            if let Expression::Select(mut select) = expr {
                // `take()` removes the LIMIT so it is not rendered alongside
                // the FETCH clause that replaces it.
                if let Some(limit) = select.limit.take() {
                    // Convert LIMIT to FETCH FIRST n ROWS ONLY
                    select.fetch = Some(crate::expressions::Fetch {
                        direction: "FIRST".to_string(),
                        count: Some(limit.this),
                        percent: false,
                        rows: true,
                        with_ties: false,
                    });
                }
                // Add ROWS to OFFSET if present
                if let Some(ref mut offset) = select.offset {
                    offset.rows = Some(true);
                }
                Expression::Select(select)
            } else {
                expr
            }
        } else {
            expr
        };
5431
5432 // Handle CreateTable WITH properties transformation before recursive transforms
5433 let expr = if let Expression::CreateTable(mut ct) = expr {
5434 Self::transform_create_table_properties(&mut ct, source, target);
5435
5436 // Handle Hive-style PARTITIONED BY (col_name type, ...) -> target-specific
5437 // When the PARTITIONED BY clause contains column definitions, merge them into the
5438 // main column list and adjust the PARTITIONED BY clause for the target dialect.
5439 if matches!(
5440 source,
5441 DialectType::Hive | DialectType::Spark | DialectType::Databricks
5442 ) {
5443 let mut partition_col_names: Vec<String> = Vec::new();
5444 let mut partition_col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
5445 let mut has_col_def_partitions = false;
5446
5447 // Check if any PARTITIONED BY property contains ColumnDef expressions
5448 for prop in &ct.properties {
5449 if let Expression::PartitionedByProperty(ref pbp) = prop {
5450 if let Expression::Tuple(ref tuple) = *pbp.this {
5451 for expr in &tuple.expressions {
5452 if let Expression::ColumnDef(ref cd) = expr {
5453 has_col_def_partitions = true;
5454 partition_col_names.push(cd.name.name.clone());
5455 partition_col_defs.push(*cd.clone());
5456 }
5457 }
5458 }
5459 }
5460 }
5461
5462 if has_col_def_partitions && !matches!(target, DialectType::Hive) {
5463 // Merge partition columns into main column list
5464 for cd in partition_col_defs {
5465 ct.columns.push(cd);
5466 }
5467
5468 // Replace PARTITIONED BY property with column-name-only version
5469 ct.properties
5470 .retain(|p| !matches!(p, Expression::PartitionedByProperty(_)));
5471
5472 if matches!(
5473 target,
5474 DialectType::Presto | DialectType::Trino | DialectType::Athena
5475 ) {
5476 // Presto: WITH (PARTITIONED_BY=ARRAY['y', 'z'])
5477 let array_elements: Vec<String> = partition_col_names
5478 .iter()
5479 .map(|n| format!("'{}'", n))
5480 .collect();
5481 let array_value = format!("ARRAY[{}]", array_elements.join(", "));
5482 ct.with_properties
5483 .push(("PARTITIONED_BY".to_string(), array_value));
5484 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
5485 // Spark: PARTITIONED BY (y, z) - just column names
5486 let name_exprs: Vec<Expression> = partition_col_names
5487 .iter()
5488 .map(|n| {
5489 Expression::Column(Box::new(crate::expressions::Column {
5490 name: crate::expressions::Identifier::new(n.clone()),
5491 table: None,
5492 join_mark: false,
5493 trailing_comments: Vec::new(),
5494 span: None,
5495 inferred_type: None,
5496 }))
5497 })
5498 .collect();
5499 ct.properties.insert(
5500 0,
5501 Expression::PartitionedByProperty(Box::new(
5502 crate::expressions::PartitionedByProperty {
5503 this: Box::new(Expression::Tuple(Box::new(
5504 crate::expressions::Tuple {
5505 expressions: name_exprs,
5506 },
5507 ))),
5508 },
5509 )),
5510 );
5511 }
5512 // For DuckDB and other targets, just drop the PARTITIONED BY (already retained above)
5513 }
5514
5515 // Note: Non-ColumnDef partitions (e.g., function expressions like MONTHS(y))
5516 // are handled by transform_create_table_properties which runs first
5517 }
5518
5519 // Strip LOCATION property for Presto/Trino (not supported)
5520 if matches!(
5521 target,
5522 DialectType::Presto | DialectType::Trino | DialectType::Athena
5523 ) {
5524 ct.properties
5525 .retain(|p| !matches!(p, Expression::LocationProperty(_)));
5526 }
5527
5528 // Strip table-level constraints for Spark/Hive/Databricks
5529 // Keep PRIMARY KEY and LIKE constraints but strip TSQL-specific modifiers; remove all others
5530 if matches!(
5531 target,
5532 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5533 ) {
5534 ct.constraints.retain(|c| {
5535 matches!(
5536 c,
5537 crate::expressions::TableConstraint::PrimaryKey { .. }
5538 | crate::expressions::TableConstraint::Like { .. }
5539 )
5540 });
5541 for constraint in &mut ct.constraints {
5542 if let crate::expressions::TableConstraint::PrimaryKey {
5543 columns,
5544 modifiers,
5545 ..
5546 } = constraint
5547 {
5548 // Strip ASC/DESC from column names
5549 for col in columns.iter_mut() {
5550 if col.name.ends_with(" ASC") {
5551 col.name = col.name[..col.name.len() - 4].to_string();
5552 } else if col.name.ends_with(" DESC") {
5553 col.name = col.name[..col.name.len() - 5].to_string();
5554 }
5555 }
5556 // Strip TSQL-specific modifiers
5557 modifiers.clustered = None;
5558 modifiers.with_options.clear();
5559 modifiers.on_filegroup = None;
5560 }
5561 }
5562 }
5563
5564 // Databricks: IDENTITY columns with INT/INTEGER -> BIGINT
5565 if matches!(target, DialectType::Databricks) {
5566 for col in &mut ct.columns {
5567 if col.auto_increment {
5568 if matches!(col.data_type, crate::expressions::DataType::Int { .. }) {
5569 col.data_type = crate::expressions::DataType::BigInt { length: None };
5570 }
5571 }
5572 }
5573 }
5574
5575 // Spark/Databricks: INTEGER -> INT in column definitions
5576 // Python sqlglot always outputs INT for Spark/Databricks
5577 if matches!(target, DialectType::Spark | DialectType::Databricks) {
5578 for col in &mut ct.columns {
5579 if let crate::expressions::DataType::Int {
5580 integer_spelling, ..
5581 } = &mut col.data_type
5582 {
5583 *integer_spelling = false;
5584 }
5585 }
5586 }
5587
5588 // Strip explicit NULL constraints for Hive/Spark (B INTEGER NULL -> B INTEGER)
5589 if matches!(target, DialectType::Hive | DialectType::Spark) {
5590 for col in &mut ct.columns {
5591 // If nullable is explicitly true (NULL), change to None (omit it)
5592 if col.nullable == Some(true) {
5593 col.nullable = None;
5594 }
5595 // Also remove from constraints if stored there
5596 col.constraints
5597 .retain(|c| !matches!(c, crate::expressions::ColumnConstraint::Null));
5598 }
5599 }
5600
5601 // Strip TSQL ON filegroup for non-TSQL/Fabric targets
5602 if ct.on_property.is_some()
5603 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
5604 {
5605 ct.on_property = None;
5606 }
5607
5608 // Snowflake: strip ARRAY type parameters (ARRAY<INT> -> ARRAY, ARRAY<ARRAY<INT>> -> ARRAY)
5609 // Snowflake doesn't support typed arrays in DDL
5610 if matches!(target, DialectType::Snowflake) {
5611 fn strip_array_type_params(dt: &mut crate::expressions::DataType) {
5612 if let crate::expressions::DataType::Array { .. } = dt {
5613 *dt = crate::expressions::DataType::Custom {
5614 name: "ARRAY".to_string(),
5615 };
5616 }
5617 }
5618 for col in &mut ct.columns {
5619 strip_array_type_params(&mut col.data_type);
5620 }
5621 }
5622
5623 // PostgreSQL target: ensure IDENTITY columns have NOT NULL
5624 // If NOT NULL was explicit in source (present in constraint_order), preserve original order.
5625 // If NOT NULL was not explicit, add it after IDENTITY (GENERATED BY DEFAULT AS IDENTITY NOT NULL).
5626 if matches!(target, DialectType::PostgreSQL) {
5627 for col in &mut ct.columns {
5628 if col.auto_increment && !col.constraint_order.is_empty() {
5629 use crate::expressions::ConstraintType;
5630 let has_explicit_not_null = col
5631 .constraint_order
5632 .iter()
5633 .any(|ct| *ct == ConstraintType::NotNull);
5634
5635 if has_explicit_not_null {
5636 // Source had explicit NOT NULL - preserve original order
5637 // Just ensure nullable is set
5638 if col.nullable != Some(false) {
5639 col.nullable = Some(false);
5640 }
5641 } else {
5642 // Source didn't have explicit NOT NULL - build order with
5643 // AutoIncrement + NotNull first, then remaining constraints
5644 let mut new_order = Vec::new();
5645 // Put AutoIncrement (IDENTITY) first, followed by synthetic NotNull
5646 new_order.push(ConstraintType::AutoIncrement);
5647 new_order.push(ConstraintType::NotNull);
5648 // Add remaining constraints in original order (except AutoIncrement)
5649 for ct_type in &col.constraint_order {
5650 if *ct_type != ConstraintType::AutoIncrement {
5651 new_order.push(ct_type.clone());
5652 }
5653 }
5654 col.constraint_order = new_order;
5655 col.nullable = Some(false);
5656 }
5657 }
5658 }
5659 }
5660
5661 Expression::CreateTable(ct)
5662 } else {
5663 expr
5664 };
5665
5666 // Handle CreateView column stripping for Presto/Trino target
5667 let expr = if let Expression::CreateView(mut cv) = expr {
5668 // Presto/Trino: drop column list when view has a SELECT body
5669 if matches!(target, DialectType::Presto | DialectType::Trino) && !cv.columns.is_empty()
5670 {
5671 if !matches!(&cv.query, Expression::Null(_)) {
5672 cv.columns.clear();
5673 }
5674 }
5675 Expression::CreateView(cv)
5676 } else {
5677 expr
5678 };
5679
5680 // Wrap bare VALUES in CTE bodies with SELECT * FROM (...) AS _values for generic/non-Presto targets
5681 let expr = if !matches!(
5682 target,
5683 DialectType::Presto | DialectType::Trino | DialectType::Athena
5684 ) {
5685 if let Expression::Select(mut select) = expr {
5686 if let Some(ref mut with) = select.with {
5687 for cte in &mut with.ctes {
5688 if let Expression::Values(ref vals) = cte.this {
5689 // Build: SELECT * FROM (VALUES ...) AS _values
5690 let values_subquery =
5691 Expression::Subquery(Box::new(crate::expressions::Subquery {
5692 this: Expression::Values(vals.clone()),
5693 alias: Some(Identifier::new("_values".to_string())),
5694 column_aliases: Vec::new(),
5695 order_by: None,
5696 limit: None,
5697 offset: None,
5698 distribute_by: None,
5699 sort_by: None,
5700 cluster_by: None,
5701 lateral: false,
5702 modifiers_inside: false,
5703 trailing_comments: Vec::new(),
5704 inferred_type: None,
5705 }));
5706 let mut new_select = crate::expressions::Select::new();
5707 new_select.expressions =
5708 vec![Expression::Star(crate::expressions::Star {
5709 table: None,
5710 except: None,
5711 replace: None,
5712 rename: None,
5713 trailing_comments: Vec::new(),
5714 span: None,
5715 })];
5716 new_select.from = Some(crate::expressions::From {
5717 expressions: vec![values_subquery],
5718 });
5719 cte.this = Expression::Select(Box::new(new_select));
5720 }
5721 }
5722 }
5723 Expression::Select(select)
5724 } else {
5725 expr
5726 }
5727 } else {
5728 expr
5729 };
5730
5731 // PostgreSQL CREATE INDEX: add NULLS FIRST to index columns that don't have nulls ordering
5732 let expr = if matches!(target, DialectType::PostgreSQL) {
5733 if let Expression::CreateIndex(mut ci) = expr {
5734 for col in &mut ci.columns {
5735 if col.nulls_first.is_none() {
5736 col.nulls_first = Some(true);
5737 }
5738 }
5739 Expression::CreateIndex(ci)
5740 } else {
5741 expr
5742 }
5743 } else {
5744 expr
5745 };
5746
5747 transform_recursive(expr, &|e| {
5748 // BigQuery CAST(ARRAY[STRUCT(...)] AS STRUCT_TYPE[]) -> DuckDB: convert unnamed Structs to ROW()
5749 // This converts auto-named struct literals {'_0': x, '_1': y} inside typed arrays to ROW(x, y)
5750 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
5751 if let Expression::Cast(ref c) = e {
5752 // Check if this is a CAST of an array to a struct array type
5753 let is_struct_array_cast =
5754 matches!(&c.to, crate::expressions::DataType::Array { .. });
5755 if is_struct_array_cast {
5756 let has_auto_named_structs = match &c.this {
5757 Expression::Array(arr) => arr.expressions.iter().any(|elem| {
5758 if let Expression::Struct(s) = elem {
5759 s.fields.iter().all(|(name, _)| {
5760 name.as_ref().map_or(true, |n| {
5761 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5762 })
5763 })
5764 } else {
5765 false
5766 }
5767 }),
5768 Expression::ArrayFunc(arr) => arr.expressions.iter().any(|elem| {
5769 if let Expression::Struct(s) = elem {
5770 s.fields.iter().all(|(name, _)| {
5771 name.as_ref().map_or(true, |n| {
5772 n.starts_with('_') && n[1..].parse::<usize>().is_ok()
5773 })
5774 })
5775 } else {
5776 false
5777 }
5778 }),
5779 _ => false,
5780 };
5781 if has_auto_named_structs {
5782 let convert_struct_to_row = |elem: Expression| -> Expression {
5783 if let Expression::Struct(s) = elem {
5784 let row_args: Vec<Expression> =
5785 s.fields.into_iter().map(|(_, v)| v).collect();
5786 Expression::Function(Box::new(Function::new(
5787 "ROW".to_string(),
5788 row_args,
5789 )))
5790 } else {
5791 elem
5792 }
5793 };
5794 let mut c_clone = c.as_ref().clone();
5795 match &mut c_clone.this {
5796 Expression::Array(arr) => {
5797 arr.expressions = arr
5798 .expressions
5799 .drain(..)
5800 .map(convert_struct_to_row)
5801 .collect();
5802 }
5803 Expression::ArrayFunc(arr) => {
5804 arr.expressions = arr
5805 .expressions
5806 .drain(..)
5807 .map(convert_struct_to_row)
5808 .collect();
5809 }
5810 _ => {}
5811 }
5812 return Ok(Expression::Cast(Box::new(c_clone)));
5813 }
5814 }
5815 }
5816 }
5817
            // BigQuery `SELECT AS STRUCT a, b AS c` -> DuckDB struct literal
            // {'a': a, 'c': b}: fold the projection list into one Struct
            // expression and clear the STRUCT kind marker.
            if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
                if let Expression::Select(ref sel) = e {
                    if sel.kind.as_deref() == Some("STRUCT") {
                        let mut fields = Vec::new();
                        for expr in &sel.expressions {
                            match expr {
                                // Aliased item: the alias becomes the field name.
                                Expression::Alias(a) => {
                                    fields.push((Some(a.alias.name.clone()), a.this.clone()));
                                }
                                // Bare column: the column name becomes the field name.
                                Expression::Column(c) => {
                                    fields.push((Some(c.name.name.clone()), expr.clone()));
                                }
                                // Anything else stays anonymous.
                                _ => {
                                    fields.push((None, expr.clone()));
                                }
                            }
                        }
                        let struct_lit =
                            Expression::Struct(Box::new(crate::expressions::Struct { fields }));
                        let mut new_select = sel.as_ref().clone();
                        new_select.kind = None;
                        new_select.expressions = vec![struct_lit];
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
5845
5846 // Convert @variable -> ${variable} for Spark/Hive/Databricks
5847 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5848 && matches!(
5849 target,
5850 DialectType::Spark | DialectType::Databricks | DialectType::Hive
5851 )
5852 {
5853 if let Expression::Parameter(ref p) = e {
5854 if p.style == crate::expressions::ParameterStyle::At {
5855 if let Some(ref name) = p.name {
5856 return Ok(Expression::Parameter(Box::new(
5857 crate::expressions::Parameter {
5858 name: Some(name.clone()),
5859 index: p.index,
5860 style: crate::expressions::ParameterStyle::DollarBrace,
5861 quoted: p.quoted,
5862 string_quoted: p.string_quoted,
5863 expression: None,
5864 },
5865 )));
5866 }
5867 }
5868 }
5869 // Also handle Column("@x") -> Parameter("x", DollarBrace) for TSQL vars
5870 if let Expression::Column(ref col) = e {
5871 if col.name.name.starts_with('@') && col.table.is_none() {
5872 let var_name = col.name.name.trim_start_matches('@').to_string();
5873 return Ok(Expression::Parameter(Box::new(
5874 crate::expressions::Parameter {
5875 name: Some(var_name),
5876 index: None,
5877 style: crate::expressions::ParameterStyle::DollarBrace,
5878 quoted: false,
5879 string_quoted: false,
5880 expression: None,
5881 },
5882 )));
5883 }
5884 }
5885 }
5886
5887 // Convert @variable -> variable in SET statements for Spark/Databricks
5888 if matches!(source, DialectType::TSQL | DialectType::Fabric)
5889 && matches!(target, DialectType::Spark | DialectType::Databricks)
5890 {
5891 if let Expression::SetStatement(ref s) = e {
5892 let mut new_items = s.items.clone();
5893 let mut changed = false;
5894 for item in &mut new_items {
5895 // Strip @ from the SET name (Parameter style)
5896 if let Expression::Parameter(ref p) = item.name {
5897 if p.style == crate::expressions::ParameterStyle::At {
5898 if let Some(ref name) = p.name {
5899 item.name = Expression::Identifier(Identifier::new(name));
5900 changed = true;
5901 }
5902 }
5903 }
5904 // Strip @ from the SET name (Identifier style - SET parser)
5905 if let Expression::Identifier(ref id) = item.name {
5906 if id.name.starts_with('@') {
5907 let var_name = id.name.trim_start_matches('@').to_string();
5908 item.name = Expression::Identifier(Identifier::new(&var_name));
5909 changed = true;
5910 }
5911 }
5912 // Strip @ from the SET name (Column style - alternative parsing)
5913 if let Expression::Column(ref col) = item.name {
5914 if col.name.name.starts_with('@') && col.table.is_none() {
5915 let var_name = col.name.name.trim_start_matches('@').to_string();
5916 item.name = Expression::Identifier(Identifier::new(&var_name));
5917 changed = true;
5918 }
5919 }
5920 }
5921 if changed {
5922 let mut new_set = (**s).clone();
5923 new_set.items = new_items;
5924 return Ok(Expression::SetStatement(Box::new(new_set)));
5925 }
5926 }
5927 }
5928
            // TSQL table hints such as WITH (NOLOCK) have no meaning in other
            // dialects: strip all table hints when leaving the TSQL/Fabric family.
            if matches!(source, DialectType::TSQL | DialectType::Fabric)
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                if let Expression::Table(ref tr) = e {
                    if !tr.hints.is_empty() {
                        let mut new_tr = tr.clone();
                        new_tr.hints.clear();
                        return Ok(Expression::Table(new_tr));
                    }
                }
            }
5941
            // Snowflake constant-folds IS TRUE / IS FALSE on boolean literals:
            //   TRUE IS TRUE   -> TRUE    FALSE IS TRUE  -> FALSE
            //   FALSE IS FALSE -> TRUE    TRUE IS FALSE  -> FALSE
            // and the IS NOT variants (`itf.not`) fold to the opposite value.
            // (The previous comment claimed FALSE IS FALSE -> FALSE, which
            // contradicted both SQL semantics and the code below.)
            if matches!(target, DialectType::Snowflake) {
                if let Expression::IsTrue(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS TRUE == b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: b.value,
                            }));
                        } else {
                            // b IS NOT TRUE == NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: !b.value,
                            }));
                        }
                    }
                }
                if let Expression::IsFalse(ref itf) = e {
                    if let Expression::Boolean(ref b) = itf.this {
                        if !itf.not {
                            // b IS FALSE == NOT b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: !b.value,
                            }));
                        } else {
                            // b IS NOT FALSE == b
                            return Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                                value: b.value,
                            }));
                        }
                    }
                }
            }
5972
5973 // BigQuery: split dotted backtick identifiers in table names
5974 // e.g., `a.b.c` -> "a"."b"."c" when source is BigQuery and target is not BigQuery
5975 if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
5976 if let Expression::CreateTable(ref ct) = e {
5977 let mut changed = false;
5978 let mut new_ct = ct.clone();
5979 // Split the table name
5980 if ct.name.schema.is_none() && ct.name.name.name.contains('.') {
5981 let parts: Vec<&str> = ct.name.name.name.split('.').collect();
5982 // Use quoted identifiers when the original was quoted (backtick in BigQuery)
5983 let was_quoted = ct.name.name.quoted;
5984 let mk_id = |s: &str| {
5985 if was_quoted {
5986 Identifier::quoted(s)
5987 } else {
5988 Identifier::new(s)
5989 }
5990 };
5991 if parts.len() == 3 {
5992 new_ct.name.catalog = Some(mk_id(parts[0]));
5993 new_ct.name.schema = Some(mk_id(parts[1]));
5994 new_ct.name.name = mk_id(parts[2]);
5995 changed = true;
5996 } else if parts.len() == 2 {
5997 new_ct.name.schema = Some(mk_id(parts[0]));
5998 new_ct.name.name = mk_id(parts[1]);
5999 changed = true;
6000 }
6001 }
6002 // Split the clone source name
6003 if let Some(ref clone_src) = ct.clone_source {
6004 if clone_src.schema.is_none() && clone_src.name.name.contains('.') {
6005 let parts: Vec<&str> = clone_src.name.name.split('.').collect();
6006 let was_quoted = clone_src.name.quoted;
6007 let mk_id = |s: &str| {
6008 if was_quoted {
6009 Identifier::quoted(s)
6010 } else {
6011 Identifier::new(s)
6012 }
6013 };
6014 let mut new_src = clone_src.clone();
6015 if parts.len() == 3 {
6016 new_src.catalog = Some(mk_id(parts[0]));
6017 new_src.schema = Some(mk_id(parts[1]));
6018 new_src.name = mk_id(parts[2]);
6019 new_ct.clone_source = Some(new_src);
6020 changed = true;
6021 } else if parts.len() == 2 {
6022 new_src.schema = Some(mk_id(parts[0]));
6023 new_src.name = mk_id(parts[1]);
6024 new_ct.clone_source = Some(new_src);
6025 changed = true;
6026 }
6027 }
6028 }
6029 if changed {
6030 return Ok(Expression::CreateTable(new_ct));
6031 }
6032 }
6033 }
6034
6035 // BigQuery array subscript: a[1], b[OFFSET(1)], c[ORDINAL(1)], d[SAFE_OFFSET(1)], e[SAFE_ORDINAL(1)]
6036 // -> DuckDB/Presto: convert 0-based to 1-based, handle SAFE_* -> ELEMENT_AT for Presto
6037 if matches!(source, DialectType::BigQuery)
6038 && matches!(
6039 target,
6040 DialectType::DuckDB
6041 | DialectType::Presto
6042 | DialectType::Trino
6043 | DialectType::Athena
6044 )
6045 {
6046 if let Expression::Subscript(ref sub) = e {
6047 let (new_index, is_safe) = match &sub.index {
6048 // a[1] -> a[1+1] = a[2] (plain index is 0-based in BQ)
6049 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
6050 let Literal::Number(n) = lit.as_ref() else {
6051 unreachable!()
6052 };
6053 if let Ok(val) = n.parse::<i64>() {
6054 (
6055 Some(Expression::Literal(Box::new(Literal::Number(
6056 (val + 1).to_string(),
6057 )))),
6058 false,
6059 )
6060 } else {
6061 (None, false)
6062 }
6063 }
6064 // OFFSET(n) -> n+1 (0-based)
6065 Expression::Function(ref f)
6066 if f.name.eq_ignore_ascii_case("OFFSET") && f.args.len() == 1 =>
6067 {
6068 if let Expression::Literal(lit) = &f.args[0] {
6069 if let Literal::Number(n) = lit.as_ref() {
6070 if let Ok(val) = n.parse::<i64>() {
6071 (
6072 Some(Expression::Literal(Box::new(Literal::Number(
6073 (val + 1).to_string(),
6074 )))),
6075 false,
6076 )
6077 } else {
6078 (
6079 Some(Expression::Add(Box::new(
6080 crate::expressions::BinaryOp::new(
6081 f.args[0].clone(),
6082 Expression::number(1),
6083 ),
6084 ))),
6085 false,
6086 )
6087 }
6088 } else {
6089 (None, false)
6090 }
6091 } else {
6092 (
6093 Some(Expression::Add(Box::new(
6094 crate::expressions::BinaryOp::new(
6095 f.args[0].clone(),
6096 Expression::number(1),
6097 ),
6098 ))),
6099 false,
6100 )
6101 }
6102 }
6103 // ORDINAL(n) -> n (already 1-based)
6104 Expression::Function(ref f)
6105 if f.name.eq_ignore_ascii_case("ORDINAL") && f.args.len() == 1 =>
6106 {
6107 (Some(f.args[0].clone()), false)
6108 }
6109 // SAFE_OFFSET(n) -> n+1 (0-based, safe)
6110 Expression::Function(ref f)
6111 if f.name.eq_ignore_ascii_case("SAFE_OFFSET") && f.args.len() == 1 =>
6112 {
6113 if let Expression::Literal(lit) = &f.args[0] {
6114 if let Literal::Number(n) = lit.as_ref() {
6115 if let Ok(val) = n.parse::<i64>() {
6116 (
6117 Some(Expression::Literal(Box::new(Literal::Number(
6118 (val + 1).to_string(),
6119 )))),
6120 true,
6121 )
6122 } else {
6123 (
6124 Some(Expression::Add(Box::new(
6125 crate::expressions::BinaryOp::new(
6126 f.args[0].clone(),
6127 Expression::number(1),
6128 ),
6129 ))),
6130 true,
6131 )
6132 }
6133 } else {
6134 (None, false)
6135 }
6136 } else {
6137 (
6138 Some(Expression::Add(Box::new(
6139 crate::expressions::BinaryOp::new(
6140 f.args[0].clone(),
6141 Expression::number(1),
6142 ),
6143 ))),
6144 true,
6145 )
6146 }
6147 }
6148 // SAFE_ORDINAL(n) -> n (already 1-based, safe)
6149 Expression::Function(ref f)
6150 if f.name.eq_ignore_ascii_case("SAFE_ORDINAL") && f.args.len() == 1 =>
6151 {
6152 (Some(f.args[0].clone()), true)
6153 }
6154 _ => (None, false),
6155 };
6156 if let Some(idx) = new_index {
6157 if is_safe
6158 && matches!(
6159 target,
6160 DialectType::Presto | DialectType::Trino | DialectType::Athena
6161 )
6162 {
6163 // Presto: SAFE_OFFSET/SAFE_ORDINAL -> ELEMENT_AT(arr, idx)
6164 return Ok(Expression::Function(Box::new(Function::new(
6165 "ELEMENT_AT".to_string(),
6166 vec![sub.this.clone(), idx],
6167 ))));
6168 } else {
6169 // DuckDB or non-safe: just use subscript with converted index
6170 return Ok(Expression::Subscript(Box::new(
6171 crate::expressions::Subscript {
6172 this: sub.this.clone(),
6173 index: idx,
6174 },
6175 )));
6176 }
6177 }
6178 }
6179 }
6180
6181 // BigQuery LENGTH(x) -> DuckDB CASE TYPEOF(x) WHEN 'BLOB' THEN OCTET_LENGTH(...) ELSE LENGTH(...) END
6182 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::DuckDB) {
6183 if let Expression::Length(ref uf) = e {
6184 let arg = uf.this.clone();
6185 let typeof_func = Expression::Function(Box::new(Function::new(
6186 "TYPEOF".to_string(),
6187 vec![arg.clone()],
6188 )));
6189 let blob_cast = Expression::Cast(Box::new(Cast {
6190 this: arg.clone(),
6191 to: DataType::VarBinary { length: None },
6192 trailing_comments: vec![],
6193 double_colon_syntax: false,
6194 format: None,
6195 default: None,
6196 inferred_type: None,
6197 }));
6198 let octet_length = Expression::Function(Box::new(Function::new(
6199 "OCTET_LENGTH".to_string(),
6200 vec![blob_cast],
6201 )));
6202 let text_cast = Expression::Cast(Box::new(Cast {
6203 this: arg,
6204 to: DataType::Text,
6205 trailing_comments: vec![],
6206 double_colon_syntax: false,
6207 format: None,
6208 default: None,
6209 inferred_type: None,
6210 }));
6211 let length_text = Expression::Length(Box::new(crate::expressions::UnaryFunc {
6212 this: text_cast,
6213 original_name: None,
6214 inferred_type: None,
6215 }));
6216 return Ok(Expression::Case(Box::new(Case {
6217 operand: Some(typeof_func),
6218 whens: vec![(
6219 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
6220 octet_length,
6221 )],
6222 else_: Some(length_text),
6223 comments: Vec::new(),
6224 inferred_type: None,
6225 })));
6226 }
6227 }
6228
            // BigQuery UNNEST alias handling (only for non-BigQuery sources):
            //   UNNEST(...) AS x    -> UNNEST(...)       (drop unused table alias)
            //   UNNEST(...) AS x(y) -> UNNEST(...) AS y  (first column alias
            //                                             becomes the main alias)
            // BigQuery aliases the produced value rather than a derived table.
            if matches!(target, DialectType::BigQuery) && !matches!(source, DialectType::BigQuery) {
                if let Expression::Alias(ref a) = e {
                    if matches!(&a.this, Expression::Unnest(_)) {
                        if a.column_aliases.is_empty() {
                            // Drop the entire alias, return just the UNNEST expression
                            return Ok(a.this.clone());
                        } else {
                            // Use first column alias as the main alias; any
                            // additional column aliases are discarded here.
                            let mut new_alias = a.as_ref().clone();
                            new_alias.alias = a.column_aliases[0].clone();
                            new_alias.column_aliases.clear();
                            return Ok(Expression::Alias(Box::new(new_alias)));
                        }
                    }
                }
            }
6248
        // BigQuery IN UNNEST(expr) -> IN (SELECT UNNEST/EXPLODE(expr)) for non-BigQuery targets
        if matches!(source, DialectType::BigQuery) && !matches!(target, DialectType::BigQuery) {
            if let Expression::In(ref in_expr) = e {
                if let Some(ref unnest_inner) = in_expr.unnest {
                    // Build the function call for the target dialect
                    let func_expr = if matches!(
                        target,
                        DialectType::Hive | DialectType::Spark | DialectType::Databricks
                    ) {
                        // Use EXPLODE for Hive/Spark
                        Expression::Function(Box::new(Function::new(
                            "EXPLODE".to_string(),
                            vec![*unnest_inner.clone()],
                        )))
                    } else {
                        // Use UNNEST for Presto/Trino/DuckDB/etc.
                        Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                            this: *unnest_inner.clone(),
                            expressions: Vec::new(),
                            with_ordinality: false,
                            alias: None,
                            offset_alias: None,
                        }))
                    };

                    // Wrap in SELECT so the membership test becomes `IN (SELECT ...)`
                    let mut inner_select = crate::expressions::Select::new();
                    inner_select.expressions = vec![func_expr];

                    let subquery_expr = Expression::Select(Box::new(inner_select));

                    // Rebuild the IN node with the subquery and clear the
                    // BigQuery-specific `unnest` slot so it is not re-emitted.
                    return Ok(Expression::In(Box::new(crate::expressions::In {
                        this: in_expr.this.clone(),
                        expressions: Vec::new(),
                        query: Some(subquery_expr),
                        not: in_expr.not,
                        global: in_expr.global,
                        unnest: None,
                        is_field: false,
                    })));
                }
            }
        }
6292
        // SQLite: GENERATE_SERIES AS t(i) -> (SELECT value AS i FROM GENERATE_SERIES(...)) AS t
        // This handles the subquery wrapping for RANGE -> GENERATE_SERIES in FROM context
        if matches!(target, DialectType::SQLite) && matches!(source, DialectType::DuckDB) {
            if let Expression::Alias(ref a) = e {
                if let Expression::Function(ref f) = a.this {
                    if f.name.eq_ignore_ascii_case("GENERATE_SERIES")
                        && !a.column_aliases.is_empty()
                    {
                        // Build: (SELECT value AS col_alias FROM GENERATE_SERIES(start, end)) AS table_alias
                        // SQLite's generate_series table-valued function exposes its
                        // output through the fixed column name "value".
                        let col_alias = a.column_aliases[0].clone();
                        let mut inner_select = crate::expressions::Select::new();
                        inner_select.expressions =
                            vec![Expression::Alias(Box::new(crate::expressions::Alias::new(
                                Expression::Identifier(Identifier::new("value".to_string())),
                                col_alias,
                            )))];
                        inner_select.from = Some(crate::expressions::From {
                            expressions: vec![a.this.clone()],
                        });
                        // The original table alias moves onto the wrapping subquery.
                        let subquery =
                            Expression::Subquery(Box::new(crate::expressions::Subquery {
                                this: Expression::Select(Box::new(inner_select)),
                                alias: Some(a.alias.clone()),
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                                inferred_type: None,
                            }));
                        return Ok(subquery);
                    }
                }
            }
        }
6333
        // BigQuery implicit UNNEST: comma-join on array path -> CROSS JOIN UNNEST
        // e.g., SELECT results FROM Coordinates, Coordinates.position AS results
        // -> SELECT results FROM Coordinates CROSS JOIN UNNEST(Coordinates.position) AS results
        if matches!(source, DialectType::BigQuery) {
            if let Expression::Select(ref s) = e {
                if let Some(ref from) = s.from {
                    if from.expressions.len() >= 2 {
                        // Collect table names from first expression
                        // (only the first FROM item is considered the "base" table;
                        // names are lowercased for case-insensitive matching).
                        let first_tables: Vec<String> = from
                            .expressions
                            .iter()
                            .take(1)
                            .filter_map(|expr| {
                                if let Expression::Table(t) = expr {
                                    Some(t.name.name.to_ascii_lowercase())
                                } else {
                                    None
                                }
                            })
                            .collect();

                        // Check if any subsequent FROM expressions are schema-qualified with a matching table name
                        // or have a dotted name matching a table
                        let mut needs_rewrite = false;
                        for expr in from.expressions.iter().skip(1) {
                            if let Expression::Table(t) = expr {
                                if let Some(ref schema) = t.schema {
                                    if first_tables.contains(&schema.name.to_ascii_lowercase())
                                    {
                                        needs_rewrite = true;
                                        break;
                                    }
                                }
                                // Also check dotted names in quoted identifiers (e.g., `Coordinates.position`)
                                if t.schema.is_none() && t.name.name.contains('.') {
                                    let parts: Vec<&str> = t.name.name.split('.').collect();
                                    if parts.len() >= 2
                                        && first_tables.contains(&parts[0].to_ascii_lowercase())
                                    {
                                        needs_rewrite = true;
                                        break;
                                    }
                                }
                            }
                        }

                        if needs_rewrite {
                            // Keep the first FROM item; every other item either becomes
                            // a CROSS JOIN (array-path reference) or stays in FROM.
                            let mut new_select = s.clone();
                            let mut new_from_exprs = vec![from.expressions[0].clone()];
                            let mut new_joins = s.joins.clone();

                            for expr in from.expressions.iter().skip(1) {
                                if let Expression::Table(ref t) = expr {
                                    if let Some(ref schema) = t.schema {
                                        if first_tables
                                            .contains(&schema.name.to_ascii_lowercase())
                                        {
                                            // This is an array path reference, convert to CROSS JOIN UNNEST
                                            let col_expr = Expression::Column(Box::new(
                                                crate::expressions::Column {
                                                    name: t.name.clone(),
                                                    table: Some(schema.clone()),
                                                    join_mark: false,
                                                    trailing_comments: vec![],
                                                    span: None,
                                                    inferred_type: None,
                                                },
                                            ));
                                            let unnest_expr = Expression::Unnest(Box::new(
                                                crate::expressions::UnnestFunc {
                                                    this: col_expr,
                                                    expressions: Vec::new(),
                                                    with_ordinality: false,
                                                    alias: None,
                                                    offset_alias: None,
                                                },
                                            ));
                                            let join_this = if let Some(ref alias) = t.alias {
                                                if matches!(
                                                    target,
                                                    DialectType::Presto
                                                        | DialectType::Trino
                                                        | DialectType::Athena
                                                ) {
                                                    // Presto: UNNEST(x) AS _t0(results)
                                                    // (table alias is synthetic; the original
                                                    // alias becomes the column alias)
                                                    Expression::Alias(Box::new(
                                                        crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: Identifier::new("_t0"),
                                                            column_aliases: vec![alias.clone()],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                            inferred_type: None,
                                                        },
                                                    ))
                                                } else {
                                                    // BigQuery: UNNEST(x) AS results
                                                    Expression::Alias(Box::new(
                                                        crate::expressions::Alias {
                                                            this: unnest_expr,
                                                            alias: alias.clone(),
                                                            column_aliases: vec![],
                                                            pre_alias_comments: vec![],
                                                            trailing_comments: vec![],
                                                            inferred_type: None,
                                                        },
                                                    ))
                                                }
                                            } else {
                                                unnest_expr
                                            };
                                            new_joins.push(crate::expressions::Join {
                                                kind: crate::expressions::JoinKind::Cross,
                                                this: join_this,
                                                on: None,
                                                using: Vec::new(),
                                                use_inner_keyword: false,
                                                use_outer_keyword: false,
                                                deferred_condition: false,
                                                join_hint: None,
                                                match_condition: None,
                                                pivots: Vec::new(),
                                                comments: Vec::new(),
                                                nesting_group: 0,
                                                directed: false,
                                            });
                                        } else {
                                            new_from_exprs.push(expr.clone());
                                        }
                                    } else if t.schema.is_none() && t.name.name.contains('.') {
                                        // Dotted name in quoted identifier: `Coordinates.position`
                                        let parts: Vec<&str> = t.name.name.split('.').collect();
                                        if parts.len() >= 2
                                            && first_tables
                                                .contains(&parts[0].to_ascii_lowercase())
                                        {
                                            let join_this =
                                                if matches!(target, DialectType::BigQuery) {
                                                    // BigQuery: keep as single quoted identifier, just convert comma -> CROSS JOIN
                                                    Expression::Table(t.clone())
                                                } else {
                                                    // Other targets: split into "schema"."name"
                                                    let mut new_t = t.clone();
                                                    new_t.schema =
                                                        Some(Identifier::quoted(parts[0]));
                                                    new_t.name = Identifier::quoted(parts[1]);
                                                    Expression::Table(new_t)
                                                };
                                            new_joins.push(crate::expressions::Join {
                                                kind: crate::expressions::JoinKind::Cross,
                                                this: join_this,
                                                on: None,
                                                using: Vec::new(),
                                                use_inner_keyword: false,
                                                use_outer_keyword: false,
                                                deferred_condition: false,
                                                join_hint: None,
                                                match_condition: None,
                                                pivots: Vec::new(),
                                                comments: Vec::new(),
                                                nesting_group: 0,
                                                directed: false,
                                            });
                                        } else {
                                            new_from_exprs.push(expr.clone());
                                        }
                                    } else {
                                        new_from_exprs.push(expr.clone());
                                    }
                                } else {
                                    new_from_exprs.push(expr.clone());
                                }
                            }

                            new_select.from = Some(crate::expressions::From {
                                expressions: new_from_exprs,
                                ..from.clone()
                            });
                            new_select.joins = new_joins;
                            return Ok(Expression::Select(new_select));
                        }
                    }
                }
            }
        }
6519
        // CROSS JOIN UNNEST -> LATERAL VIEW EXPLODE for Hive/Spark
        if matches!(
            target,
            DialectType::Hive | DialectType::Spark | DialectType::Databricks
        ) {
            if let Expression::Select(ref s) = e {
                // Check if any joins are CROSS JOIN with UNNEST/EXPLODE
                let is_unnest_or_explode_expr = |expr: &Expression| -> bool {
                    matches!(expr, Expression::Unnest(_))
                        || matches!(expr, Expression::Function(f) if f.name.eq_ignore_ascii_case("EXPLODE"))
                };
                let has_unnest_join = s.joins.iter().any(|j| {
                    j.kind == crate::expressions::JoinKind::Cross && (
                        matches!(&j.this, Expression::Alias(a) if is_unnest_or_explode_expr(&a.this))
                        || is_unnest_or_explode_expr(&j.this)
                    )
                });
                if has_unnest_join {
                    let mut select = s.clone();
                    let mut new_joins = Vec::new();
                    for join in select.joins.drain(..) {
                        if join.kind == crate::expressions::JoinKind::Cross {
                            // Extract the UNNEST/EXPLODE from the join.
                            // Returns (None, None, []) for cross joins that are not
                            // unnest-like, which are kept as ordinary joins below.
                            let (func_expr, table_alias, col_aliases) = match &join.this {
                                Expression::Alias(a) => {
                                    let ta = if a.alias.is_empty() {
                                        None
                                    } else {
                                        Some(a.alias.clone())
                                    };
                                    let cas = a.column_aliases.clone();
                                    match &a.this {
                                        Expression::Unnest(u) => {
                                            // Multi-arg UNNEST(y, z) -> INLINE(ARRAYS_ZIP(y, z))
                                            if !u.expressions.is_empty() {
                                                let mut all_args = vec![u.this.clone()];
                                                all_args.extend(u.expressions.clone());
                                                let arrays_zip =
                                                    Expression::Function(Box::new(
                                                        crate::expressions::Function::new(
                                                            "ARRAYS_ZIP".to_string(),
                                                            all_args,
                                                        ),
                                                    ));
                                                let inline = Expression::Function(Box::new(
                                                    crate::expressions::Function::new(
                                                        "INLINE".to_string(),
                                                        vec![arrays_zip],
                                                    ),
                                                ));
                                                (Some(inline), ta, a.column_aliases.clone())
                                            } else {
                                                // Convert UNNEST(x) to EXPLODE(x) or POSEXPLODE(x)
                                                let func_name = if u.with_ordinality {
                                                    "POSEXPLODE"
                                                } else {
                                                    "EXPLODE"
                                                };
                                                let explode = Expression::Function(Box::new(
                                                    crate::expressions::Function::new(
                                                        func_name.to_string(),
                                                        vec![u.this.clone()],
                                                    ),
                                                ));
                                                // For POSEXPLODE, add 'pos' to column aliases
                                                // (the ordinality column comes first).
                                                let cas = if u.with_ordinality {
                                                    let mut pos_aliases =
                                                        vec![Identifier::new(
                                                            "pos".to_string(),
                                                        )];
                                                    pos_aliases
                                                        .extend(a.column_aliases.clone());
                                                    pos_aliases
                                                } else {
                                                    a.column_aliases.clone()
                                                };
                                                (Some(explode), ta, cas)
                                            }
                                        }
                                        Expression::Function(f)
                                            if f.name.eq_ignore_ascii_case("EXPLODE") =>
                                        {
                                            (Some(Expression::Function(f.clone())), ta, cas)
                                        }
                                        _ => (None, None, Vec::new()),
                                    }
                                }
                                Expression::Unnest(u) => {
                                    // Bare (un-aliased) UNNEST in the join position.
                                    let func_name = if u.with_ordinality {
                                        "POSEXPLODE"
                                    } else {
                                        "EXPLODE"
                                    };
                                    let explode = Expression::Function(Box::new(
                                        crate::expressions::Function::new(
                                            func_name.to_string(),
                                            vec![u.this.clone()],
                                        ),
                                    ));
                                    let ta = u.alias.clone();
                                    let col_aliases = if u.with_ordinality {
                                        vec![Identifier::new("pos".to_string())]
                                    } else {
                                        Vec::new()
                                    };
                                    (Some(explode), ta, col_aliases)
                                }
                                _ => (None, None, Vec::new()),
                            };
                            if let Some(func) = func_expr {
                                // Move the exploded source out of the join list and
                                // into the SELECT's LATERAL VIEW clause.
                                select.lateral_views.push(crate::expressions::LateralView {
                                    this: func,
                                    table_alias,
                                    column_aliases: col_aliases,
                                    outer: false,
                                });
                            } else {
                                new_joins.push(join);
                            }
                        } else {
                            new_joins.push(join);
                        }
                    }
                    select.joins = new_joins;
                    return Ok(Expression::Select(select));
                }
            }
        }
6648
        // UNNEST expansion: DuckDB SELECT UNNEST(arr) in SELECT list -> expanded query
        // for BigQuery, Presto/Trino, Snowflake
        if matches!(source, DialectType::DuckDB | DialectType::PostgreSQL)
            && matches!(
                target,
                DialectType::BigQuery
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Snowflake
            )
        {
            if let Expression::Select(ref s) = e {
                // Check if any SELECT expressions contain UNNEST
                // Note: UNNEST can appear as Expression::Unnest OR Expression::Function("UNNEST")
                let has_unnest_in_select = s.expressions.iter().any(|expr| {
                    // Recursive scan; only descends through aliases and the four
                    // basic arithmetic operators — UNNEST nested under any other
                    // node kind is not detected here.
                    fn contains_unnest(e: &Expression) -> bool {
                        match e {
                            Expression::Unnest(_) => true,
                            Expression::Function(f)
                                if f.name.eq_ignore_ascii_case("UNNEST") =>
                            {
                                true
                            }
                            Expression::Alias(a) => contains_unnest(&a.this),
                            Expression::Add(op)
                            | Expression::Sub(op)
                            | Expression::Mul(op)
                            | Expression::Div(op) => {
                                contains_unnest(&op.left) || contains_unnest(&op.right)
                            }
                            _ => false,
                        }
                    }
                    contains_unnest(expr)
                });

                if has_unnest_in_select {
                    // The heavy lifting lives in a helper; it may decline the
                    // rewrite (None), in which case the SELECT passes through.
                    let rewritten = Self::rewrite_unnest_expansion(s, target);
                    if let Some(new_select) = rewritten {
                        return Ok(Expression::Select(Box::new(new_select)));
                    }
                }
            }
        }
6693
6694 // BigQuery -> PostgreSQL: convert escape sequences in string literals to actual characters
6695 // BigQuery '\n' -> PostgreSQL literal newline in string
6696 if matches!(source, DialectType::BigQuery) && matches!(target, DialectType::PostgreSQL)
6697 {
6698 if let Expression::Literal(ref lit) = e {
6699 if let Literal::String(ref s) = lit.as_ref() {
6700 if s.contains("\\n")
6701 || s.contains("\\t")
6702 || s.contains("\\r")
6703 || s.contains("\\\\")
6704 {
6705 let converted = s
6706 .replace("\\n", "\n")
6707 .replace("\\t", "\t")
6708 .replace("\\r", "\r")
6709 .replace("\\\\", "\\");
6710 return Ok(Expression::Literal(Box::new(Literal::String(converted))));
6711 }
6712 }
6713 }
6714 }
6715
        // Cross-dialect: convert Literal::Timestamp to target-specific CAST form
        // when source != target (identity tests keep the Literal::Timestamp for native handling)
        if source != target {
            if let Expression::Literal(ref lit) = e {
                if let Literal::Timestamp(ref s) = lit.as_ref() {
                    let s = s.clone();
                    // MySQL: TIMESTAMP handling depends on source dialect
                    // BigQuery TIMESTAMP is timezone-aware -> TIMESTAMP() function in MySQL
                    // Other sources' TIMESTAMP is non-timezone -> CAST('x' AS DATETIME) in MySQL
                    if matches!(target, DialectType::MySQL) {
                        if matches!(source, DialectType::BigQuery) {
                            // BigQuery TIMESTAMP is timezone-aware -> MySQL TIMESTAMP() function
                            return Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![Expression::Literal(Box::new(Literal::String(s)))],
                            ))));
                        } else {
                            // Non-timezone TIMESTAMP -> CAST('x' AS DATETIME) in MySQL
                            return Ok(Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Box::new(Literal::String(s))),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            })));
                        }
                    }
                    // All other targets: pick the CAST target type, then fall
                    // through to the shared CAST construction at the bottom.
                    let dt = match target {
                        DialectType::BigQuery | DialectType::StarRocks => DataType::Custom {
                            name: "DATETIME".to_string(),
                        },
                        DialectType::Snowflake => {
                            // BigQuery TIMESTAMP is timezone-aware -> use TIMESTAMPTZ for Snowflake
                            if matches!(source, DialectType::BigQuery) {
                                DataType::Custom {
                                    name: "TIMESTAMPTZ".to_string(),
                                }
                            } else if matches!(
                                source,
                                DialectType::PostgreSQL
                                    | DialectType::Redshift
                                    | DialectType::Snowflake
                            ) {
                                DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                }
                            } else {
                                DataType::Custom {
                                    name: "TIMESTAMPNTZ".to_string(),
                                }
                            }
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            // BigQuery TIMESTAMP is timezone-aware -> use plain TIMESTAMP for Spark/Databricks
                            if matches!(source, DialectType::BigQuery) {
                                DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                }
                            } else {
                                DataType::Custom {
                                    name: "TIMESTAMP_NTZ".to_string(),
                                }
                            }
                        }
                        DialectType::ClickHouse => DataType::Nullable {
                            inner: Box::new(DataType::Custom {
                                name: "DateTime".to_string(),
                            }),
                        },
                        DialectType::TSQL | DialectType::Fabric => DataType::Custom {
                            name: "DATETIME2".to_string(),
                        },
                        DialectType::DuckDB => {
                            // DuckDB: use TIMESTAMPTZ when source is BigQuery (BQ TIMESTAMP is always UTC/tz-aware)
                            // or when the timestamp string explicitly has timezone info
                            if matches!(source, DialectType::BigQuery)
                                || Self::timestamp_string_has_timezone(&s)
                            {
                                DataType::Custom {
                                    name: "TIMESTAMPTZ".to_string(),
                                }
                            } else {
                                DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                }
                            }
                        }
                        _ => DataType::Timestamp {
                            precision: None,
                            timezone: false,
                        },
                    };
                    return Ok(Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(Literal::String(s))),
                        to: dt,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })));
                }
            }
        }
6827
        // PostgreSQL DELETE requires explicit AS for table aliases
        if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
            if let Expression::Delete(ref del) = e {
                if del.alias.is_some() && !del.alias_explicit_as {
                    // Flip the flag so the generator emits `DELETE FROM t AS x`
                    // rather than the bare `DELETE FROM t x` form.
                    let mut new_del = del.clone();
                    new_del.alias_explicit_as = true;
                    return Ok(Expression::Delete(new_del));
                }
            }
        }
6838
        // UNION/INTERSECT/EXCEPT DISTINCT handling:
        // Some dialects require explicit DISTINCT (BigQuery, ClickHouse),
        // while others don't support it (Presto, Spark, DuckDB, etc.)
        {
            let needs_distinct =
                matches!(target, DialectType::BigQuery | DialectType::ClickHouse);
            let drop_distinct = matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Spark
                    | DialectType::Databricks
                    | DialectType::DuckDB
                    | DialectType::Hive
                    | DialectType::MySQL
                    | DialectType::PostgreSQL
                    | DialectType::SQLite
                    | DialectType::TSQL
                    | DialectType::Redshift
                    | DialectType::Snowflake
                    | DialectType::Oracle
                    | DialectType::Teradata
                    | DialectType::Drill
                    | DialectType::Doris
                    | DialectType::StarRocks
            );
            // `!all` means the set operation is implicitly DISTINCT; for
            // targets that need the keyword, make that explicit. For targets
            // that reject it, clear the flag and rely on the implicit default.
            match &e {
                Expression::Union(u) if !u.all && needs_distinct && !u.distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = true;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if !i.all && needs_distinct && !i.distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = true;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if !ex.all && needs_distinct && !ex.distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = true;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                Expression::Union(u) if u.distinct && drop_distinct => {
                    let mut new_u = (**u).clone();
                    new_u.distinct = false;
                    return Ok(Expression::Union(Box::new(new_u)));
                }
                Expression::Intersect(i) if i.distinct && drop_distinct => {
                    let mut new_i = (**i).clone();
                    new_i.distinct = false;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
                Expression::Except(ex) if ex.distinct && drop_distinct => {
                    let mut new_ex = (**ex).clone();
                    new_ex.distinct = false;
                    return Ok(Expression::Except(Box::new(new_ex)));
                }
                _ => {}
            }
        }
6900
        // ClickHouse: MAP('a', '1') -> map('a', '1') (lowercase function name)
        // Note: a zero-argument MAP() is deliberately left untouched.
        if matches!(target, DialectType::ClickHouse) {
            if let Expression::Function(ref f) = e {
                if f.name.eq_ignore_ascii_case("MAP") && !f.args.is_empty() {
                    let mut new_f = f.as_ref().clone();
                    new_f.name = "map".to_string();
                    return Ok(Expression::Function(Box::new(new_f)));
                }
            }
        }
6911
        // ClickHouse: INTERSECT ALL -> INTERSECT (ClickHouse doesn't support ALL on INTERSECT)
        if matches!(target, DialectType::ClickHouse) {
            if let Expression::Intersect(ref i) = e {
                if i.all {
                    let mut new_i = (**i).clone();
                    new_i.all = false;
                    return Ok(Expression::Intersect(Box::new(new_i)));
                }
            }
        }
6922
6923 // Integer division: a / b -> CAST(a AS DOUBLE) / b for dialects that need it
6924 // Only from Generic source, to prevent double-wrapping
6925 if matches!(source, DialectType::Generic) {
6926 if let Expression::Div(ref op) = e {
6927 let cast_type = match target {
6928 DialectType::TSQL | DialectType::Fabric => Some(DataType::Float {
6929 precision: None,
6930 scale: None,
6931 real_spelling: false,
6932 }),
6933 DialectType::Drill
6934 | DialectType::Trino
6935 | DialectType::Athena
6936 | DialectType::Presto => Some(DataType::Double {
6937 precision: None,
6938 scale: None,
6939 }),
6940 DialectType::PostgreSQL
6941 | DialectType::Redshift
6942 | DialectType::Materialize
6943 | DialectType::Teradata
6944 | DialectType::RisingWave => Some(DataType::Double {
6945 precision: None,
6946 scale: None,
6947 }),
6948 _ => None,
6949 };
6950 if let Some(dt) = cast_type {
6951 let cast_left = Expression::Cast(Box::new(Cast {
6952 this: op.left.clone(),
6953 to: dt,
6954 double_colon_syntax: false,
6955 trailing_comments: Vec::new(),
6956 format: None,
6957 default: None,
6958 inferred_type: None,
6959 }));
6960 let new_op = crate::expressions::BinaryOp {
6961 left: cast_left,
6962 right: op.right.clone(),
6963 left_comments: op.left_comments.clone(),
6964 operator_comments: op.operator_comments.clone(),
6965 trailing_comments: op.trailing_comments.clone(),
6966 inferred_type: None,
6967 };
6968 return Ok(Expression::Div(Box::new(new_op)));
6969 }
6970 }
6971 }
6972
        // CREATE DATABASE -> CREATE SCHEMA for DuckDB target
        // (DuckDB has no CREATE/DROP DATABASE; schemas are the equivalent.)
        if matches!(target, DialectType::DuckDB) {
            if let Expression::CreateDatabase(db) = e {
                let mut schema = crate::expressions::CreateSchema::new(db.name.name.clone());
                schema.if_not_exists = db.if_not_exists;
                return Ok(Expression::CreateSchema(Box::new(schema)));
            }
            if let Expression::DropDatabase(db) = e {
                let mut schema = crate::expressions::DropSchema::new(db.name.name.clone());
                schema.if_exists = db.if_exists;
                return Ok(Expression::DropSchema(Box::new(schema)));
            }
        }
6986
6987 // Strip ClickHouse Nullable(...) wrapper for non-ClickHouse targets
6988 if matches!(source, DialectType::ClickHouse)
6989 && !matches!(target, DialectType::ClickHouse)
6990 {
6991 if let Expression::Cast(ref c) = e {
6992 if let DataType::Custom { ref name } = c.to {
6993 if name.len() >= 9
6994 && name[..9].eq_ignore_ascii_case("NULLABLE(")
6995 && name.ends_with(")")
6996 {
6997 let inner = &name[9..name.len() - 1]; // strip "Nullable(" and ")"
6998 let inner_upper = inner.to_ascii_uppercase();
6999 let new_dt = match inner_upper.as_str() {
7000 "DATETIME" | "DATETIME64" => DataType::Timestamp {
7001 precision: None,
7002 timezone: false,
7003 },
7004 "DATE" => DataType::Date,
7005 "INT64" | "BIGINT" => DataType::BigInt { length: None },
7006 "INT32" | "INT" | "INTEGER" => DataType::Int {
7007 length: None,
7008 integer_spelling: false,
7009 },
7010 "FLOAT64" | "DOUBLE" => DataType::Double {
7011 precision: None,
7012 scale: None,
7013 },
7014 "STRING" => DataType::Text,
7015 _ => DataType::Custom {
7016 name: inner.to_string(),
7017 },
7018 };
7019 let mut new_cast = c.clone();
7020 new_cast.to = new_dt;
7021 return Ok(Expression::Cast(new_cast));
7022 }
7023 }
7024 }
7025 }
7026
        // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(...))
        if matches!(target, DialectType::Snowflake) {
            if let Expression::ArrayConcatAgg(ref agg) = e {
                let mut agg_clone = agg.as_ref().clone();
                agg_clone.name = None; // Clear name so generator uses default "ARRAY_AGG"
                let array_agg = Expression::ArrayAgg(Box::new(agg_clone));
                // Aggregate into an array of arrays, then flatten one level.
                let flatten = Expression::Function(Box::new(Function::new(
                    "ARRAY_FLATTEN".to_string(),
                    vec![array_agg],
                )));
                return Ok(flatten);
            }
        }
7040
        // ARRAY_CONCAT_AGG -> others: keep as function for cross-dialect
        // (BigQuery and Snowflake keep the dedicated AST node; everything else
        // is rendered as a plain function call on the aggregated argument.)
        if !matches!(target, DialectType::BigQuery | DialectType::Snowflake) {
            if let Expression::ArrayConcatAgg(agg) = e {
                let arg = agg.this;
                return Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_CONCAT_AGG".to_string(),
                    vec![arg],
                ))));
            }
        }
7051
7052 // Determine what action to take by inspecting e immutably
7053 let action = {
7054 let source_propagates_nulls =
7055 matches!(source, DialectType::Snowflake | DialectType::BigQuery);
7056 let target_ignores_nulls =
7057 matches!(target, DialectType::DuckDB | DialectType::PostgreSQL);
7058
7059 match &e {
7060 Expression::Function(f) => {
7061 let name = f.name.to_ascii_uppercase();
7062 // DuckDB json(x) is a synonym for CAST(x AS JSON) — parses a string.
7063 // Map to JSON_PARSE(x) for Trino/Presto/Athena to preserve semantics.
7064 if name == "JSON"
7065 && f.args.len() == 1
7066 && matches!(source, DialectType::DuckDB)
7067 && matches!(
7068 target,
7069 DialectType::Presto | DialectType::Trino | DialectType::Athena
7070 )
7071 {
7072 Action::DuckDBJsonFuncToJsonParse
7073 // DuckDB json_valid(x) has no direct Trino equivalent; emit the
7074 // SQL:2016 `x IS JSON` predicate which has matching semantics.
7075 } else if name == "JSON_VALID"
7076 && f.args.len() == 1
7077 && matches!(source, DialectType::DuckDB)
7078 && matches!(
7079 target,
7080 DialectType::Presto | DialectType::Trino | DialectType::Athena
7081 )
7082 {
7083 Action::DuckDBJsonValidToIsJson
7084 // DATE_PART: strip quotes from first arg when target is Snowflake (source != Snowflake)
7085 } else if (name == "DATE_PART" || name == "DATEPART")
7086 && f.args.len() == 2
7087 && matches!(target, DialectType::Snowflake)
7088 && !matches!(source, DialectType::Snowflake)
7089 && matches!(
7090 &f.args[0],
7091 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7092 )
7093 {
7094 Action::DatePartUnquote
7095 } else if source_propagates_nulls
7096 && target_ignores_nulls
7097 && (name == "GREATEST" || name == "LEAST")
7098 && f.args.len() >= 2
7099 {
7100 Action::GreatestLeastNull
7101 } else if matches!(source, DialectType::Snowflake)
7102 && name == "ARRAY_GENERATE_RANGE"
7103 && f.args.len() >= 2
7104 {
7105 Action::ArrayGenerateRange
7106 } else if matches!(source, DialectType::Snowflake)
7107 && matches!(target, DialectType::DuckDB)
7108 && name == "DATE_TRUNC"
7109 && f.args.len() == 2
7110 {
7111 // Determine if DuckDB DATE_TRUNC needs CAST wrapping to preserve input type.
7112 // Logic based on Python sqlglot's input_type_preserved flag:
7113 // - DATE + non-date-unit (HOUR, MINUTE, etc.) -> wrap
7114 // - TIMESTAMP + date-unit (YEAR, QUARTER, MONTH, WEEK, DAY) -> wrap
7115 // - TIMESTAMPTZ/TIMESTAMPLTZ/TIME -> always wrap
7116 let unit_str = match &f.args[0] {
7117 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_)) => {
7118 let crate::expressions::Literal::String(s) = lit.as_ref() else { unreachable!() };
7119 Some(s.to_ascii_uppercase())
7120 }
7121 _ => None,
7122 };
7123 let is_date_unit = unit_str.as_ref().map_or(false, |u| {
7124 matches!(u.as_str(), "YEAR" | "QUARTER" | "MONTH" | "WEEK" | "DAY")
7125 });
7126 match &f.args[1] {
7127 Expression::Cast(c) => match &c.to {
7128 DataType::Time { .. } => Action::DateTruncWrapCast,
7129 DataType::Custom { name }
7130 if name.eq_ignore_ascii_case("TIMESTAMPTZ")
7131 || name.eq_ignore_ascii_case("TIMESTAMPLTZ") =>
7132 {
7133 Action::DateTruncWrapCast
7134 }
7135 DataType::Timestamp { timezone: true, .. } => {
7136 Action::DateTruncWrapCast
7137 }
7138 DataType::Date if !is_date_unit => Action::DateTruncWrapCast,
7139 DataType::Timestamp {
7140 timezone: false, ..
7141 } if is_date_unit => Action::DateTruncWrapCast,
7142 _ => Action::None,
7143 },
7144 _ => Action::None,
7145 }
7146 } else if matches!(source, DialectType::Snowflake)
7147 && matches!(target, DialectType::DuckDB)
7148 && name == "TO_DATE"
7149 && f.args.len() == 1
7150 && !matches!(
7151 &f.args[0],
7152 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7153 )
7154 {
7155 Action::ToDateToCast
7156 } else if !matches!(source, DialectType::Redshift)
7157 && matches!(target, DialectType::Redshift)
7158 && name == "CONVERT_TIMEZONE"
7159 && (f.args.len() == 2 || f.args.len() == 3)
7160 {
7161 // Convert Function("CONVERT_TIMEZONE") to Expression::ConvertTimezone
7162 // so Redshift's transform_expr won't expand 2-arg to 3-arg with 'UTC'.
7163 // The Redshift parser adds 'UTC' as default source_tz, but when
7164 // transpiling from other dialects, we should preserve the original form.
7165 Action::ConvertTimezoneToExpr
7166 } else if matches!(source, DialectType::Snowflake)
7167 && matches!(target, DialectType::DuckDB)
7168 && name == "REGEXP_REPLACE"
7169 && f.args.len() == 4
7170 && !matches!(
7171 &f.args[3],
7172 Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
7173 )
7174 {
7175 // Snowflake REGEXP_REPLACE with position arg -> DuckDB needs 'g' flag
7176 Action::RegexpReplaceSnowflakeToDuckDB
7177 } else if matches!(source, DialectType::Snowflake)
7178 && matches!(target, DialectType::DuckDB)
7179 && name == "REGEXP_REPLACE"
7180 && f.args.len() == 5
7181 {
7182 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB
7183 Action::RegexpReplacePositionSnowflakeToDuckDB
7184 } else if matches!(source, DialectType::Snowflake)
7185 && matches!(target, DialectType::DuckDB)
7186 && name == "REGEXP_SUBSTR"
7187 {
7188 // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
7189 Action::RegexpSubstrSnowflakeToDuckDB
7190 } else if matches!(source, DialectType::Snowflake)
7191 && matches!(target, DialectType::Snowflake)
7192 && (name == "REGEXP_SUBSTR" || name == "REGEXP_SUBSTR_ALL")
7193 && f.args.len() == 6
7194 {
7195 // Snowflake identity: strip trailing group=0
7196 Action::RegexpSubstrSnowflakeIdentity
7197 } else if matches!(source, DialectType::Snowflake)
7198 && matches!(target, DialectType::DuckDB)
7199 && name == "REGEXP_SUBSTR_ALL"
7200 {
7201 // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
7202 Action::RegexpSubstrAllSnowflakeToDuckDB
7203 } else if matches!(source, DialectType::Snowflake)
7204 && matches!(target, DialectType::DuckDB)
7205 && name == "REGEXP_COUNT"
7206 {
7207 // Snowflake REGEXP_COUNT -> DuckDB LENGTH(REGEXP_EXTRACT_ALL(...))
7208 Action::RegexpCountSnowflakeToDuckDB
7209 } else if matches!(source, DialectType::Snowflake)
7210 && matches!(target, DialectType::DuckDB)
7211 && name == "REGEXP_INSTR"
7212 {
7213 // Snowflake REGEXP_INSTR -> DuckDB complex CASE expression
7214 Action::RegexpInstrSnowflakeToDuckDB
7215 } else if matches!(source, DialectType::BigQuery)
7216 && matches!(target, DialectType::Snowflake)
7217 && name == "REGEXP_EXTRACT_ALL"
7218 {
7219 // BigQuery REGEXP_EXTRACT_ALL -> Snowflake REGEXP_SUBSTR_ALL
7220 Action::RegexpExtractAllToSnowflake
7221 } else if name == "_BQ_TO_HEX" {
7222 // Internal marker from TO_HEX conversion - bare (no LOWER/UPPER wrapper)
7223 Action::BigQueryToHexBare
            } else if matches!(source, DialectType::BigQuery)
                && !matches!(target, DialectType::BigQuery)
            {
                // BigQuery-specific functions that need to be converted to standard forms
                match name.as_str() {
                    "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF"
                    | "DATE_DIFF"
                    | "TIMESTAMP_ADD" | "TIMESTAMP_SUB"
                    | "DATETIME_ADD" | "DATETIME_SUB"
                    | "TIME_ADD" | "TIME_SUB"
                    | "DATE_ADD" | "DATE_SUB"
                    | "SAFE_DIVIDE"
                    | "GENERATE_UUID"
                    | "COUNTIF"
                    | "EDIT_DISTANCE"
                    | "TIMESTAMP_SECONDS" | "TIMESTAMP_MILLIS" | "TIMESTAMP_MICROS"
                    | "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" | "DATE_TRUNC"
                    | "TO_HEX"
                    | "TO_JSON_STRING"
                    | "GENERATE_ARRAY" | "GENERATE_TIMESTAMP_ARRAY"
                    | "DIV"
                    | "UNIX_DATE" | "UNIX_SECONDS" | "UNIX_MILLIS" | "UNIX_MICROS"
                    | "LAST_DAY"
                    | "TIME" | "DATETIME" | "TIMESTAMP" | "STRING"
                    | "REGEXP_CONTAINS"
                    | "CONTAINS_SUBSTR"
                    | "SAFE_ADD" | "SAFE_SUBTRACT" | "SAFE_MULTIPLY"
                    | "SAFE_CAST"
                    | "GENERATE_DATE_ARRAY"
                    | "PARSE_DATE" | "PARSE_TIMESTAMP"
                    | "FORMAT_DATE" | "FORMAT_DATETIME" | "FORMAT_TIMESTAMP"
                    | "ARRAY_CONCAT"
                    | "JSON_QUERY" | "JSON_VALUE_ARRAY"
                    | "INSTR"
                    | "MD5" | "SHA1" | "SHA256" | "SHA512"
                    // NOTE(review): "GENERATE_UUID()" and "DATE_DIFF(" below look
                    // unreachable - a function *name* should never contain '(' or
                    // '()' ("GENERATE_UUID" / "DATE_DIFF" are already listed above).
                    // Confirm how `name` is produced and remove if truly dead.
                    | "GENERATE_UUID()" // just in case
                    | "REGEXP_EXTRACT_ALL"
                    | "REGEXP_EXTRACT"
                    | "INT64"
                    | "ARRAY_CONCAT_AGG"
                    | "DATE_DIFF(" // just in case
                    | "TO_HEX_MD5" // internal
                    | "MOD"
                    | "CONCAT"
                    | "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME"
                    | "STRUCT"
                    | "ROUND"
                    | "MAKE_INTERVAL"
                    | "ARRAY_TO_STRING"
                    | "PERCENTILE_CONT"
                    => Action::BigQueryFunctionNormalize,
                    // ARRAY(SELECT AS STRUCT ...) has a dedicated Snowflake rewrite.
                    "ARRAY" if matches!(target, DialectType::Snowflake)
                        && f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"))
                        => Action::BigQueryArraySelectAsStructToSnowflake,
                    _ => Action::None,
                }
            } else if matches!(source, DialectType::BigQuery)
                && matches!(target, DialectType::BigQuery)
            {
                // BigQuery -> BigQuery normalizations: even an identity transpile
                // canonicalizes these spellings.
                match name.as_str() {
                    "TIMESTAMP_DIFF"
                    | "DATETIME_DIFF"
                    | "TIME_DIFF"
                    | "DATE_DIFF"
                    | "DATE_ADD"
                    | "TO_HEX"
                    | "CURRENT_TIMESTAMP"
                    | "CURRENT_DATE"
                    | "CURRENT_TIME"
                    | "CURRENT_DATETIME"
                    | "GENERATE_DATE_ARRAY"
                    | "INSTR"
                    | "FORMAT_DATETIME"
                    | "DATETIME"
                    | "MAKE_INTERVAL" => Action::BigQueryFunctionNormalize,
                    _ => Action::None,
                }
            } else {
                // Generic function normalization for non-BigQuery sources.
                // This first arm is a flat allow-list of function names whose
                // cross-dialect handling is fully data-driven (GenericFunctionNormalize);
                // special-cased functions follow in later arms of this match.
                match name.as_str() {
                    "ARBITRARY" | "AGGREGATE"
                    | "REGEXP_MATCHES" | "REGEXP_FULL_MATCH"
                    | "STRUCT_EXTRACT"
                    | "LIST_FILTER" | "LIST_TRANSFORM" | "LIST_SORT" | "LIST_REVERSE_SORT"
                    | "STRING_TO_ARRAY" | "STR_SPLIT" | "STR_SPLIT_REGEX" | "SPLIT_TO_ARRAY"
                    | "SUBSTRINGINDEX"
                    | "ARRAY_LENGTH" | "SIZE" | "CARDINALITY"
                    | "UNICODE"
                    | "XOR"
                    | "ARRAY_REVERSE_SORT"
                    | "ENCODE" | "DECODE"
                    | "QUANTILE"
                    | "EPOCH" | "EPOCH_MS"
                    | "HASHBYTES"
                    | "JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT"
                    | "APPROX_DISTINCT"
                    | "DATE_PARSE" | "FORMAT_DATETIME"
                    | "REGEXP_EXTRACT" | "REGEXP_SUBSTR" | "TO_DAYS"
                    | "RLIKE"
                    | "DATEDIFF" | "DATE_DIFF" | "MONTHS_BETWEEN"
                    | "ADD_MONTHS" | "DATEADD" | "DATE_ADD" | "DATE_SUB" | "DATETRUNC"
                    | "LAST_DAY" | "LAST_DAY_OF_MONTH" | "EOMONTH"
                    | "ARRAY_CONSTRUCT" | "ARRAY_CAT" | "ARRAY_COMPACT"
                    | "ARRAY_FILTER" | "FILTER" | "REDUCE" | "ARRAY_REVERSE"
                    | "MAP" | "MAP_FROM_ENTRIES"
                    | "COLLECT_LIST" | "COLLECT_SET"
                    | "ISNAN" | "IS_NAN"
                    | "TO_UTC_TIMESTAMP" | "FROM_UTC_TIMESTAMP"
                    | "FORMAT_NUMBER"
                    | "TOMONDAY" | "TOSTARTOFWEEK" | "TOSTARTOFMONTH" | "TOSTARTOFYEAR"
                    | "ELEMENT_AT"
                    | "EXPLODE" | "EXPLODE_OUTER" | "POSEXPLODE"
                    | "SPLIT_PART"
                    // GENERATE_SERIES: handled separately below
                    | "JSON_EXTRACT" | "JSON_EXTRACT_SCALAR"
                    | "JSON_QUERY" | "JSON_VALUE"
                    | "JSON_SEARCH"
                    | "JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
                    | "TO_UNIX_TIMESTAMP" | "UNIX_TIMESTAMP"
                    | "CURDATE" | "CURTIME"
                    | "ARRAY_TO_STRING"
                    | "ARRAY_SORT" | "SORT_ARRAY"
                    | "LEFT" | "RIGHT"
                    | "MAP_FROM_ARRAYS"
                    | "LIKE" | "ILIKE"
                    | "ARRAY_CONCAT" | "LIST_CONCAT"
                    | "QUANTILE_CONT" | "QUANTILE_DISC"
                    | "PERCENTILE_CONT" | "PERCENTILE_DISC"
                    | "PERCENTILE_APPROX" | "APPROX_PERCENTILE"
                    | "LOCATE" | "STRPOS" | "INSTR"
                    | "CHAR"
                    // CONCAT: handled separately for COALESCE wrapping
                    | "ARRAY_JOIN"
                    | "ARRAY_CONTAINS" | "HAS" | "CONTAINS"
                    | "ISNULL"
                    | "MONTHNAME"
                    | "TO_TIMESTAMP"
                    | "TO_DATE"
                    | "TO_JSON"
                    | "REGEXP_SPLIT"
                    | "SPLIT"
                    | "FORMATDATETIME"
                    | "ARRAYJOIN"
                    | "SPLITBYSTRING" | "SPLITBYREGEXP"
                    | "NVL"
                    | "TO_CHAR"
                    | "DBMS_RANDOM.VALUE"
                    | "REGEXP_LIKE"
                    | "REPLICATE"
                    | "LEN"
                    | "COUNT_BIG"
                    | "DATEFROMPARTS"
                    | "DATETIMEFROMPARTS"
                    | "CONVERT" | "TRY_CONVERT"
                    | "STRFTIME" | "STRPTIME"
                    | "DATE_FORMAT" | "FORMAT_DATE"
                    | "PARSE_TIMESTAMP" | "PARSE_DATE"
                    | "FROM_BASE64" | "TO_BASE64"
                    | "GETDATE"
                    | "TO_HEX" | "FROM_HEX" | "UNHEX" | "HEX"
                    | "TO_UTF8" | "FROM_UTF8"
                    | "STARTS_WITH" | "STARTSWITH"
                    | "APPROX_COUNT_DISTINCT"
                    | "JSON_FORMAT"
                    | "SYSDATE"
                    | "LOGICAL_OR" | "LOGICAL_AND"
                    | "MONTHS_ADD"
                    | "SCHEMA_NAME"
                    | "STRTOL"
                    | "EDITDIST3"
                    | "FORMAT"
                    | "LIST_CONTAINS" | "LIST_HAS"
                    | "VARIANCE" | "STDDEV"
                    | "ISINF"
                    | "TO_UNIXTIME"
                    | "FROM_UNIXTIME"
                    | "DATEPART" | "DATE_PART"
                    | "DATENAME"
                    | "STRING_AGG"
                    | "JSON_ARRAYAGG"
                    | "APPROX_QUANTILE"
                    | "MAKE_DATE"
                    | "LIST_HAS_ANY" | "ARRAY_HAS_ANY"
                    | "RANGE"
                    | "TRY_ELEMENT_AT"
                    | "STR_TO_MAP"
                    | "STRING"
                    | "STR_TO_TIME"
                    | "CURRENT_SCHEMA"
                    | "LTRIM" | "RTRIM"
                    | "UUID"
                    | "FARM_FINGERPRINT"
                    | "JSON_KEYS"
                    | "WEEKOFYEAR"
                    | "CONCAT_WS"
                    | "TRY_DIVIDE"
                    | "ARRAY_SLICE"
                    | "ARRAY_PREPEND"
                    | "ARRAY_REMOVE"
                    | "GENERATE_DATE_ARRAY"
                    | "PARSE_JSON"
                    | "JSON_REMOVE"
                    | "JSON_SET"
                    | "LEVENSHTEIN"
                    | "CURRENT_VERSION"
                    | "ARRAY_MAX"
                    | "ARRAY_MIN"
                    | "JAROWINKLER_SIMILARITY"
                    | "CURRENT_SCHEMAS"
                    | "TO_VARIANT"
                    | "JSON_GROUP_ARRAY" | "JSON_GROUP_OBJECT"
                    | "ARRAYS_OVERLAP" | "ARRAY_INTERSECTION"
                    => Action::GenericFunctionNormalize,
                    // Canonical date functions -> dialect-specific.
                    // ARM ORDER IS LOAD-BEARING: guarded arms for a given name must
                    // precede the unguarded fallback for the same name (see
                    // STR_TO_DATE, GENERATE_SERIES, CONCAT below).
                    "TS_OR_DS_TO_DATE" => Action::TsOrDsToDateConvert,
                    "TS_OR_DS_TO_DATE_STR" if f.args.len() == 1 => Action::TsOrDsToDateStrConvert,
                    "DATE_STR_TO_DATE" if f.args.len() == 1 => Action::DateStrToDateConvert,
                    "TIME_STR_TO_DATE" if f.args.len() == 1 => Action::TimeStrToDateConvert,
                    "TIME_STR_TO_TIME" if f.args.len() <= 2 => Action::TimeStrToTimeConvert,
                    "TIME_STR_TO_UNIX" if f.args.len() == 1 => Action::TimeStrToUnixConvert,
                    "TIME_TO_TIME_STR" if f.args.len() == 1 => Action::TimeToTimeStrConvert,
                    "DATE_TO_DATE_STR" if f.args.len() == 1 => Action::DateToDateStrConvert,
                    "DATE_TO_DI" if f.args.len() == 1 => Action::DateToDiConvert,
                    "DI_TO_DATE" if f.args.len() == 1 => Action::DiToDateConvert,
                    "TS_OR_DI_TO_DI" if f.args.len() == 1 => Action::TsOrDiToDiConvert,
                    "UNIX_TO_STR" if f.args.len() == 2 => Action::UnixToStrConvert,
                    "UNIX_TO_TIME" if f.args.len() == 1 => Action::UnixToTimeConvert,
                    "UNIX_TO_TIME_STR" if f.args.len() == 1 => Action::UnixToTimeStrConvert,
                    "TIME_TO_UNIX" if f.args.len() == 1 => Action::TimeToUnixConvert,
                    "TIME_TO_STR" if f.args.len() == 2 => Action::TimeToStrConvert,
                    "STR_TO_UNIX" if f.args.len() == 2 => Action::StrToUnixConvert,
                    // STR_TO_DATE(x, fmt) -> dialect-specific (Generic source only);
                    // otherwise fall through to the generic normalization below.
                    "STR_TO_DATE" if f.args.len() == 2
                        && matches!(source, DialectType::Generic) => Action::StrToDateConvert,
                    "STR_TO_DATE" => Action::GenericFunctionNormalize,
                    // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
                    "TS_OR_DS_ADD" if f.args.len() == 3
                        && matches!(source, DialectType::Generic) => Action::TsOrDsAddConvert,
                    // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, '1970-01-01')
                    "DATE_FROM_UNIX_DATE" if f.args.len() == 1 => Action::DateFromUnixDateConvert,
                    // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                    "NVL2" if (f.args.len() == 2 || f.args.len() == 3) => Action::Nvl2Expand,
                    // IFNULL(a, b) -> COALESCE(a, b) when coming from Generic source
                    "IFNULL" if f.args.len() == 2 => Action::IfnullToCoalesce,
                    // IS_ASCII(x) -> dialect-specific
                    "IS_ASCII" if f.args.len() == 1 => Action::IsAsciiConvert,
                    // STR_POSITION(haystack, needle[, pos[, occ]]) -> dialect-specific
                    "STR_POSITION" => Action::StrPositionConvert,
                    // ARRAY_SUM -> dialect-specific
                    "ARRAY_SUM" => Action::ArraySumConvert,
                    // ARRAY_SIZE -> dialect-specific (Drill only)
                    "ARRAY_SIZE" if matches!(target, DialectType::Drill) => Action::ArraySizeConvert,
                    // ARRAY_ANY -> dialect-specific
                    "ARRAY_ANY" if f.args.len() == 2 => Action::ArrayAnyConvert,
                    // Functions needing specific cross-dialect transforms
                    "MAX_BY" | "MIN_BY" if matches!(target, DialectType::ClickHouse | DialectType::Spark | DialectType::Databricks | DialectType::DuckDB) => Action::MaxByMinByConvert,
                    "STRUCT" if matches!(source, DialectType::Spark | DialectType::Databricks)
                        && !matches!(target, DialectType::Spark | DialectType::Databricks | DialectType::Hive) => Action::SparkStructConvert,
                    // ARRAY(SELECT AS STRUCT ...) gets a dedicated Snowflake rewrite
                    // before the generic ARRAY syntax conversion below.
                    "ARRAY" if matches!(source, DialectType::BigQuery)
                        && matches!(target, DialectType::Snowflake)
                        && f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT")) => Action::BigQueryArraySelectAsStructToSnowflake,
                    "ARRAY" if matches!(target, DialectType::Presto | DialectType::Trino | DialectType::Athena | DialectType::BigQuery | DialectType::DuckDB | DialectType::Snowflake | DialectType::ClickHouse | DialectType::StarRocks) => Action::ArraySyntaxConvert,
                    // TRUNC with a string unit is a date-trunc; TRUNC/TRUNCATE without
                    // a string second arg is numeric truncation (next arm).
                    "TRUNC" if f.args.len() == 2 && matches!(&f.args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))) && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::TruncToDateTrunc,
                    "TRUNC" | "TRUNCATE" if f.args.len() <= 2 && !f.args.get(1).map_or(false, |a| matches!(a, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) => Action::GenericFunctionNormalize,
                    // DATE_TRUNC('unit', x) from Generic source -> arg swap for BigQuery/Doris/Spark/MySQL
                    "DATE_TRUNC" if f.args.len() == 2
                        && matches!(source, DialectType::Generic)
                        && matches!(target, DialectType::BigQuery | DialectType::Doris | DialectType::StarRocks
                            | DialectType::Spark | DialectType::Databricks | DialectType::MySQL) => Action::DateTruncSwapArgs,
                    // TIMESTAMP_TRUNC(x, UNIT) from Generic source -> convert to per-dialect
                    "TIMESTAMP_TRUNC" if f.args.len() >= 2
                        && matches!(source, DialectType::Generic) => Action::TimestampTruncConvert,
                    "UNIFORM" if matches!(target, DialectType::Snowflake) => Action::GenericFunctionNormalize,
                    // GENERATE_SERIES -> SEQUENCE/UNNEST/EXPLODE for target dialects
                    "GENERATE_SERIES" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
                        && !matches!(target, DialectType::PostgreSQL | DialectType::Redshift | DialectType::TSQL | DialectType::Fabric) => Action::GenerateSeriesConvert,
                    // GENERATE_SERIES with interval normalization for PG target
                    "GENERATE_SERIES" if f.args.len() >= 3
                        && matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
                        && matches!(target, DialectType::PostgreSQL | DialectType::Redshift) => Action::GenerateSeriesConvert,
                    "GENERATE_SERIES" => Action::None, // passthrough for other cases
                    // CONCAT(a, b) -> COALESCE wrapping for Presto/ClickHouse from PostgreSQL
                    "CONCAT" if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
                        && matches!(target, DialectType::Presto | DialectType::Trino | DialectType::ClickHouse) => Action::ConcatCoalesceWrap,
                    "CONCAT" => Action::GenericFunctionNormalize,
                    // DIV(a, b) -> target-specific integer division
                    "DIV" if f.args.len() == 2
                        && matches!(source, DialectType::PostgreSQL)
                        && matches!(target, DialectType::DuckDB | DialectType::BigQuery | DialectType::SQLite) => Action::DivFuncConvert,
                    // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
                    "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG" if f.args.len() == 2
                        && matches!(target, DialectType::DuckDB) => Action::JsonObjectAggConvert,
                    // JSONB_EXISTS -> JSON_EXISTS for DuckDB
                    "JSONB_EXISTS" if f.args.len() == 2
                        && matches!(target, DialectType::DuckDB) => Action::JsonbExistsConvert,
                    // DATE_BIN -> TIME_BUCKET for DuckDB
                    "DATE_BIN" if matches!(target, DialectType::DuckDB) => Action::DateBinConvert,
                    // Multi-arg MIN(a,b,c) -> LEAST, MAX(a,b,c) -> GREATEST
                    // (SQLite excluded: its MIN/MAX are natively variadic)
                    "MIN" | "MAX" if f.args.len() > 1 && !matches!(target, DialectType::SQLite) => Action::MinMaxToLeastGreatest,
                    // ClickHouse uniq -> APPROX_COUNT_DISTINCT for other dialects
                    "UNIQ" if matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseUniqToApproxCountDistinct,
                    // ClickHouse any -> ANY_VALUE for other dialects
                    "ANY" if f.args.len() == 1 && matches!(source, DialectType::ClickHouse) && !matches!(target, DialectType::ClickHouse) => Action::ClickHouseAnyToAnyValue,
                    _ => Action::None,
                }
            }
        }
        // Aggregate-function calls parsed into their own AST node (as opposed to
        // plain Expression::Function). Dispatch mirrors the function-name table.
        Expression::AggregateFunction(af) => {
            let name = af.name.to_ascii_uppercase();
            match name.as_str() {
                "ARBITRARY" | "AGGREGATE" => Action::GenericFunctionNormalize,
                "JSON_ARRAYAGG" => Action::GenericFunctionNormalize,
                // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
                "JSON_OBJECT_AGG" | "JSONB_OBJECT_AGG"
                    if matches!(target, DialectType::DuckDB) =>
                {
                    Action::JsonObjectAggConvert
                }
                // ARRAY_AGG -> COLLECT_LIST for Hive-family targets
                "ARRAY_AGG"
                    if matches!(
                        target,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) =>
                {
                    Action::ArrayAggToCollectList
                }
                "MAX_BY" | "MIN_BY"
                    if matches!(
                        target,
                        DialectType::ClickHouse
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::DuckDB
                    ) =>
                {
                    Action::MaxByMinByConvert
                }
                // COLLECT_LIST -> ARRAY_AGG for targets without the Hive spelling
                "COLLECT_LIST"
                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::DuckDB
                    ) =>
                {
                    Action::CollectListToArrayAgg
                }
                "COLLECT_SET"
                    if matches!(
                        target,
                        DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Snowflake
                            | DialectType::DuckDB
                    ) =>
                {
                    Action::CollectSetConvert
                }
                "PERCENTILE"
                    if matches!(
                        target,
                        DialectType::DuckDB | DialectType::Presto | DialectType::Trino
                    ) =>
                {
                    Action::PercentileConvert
                }
                // CORR -> CASE WHEN ISNAN(CORR(a,b)) THEN NULL ELSE CORR(a,b) END for DuckDB
                "CORR"
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Snowflake) =>
                {
                    Action::CorrIsnanWrap
                }
                // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
                "APPROX_QUANTILES"
                    if matches!(source, DialectType::BigQuery)
                        && matches!(target, DialectType::DuckDB) =>
                {
                    Action::BigQueryApproxQuantiles
                }
                // BigQuery PERCENTILE_CONT(x, frac RESPECT NULLS) -> QUANTILE_CONT(x, frac) for DuckDB
                "PERCENTILE_CONT"
                    if matches!(source, DialectType::BigQuery)
                        && matches!(target, DialectType::DuckDB)
                        && af.args.len() >= 2 =>
                {
                    Action::BigQueryPercentileContToDuckDB
                }
                _ => Action::None,
            }
        }
        // JSON_ARRAYAGG node: only PostgreSQL needs renaming/normalization.
        Expression::JSONArrayAgg(_) => match target {
            DialectType::PostgreSQL => Action::GenericFunctionNormalize,
            _ => Action::None,
        },
        Expression::ToNumber(tn) => {
            // TO_NUMBER(x) with 1 arg -> CAST(x AS DOUBLE) for most targets.
            // With an explicit format/precision/scale the call is left untouched.
            if tn.format.is_none() && tn.precision.is_none() && tn.scale.is_none() {
                match target {
                    // These dialects support bare TO_NUMBER natively.
                    DialectType::Oracle
                    | DialectType::Snowflake
                    | DialectType::Teradata => Action::None,
                    _ => Action::GenericFunctionNormalize,
                }
            } else {
                Action::None
            }
        }
        Expression::Nvl2(_) => {
            // NVL2(a, b, c) -> CASE WHEN NOT a IS NULL THEN b ELSE c END for most dialects
            // Keep as NVL2 for dialects that support it natively
            match target {
                DialectType::Oracle
                | DialectType::Snowflake
                | DialectType::Teradata
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Redshift => Action::None,
                _ => Action::Nvl2Expand,
            }
        }
        Expression::Decode(_) | Expression::DecodeCase(_) => {
            // DECODE(a, b, c[, d, e[, ...]]) -> CASE WHEN with null-safe comparisons
            // Keep as DECODE for Oracle/Snowflake
            match target {
                DialectType::Oracle | DialectType::Snowflake => Action::None,
                _ => Action::DecodeSimplify,
            }
        }
        Expression::Coalesce(ref cf) => {
            // IFNULL(a, b) -> COALESCE(a, b): clear original_name for cross-dialect.
            // BigQuery keeps IFNULL natively when source is also BigQuery.
            if cf.original_name.as_deref() == Some("IFNULL")
                && !(matches!(source, DialectType::BigQuery)
                    && matches!(target, DialectType::BigQuery))
            {
                Action::IfnullToCoalesce
            } else {
                Action::None
            }
        }
        // Snowflake IFF/DIV0-style expressions whose false branch is a division:
        // targets with strict typed division need a dedicated rewrite.
        Expression::IfFunc(if_func) => {
            if matches!(source, DialectType::Snowflake)
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::SQLite
                )
                && matches!(if_func.false_value, Some(Expression::Div(_)))
            {
                Action::Div0TypedDivision
            } else {
                Action::None
            }
        }
7681 Expression::ToJson(_) => match target {
7682 DialectType::Presto | DialectType::Trino => Action::ToJsonConvert,
7683 DialectType::BigQuery => Action::ToJsonConvert,
7684 DialectType::DuckDB => Action::ToJsonConvert,
7685 _ => Action::None,
7686 },
        Expression::ArrayAgg(ref agg) => {
            if matches!(target, DialectType::MySQL | DialectType::SingleStore) {
                // MySQL family has no ARRAY_AGG; emit GROUP_CONCAT instead.
                Action::ArrayAggToGroupConcat
            } else if matches!(
                target,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) {
                // Any source -> Hive/Spark: convert ARRAY_AGG to COLLECT_LIST
                Action::ArrayAggToCollectList
            } else if matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) && matches!(target, DialectType::DuckDB)
                && agg.filter.is_some()
            {
                // Spark/Hive ARRAY_AGG excludes NULLs, DuckDB includes them
                // Need to add NOT x IS NULL to existing filter
                Action::ArrayAggNullFilter
            } else if matches!(target, DialectType::DuckDB)
                && agg.ignore_nulls == Some(true)
                && !agg.order_by.is_empty()
            {
                // BigQuery ARRAY_AGG(x IGNORE NULLS ORDER BY ...) -> DuckDB ARRAY_AGG(x ORDER BY a NULLS FIRST, ...)
                Action::ArrayAggIgnoreNullsDuckDB
            } else if !matches!(source, DialectType::Snowflake) {
                // Gate: everything below this point applies ONLY to Snowflake
                // sources. Keep this early-out before the remaining branches.
                Action::None
            } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
                // Snowflake -> Spark: only the ARRAY_AGG spelling (or an unnamed
                // node) becomes COLLECT_LIST; other aggregate names pass through.
                let is_array_agg = agg.name.as_deref().map_or(false, |n| n.eq_ignore_ascii_case("ARRAY_AGG"))
                    || agg.name.is_none();
                if is_array_agg {
                    Action::ArrayAggCollectList
                } else {
                    Action::None
                }
            } else if matches!(
                target,
                DialectType::DuckDB | DialectType::Presto | DialectType::Trino
            ) && agg.filter.is_none()
            {
                // Snowflake ARRAY_AGG skips NULLs; add a filter for targets that keep them.
                Action::ArrayAggFilter
            } else {
                Action::None
            }
        }
        // WITHIN GROUP (ORDER BY ...) wrappers: dispatch on the wrapped aggregate.
        Expression::WithinGroup(wg) => {
            if matches!(source, DialectType::Snowflake)
                && matches!(
                    target,
                    DialectType::DuckDB | DialectType::Presto | DialectType::Trino
                )
                && matches!(wg.this, Expression::ArrayAgg(_))
            {
                Action::ArrayAggWithinGroupFilter
            } else if matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("STRING_AGG"))
                || matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("STRING_AGG"))
                || matches!(&wg.this, Expression::StringAgg(_))
            {
                // STRING_AGG may be parsed as any of three node kinds; handle all.
                Action::StringAggConvert
            } else if matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::Spark
                    | DialectType::Databricks
            ) && (matches!(&wg.this, Expression::Function(f) if f.name.eq_ignore_ascii_case("PERCENTILE_CONT") || f.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
                || matches!(&wg.this, Expression::AggregateFunction(af) if af.name.eq_ignore_ascii_case("PERCENTILE_CONT") || af.name.eq_ignore_ascii_case("PERCENTILE_DISC"))
                || matches!(&wg.this, Expression::PercentileCont(_)))
            {
                Action::PercentileContConvert
            } else {
                Action::None
            }
        }
        // For BigQuery: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
        // because BigQuery's TIMESTAMP is really TIMESTAMPTZ, and
        // DATETIME is the timezone-unaware type.
        // BRANCH ORDER IS LOAD-BEARING: earlier branches shadow later ones
        // (e.g. the format-string branch must run before any type rewrites).
        Expression::Cast(ref c) => {
            if c.format.is_some()
                && (matches!(source, DialectType::BigQuery)
                    || matches!(source, DialectType::Teradata))
            {
                // CAST(x AS T FORMAT 'fmt') needs format-aware conversion first.
                Action::BigQueryCastFormat
            } else if matches!(target, DialectType::BigQuery)
                && !matches!(source, DialectType::BigQuery)
                && matches!(
                    c.to,
                    DataType::Timestamp {
                        timezone: false,
                        ..
                    }
                )
            {
                Action::CastTimestampToDatetime
            } else if matches!(target, DialectType::MySQL | DialectType::StarRocks)
                && !matches!(source, DialectType::MySQL | DialectType::StarRocks)
                && matches!(
                    c.to,
                    DataType::Timestamp {
                        timezone: false,
                        ..
                    }
                )
            {
                // Generic/other -> MySQL/StarRocks: CAST(x AS TIMESTAMP) -> CAST(x AS DATETIME)
                // but MySQL-native CAST(x AS TIMESTAMP) stays as TIMESTAMP(x) via transform_cast
                Action::CastTimestampToDatetime
            } else if matches!(
                source,
                DialectType::Hive | DialectType::Spark | DialectType::Databricks
            ) && matches!(
                target,
                DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena
                    | DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::BigQuery
                    | DialectType::Databricks
                    | DialectType::TSQL
            ) {
                // Hive-family CAST returns NULL on failure; use TRY_CAST in
                // targets where plain CAST would raise instead.
                Action::HiveCastToTryCast
            } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
                && matches!(target, DialectType::MySQL | DialectType::StarRocks)
            {
                // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
                Action::CastTimestamptzToFunc
            } else if matches!(c.to, DataType::Timestamp { timezone: true, .. })
                && matches!(
                    target,
                    DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::BigQuery
                )
            {
                // CAST(x AS TIMESTAMP WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
                Action::CastTimestampStripTz
            } else if matches!(&c.to, DataType::Json)
                && matches!(source, DialectType::DuckDB)
                && matches!(target, DialectType::Snowflake)
            {
                Action::DuckDBCastJsonToVariant
            } else if matches!(&c.to, DataType::Json)
                && matches!(&c.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                && matches!(
                    target,
                    DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Athena
                        | DialectType::Snowflake
                )
            {
                // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
                // Only when the input is a string literal (JSON 'value' syntax)
                Action::JsonLiteralToJsonParse
            } else if matches!(&c.to, DataType::Json)
                && matches!(source, DialectType::DuckDB)
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                // DuckDB's CAST(x AS JSON) parses the string value into a JSON value.
                // Trino/Presto/Athena's CAST(x AS JSON) instead wraps the value as a
                // JSON string (no parsing) — different semantics. Use JSON_PARSE(x)
                // in the target to preserve DuckDB's parse semantics.
                Action::JsonLiteralToJsonParse
            } else if matches!(&c.to, DataType::Json | DataType::JsonB)
                && matches!(target, DialectType::Spark | DialectType::Databricks)
            {
                // CAST(x AS JSON) -> TO_JSON(x) for Spark
                Action::CastToJsonForSpark
            } else if (matches!(
                &c.to,
                DataType::Array { .. } | DataType::Map { .. } | DataType::Struct { .. }
            )) && matches!(
                target,
                DialectType::Spark | DialectType::Databricks
            ) && (matches!(&c.this, Expression::ParseJson(_))
                || matches!(
                    &c.this,
                    Expression::Function(f)
                        if f.name.eq_ignore_ascii_case("JSON_EXTRACT")
                            || f.name.eq_ignore_ascii_case("JSON_EXTRACT_SCALAR")
                            || f.name.eq_ignore_ascii_case("GET_JSON_OBJECT")
                ))
            {
                // CAST(JSON_PARSE(...) AS ARRAY/MAP) or CAST(JSON_EXTRACT/GET_JSON_OBJECT(...) AS ARRAY/MAP)
                // -> FROM_JSON(..., type_string) for Spark
                Action::CastJsonToFromJson
            } else if matches!(target, DialectType::Spark | DialectType::Databricks)
                && matches!(
                    c.to,
                    DataType::Timestamp {
                        timezone: false,
                        ..
                    }
                )
                && matches!(source, DialectType::DuckDB)
            {
                Action::StrftimeCastTimestamp
            } else if matches!(source, DialectType::DuckDB)
                && matches!(
                    c.to,
                    DataType::Decimal {
                        precision: None,
                        ..
                    }
                )
            {
                // DuckDB's bare DECIMAL has an implicit default precision; make it explicit.
                Action::DecimalDefaultPrecision
            } else if matches!(source, DialectType::MySQL | DialectType::SingleStore)
                && matches!(c.to, DataType::Char { length: None })
                && !matches!(target, DialectType::MySQL | DialectType::SingleStore)
            {
                // MySQL CAST(x AS CHAR) was originally TEXT - convert to target text type
                Action::MysqlCastCharToText
            } else if matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) && matches!(
                target,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) && Self::has_varchar_char_type(&c.to)
            {
                // Spark parses VARCHAR(n)/CHAR(n) as TEXT, so normalize back to STRING
                Action::SparkCastVarcharToString
            } else {
                Action::None
            }
        }
        // SAFE_CAST with a FORMAT clause (BigQuery only) needs the same
        // format-aware conversion as CAST ... FORMAT above.
        Expression::SafeCast(ref c) => {
            if c.format.is_some()
                && matches!(source, DialectType::BigQuery)
                && !matches!(target, DialectType::BigQuery)
            {
                Action::BigQueryCastFormat
            } else {
                Action::None
            }
        }
        Expression::TryCast(ref c) => {
            if matches!(&c.to, DataType::Json)
                && matches!(source, DialectType::DuckDB)
                && matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                )
            {
                // DuckDB's TRY_CAST(x AS JSON) tries to parse x as JSON, returning
                // NULL on parse failure. Trino/Presto/Athena's TRY_CAST(x AS JSON)
                // wraps the value as a JSON string (no parse). Emit TRY(JSON_PARSE(x))
                // to preserve DuckDB's parse-or-null semantics.
                Action::DuckDBTryCastJsonToTryJsonParse
            } else {
                Action::None
            }
        }
        // Plain JSON_ARRAY (no NULL handling / RETURNING / STRICT modifiers)
        // can be normalized for Snowflake; modified forms are left untouched.
        Expression::JSONArray(ref ja)
            if matches!(target, DialectType::Snowflake)
                && ja.null_handling.is_none()
                && ja.return_type.is_none()
                && ja.strict.is_none() =>
        {
            Action::GenericFunctionNormalize
        }
        Expression::JsonArray(_) if matches!(target, DialectType::Snowflake) => {
            Action::GenericFunctionNormalize
        }
        // For DuckDB: DATE_TRUNC should preserve the input type
        Expression::DateTrunc(_) | Expression::TimestampTrunc(_) => {
            if matches!(source, DialectType::Snowflake)
                && matches!(target, DialectType::DuckDB)
            {
                Action::DateTruncWrapCast
            } else {
                Action::None
            }
        }
        // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
        // (skipped for TSQL/Fabric sources, whose SET has different semantics)
        Expression::SetStatement(s) => {
            if matches!(target, DialectType::DuckDB)
                && !matches!(source, DialectType::TSQL | DialectType::Fabric)
                && s.items.iter().any(|item| item.kind.is_none())
            {
                Action::SetToVariable
            } else {
                Action::None
            }
        }
        // Cross-dialect NULL ordering normalization.
        // When nulls_first is not specified, fill in the source dialect's implied
        // default so the target generator can correctly add/strip NULLS FIRST/LAST.
        Expression::Ordered(o) => {
            // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
            if matches!(target, DialectType::MySQL) && o.nulls_first.is_some() {
                Action::MysqlNullsOrdering
            } else {
                // Skip targets that don't support NULLS FIRST/LAST syntax
                let target_supports_nulls = !matches!(
                    target,
                    DialectType::MySQL
                        | DialectType::TSQL
                        | DialectType::StarRocks
                        | DialectType::Doris
                );
                if o.nulls_first.is_none() && source != target && target_supports_nulls
                {
                    Action::NullsOrdering
                } else {
                    Action::None
                }
            }
        }
        // BigQuery data types: convert INT64, BYTES, NUMERIC etc. to standard types
        Expression::DataType(dt) => {
            if matches!(source, DialectType::BigQuery)
                && !matches!(target, DialectType::BigQuery)
            {
                match dt {
                    DataType::Custom { ref name }
                        if name.eq_ignore_ascii_case("INT64")
                            || name.eq_ignore_ascii_case("FLOAT64")
                            || name.eq_ignore_ascii_case("BOOL")
                            || name.eq_ignore_ascii_case("BYTES")
                            || name.eq_ignore_ascii_case("NUMERIC")
                            || name.eq_ignore_ascii_case("STRING")
                            || name.eq_ignore_ascii_case("DATETIME") =>
                    {
                        Action::BigQueryCastType
                    }
                    _ => Action::None,
                }
            } else if matches!(source, DialectType::TSQL) {
                // For TSQL source -> any target (including TSQL itself for REAL)
                match dt {
                    // REAL -> FLOAT even for TSQL->TSQL
                    DataType::Custom { ref name }
                        if name.eq_ignore_ascii_case("REAL") =>
                    {
                        Action::TSQLTypeNormalize
                    }
                    DataType::Float {
                        real_spelling: true,
                        ..
                    } => Action::TSQLTypeNormalize,
                    // Other TSQL type normalizations only for non-TSQL targets.
                    // NOTE(review): the `name[..7]` / `name[..10]` / `name[..5]`
                    // byte slices panic if the boundary splits a multi-byte char;
                    // assumes type names are ASCII — confirm upstream guarantees.
                    DataType::Custom { ref name }
                        if !matches!(target, DialectType::TSQL)
                            && (name.eq_ignore_ascii_case("MONEY")
                                || name.eq_ignore_ascii_case("SMALLMONEY")
                                || name.eq_ignore_ascii_case("DATETIME2")
                                || name.eq_ignore_ascii_case("IMAGE")
                                || name.eq_ignore_ascii_case("BIT")
                                || name.eq_ignore_ascii_case("ROWVERSION")
                                || name.eq_ignore_ascii_case("UNIQUEIDENTIFIER")
                                || name.eq_ignore_ascii_case("DATETIMEOFFSET")
                                || (name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC"))
                                || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("DATETIME2("))
                                || (name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME("))) =>
                    {
                        Action::TSQLTypeNormalize
                    }
                    DataType::Float {
                        precision: Some(_), ..
                    } if !matches!(target, DialectType::TSQL) => {
                        Action::TSQLTypeNormalize
                    }
                    DataType::TinyInt { .. }
                        if !matches!(target, DialectType::TSQL) =>
                    {
                        Action::TSQLTypeNormalize
                    }
                    // INTEGER -> INT for Databricks/Spark targets
                    DataType::Int {
                        integer_spelling: true,
                        ..
                    } if matches!(
                        target,
                        DialectType::Databricks | DialectType::Spark
                    ) =>
                    {
                        Action::TSQLTypeNormalize
                    }
                    _ => Action::None,
                }
            } else if (matches!(source, DialectType::Oracle)
                || matches!(source, DialectType::Generic))
                && !matches!(target, DialectType::Oracle)
            {
                // Oracle VARCHAR2/NVARCHAR2 (bare or parameterized) -> VARCHAR.
                match dt {
                    DataType::Custom { ref name }
                        if (name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2("))
                            || (name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2("))
                            || name.eq_ignore_ascii_case("VARCHAR2")
                            || name.eq_ignore_ascii_case("NVARCHAR2") =>
                    {
                        Action::OracleVarchar2ToVarchar
                    }
                    _ => Action::None,
                }
            } else if matches!(target, DialectType::Snowflake)
                && !matches!(source, DialectType::Snowflake)
            {
                // When target is Snowflake but source is NOT Snowflake,
                // protect FLOAT from being converted to DOUBLE by Snowflake's transform.
                // Snowflake treats FLOAT=DOUBLE internally, but non-Snowflake sources
                // should keep their FLOAT spelling.
                match dt {
                    DataType::Float { .. } => Action::SnowflakeFloatProtect,
                    _ => Action::None,
                }
            } else {
                Action::None
            }
        }
8104 // LOWER patterns from BigQuery TO_HEX conversions:
8105 // - LOWER(LOWER(HEX(x))) from non-BQ targets: flatten
8106 // - LOWER(Function("TO_HEX")) for BQ->BQ: strip LOWER
8107 Expression::Lower(uf) => {
8108 if matches!(source, DialectType::BigQuery) {
8109 match &uf.this {
8110 Expression::Lower(_) => Action::BigQueryToHexLower,
8111 Expression::Function(f)
8112 if f.name == "TO_HEX"
8113 && matches!(target, DialectType::BigQuery) =>
8114 {
8115 // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
8116 Action::BigQueryToHexLower
8117 }
8118 _ => Action::None,
8119 }
8120 } else {
8121 Action::None
8122 }
8123 }
8124 // UPPER patterns from BigQuery TO_HEX conversions:
8125 // - UPPER(LOWER(HEX(x))) from non-BQ targets: extract inner
8126 // - UPPER(Function("TO_HEX")) for BQ->BQ: keep as UPPER(TO_HEX(x))
8127 Expression::Upper(uf) => {
8128 if matches!(source, DialectType::BigQuery) {
8129 match &uf.this {
8130 Expression::Lower(_) => Action::BigQueryToHexUpper,
8131 _ => Action::None,
8132 }
8133 } else {
8134 Action::None
8135 }
8136 }
8137 // BigQuery LAST_DAY(date, unit) -> strip unit for non-BigQuery targets
8138 // Snowflake supports LAST_DAY with unit, so keep it there
8139 Expression::LastDay(ld) => {
8140 if matches!(source, DialectType::BigQuery)
8141 && !matches!(target, DialectType::BigQuery | DialectType::Snowflake)
8142 && ld.unit.is_some()
8143 {
8144 Action::BigQueryLastDayStripUnit
8145 } else {
8146 Action::None
8147 }
8148 }
8149 // BigQuery SafeDivide expressions (already parsed as SafeDivide)
8150 Expression::SafeDivide(_) => {
8151 if matches!(source, DialectType::BigQuery)
8152 && !matches!(target, DialectType::BigQuery)
8153 {
8154 Action::BigQuerySafeDivide
8155 } else {
8156 Action::None
8157 }
8158 }
8159 // BigQuery ANY_VALUE(x HAVING MAX/MIN y) -> ARG_MAX_NULL/ARG_MIN_NULL for DuckDB
8160 // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark
8161 Expression::AnyValue(ref agg) => {
8162 if matches!(source, DialectType::BigQuery)
8163 && matches!(target, DialectType::DuckDB)
8164 && agg.having_max.is_some()
8165 {
8166 Action::BigQueryAnyValueHaving
8167 } else if matches!(target, DialectType::Spark | DialectType::Databricks)
8168 && !matches!(source, DialectType::Spark | DialectType::Databricks)
8169 && agg.ignore_nulls.is_none()
8170 {
8171 Action::AnyValueIgnoreNulls
8172 } else {
8173 Action::None
8174 }
8175 }
8176 Expression::Any(ref q) => {
8177 if matches!(source, DialectType::PostgreSQL)
8178 && matches!(
8179 target,
8180 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8181 )
8182 && q.op.is_some()
8183 && !matches!(
8184 q.subquery,
8185 Expression::Select(_) | Expression::Subquery(_)
8186 )
8187 {
8188 Action::AnyToExists
8189 } else {
8190 Action::None
8191 }
8192 }
8193 // BigQuery APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [quantiles]) for DuckDB
8194 // Snowflake RLIKE does full-string match; DuckDB REGEXP_FULL_MATCH also does full-string match
8195 Expression::RegexpLike(_)
8196 if matches!(source, DialectType::Snowflake)
8197 && matches!(target, DialectType::DuckDB) =>
8198 {
8199 Action::RlikeSnowflakeToDuckDB
8200 }
8201 // RegexpLike from non-DuckDB/non-Snowflake sources -> REGEXP_MATCHES for DuckDB target
8202 Expression::RegexpLike(_)
8203 if !matches!(source, DialectType::DuckDB)
8204 && matches!(target, DialectType::DuckDB) =>
8205 {
8206 Action::RegexpLikeToDuckDB
8207 }
8208 // RegexpLike -> Exasol: anchor pattern with .*...*
8209 Expression::RegexpLike(_)
8210 if matches!(target, DialectType::Exasol) =>
8211 {
8212 Action::RegexpLikeExasolAnchor
8213 }
8214 // Safe-division source -> non-safe target: NULLIF wrapping and/or CAST
8215 // Safe-division dialects: MySQL, DuckDB, SingleStore, TiDB, ClickHouse, Doris
8216 Expression::Div(ref op)
8217 if matches!(
8218 source,
8219 DialectType::MySQL
8220 | DialectType::DuckDB
8221 | DialectType::SingleStore
8222 | DialectType::TiDB
8223 | DialectType::ClickHouse
8224 | DialectType::Doris
8225 ) && matches!(
8226 target,
8227 DialectType::PostgreSQL
8228 | DialectType::Redshift
8229 | DialectType::Drill
8230 | DialectType::Trino
8231 | DialectType::Presto
8232 | DialectType::Athena
8233 | DialectType::TSQL
8234 | DialectType::Teradata
8235 | DialectType::SQLite
8236 | DialectType::BigQuery
8237 | DialectType::Snowflake
8238 | DialectType::Databricks
8239 | DialectType::Oracle
8240 | DialectType::Materialize
8241 | DialectType::RisingWave
8242 ) =>
8243 {
8244 // Only wrap if RHS is not already NULLIF
8245 if !matches!(&op.right, Expression::Function(f) if f.name.eq_ignore_ascii_case("NULLIF"))
8246 {
8247 Action::MySQLSafeDivide
8248 } else {
8249 Action::None
8250 }
8251 }
8252 // ALTER TABLE ... RENAME TO <schema>.<table> -> strip schema for most targets
8253 // For TSQL/Fabric, convert to sp_rename instead
8254 Expression::AlterTable(ref at) if !at.actions.is_empty() => {
8255 if let Some(crate::expressions::AlterTableAction::RenameTable(
8256 ref new_tbl,
8257 )) = at.actions.first()
8258 {
8259 if matches!(target, DialectType::TSQL | DialectType::Fabric) {
8260 // TSQL: ALTER TABLE RENAME -> EXEC sp_rename
8261 Action::AlterTableToSpRename
8262 } else if new_tbl.schema.is_some()
8263 && matches!(
8264 target,
8265 DialectType::BigQuery
8266 | DialectType::Doris
8267 | DialectType::StarRocks
8268 | DialectType::DuckDB
8269 | DialectType::PostgreSQL
8270 | DialectType::Redshift
8271 )
8272 {
8273 Action::AlterTableRenameStripSchema
8274 } else {
8275 Action::None
8276 }
8277 } else {
8278 Action::None
8279 }
8280 }
8281 // EPOCH(x) expression -> target-specific epoch conversion
8282 Expression::Epoch(_) if !matches!(target, DialectType::DuckDB) => {
8283 Action::EpochConvert
8284 }
8285 // EPOCH_MS(x) expression -> target-specific epoch ms conversion
8286 Expression::EpochMs(_) if !matches!(target, DialectType::DuckDB) => {
8287 Action::EpochMsConvert
8288 }
8289 // STRING_AGG -> GROUP_CONCAT for MySQL/SQLite
8290 Expression::StringAgg(_) => {
8291 if matches!(
8292 target,
8293 DialectType::MySQL
8294 | DialectType::SingleStore
8295 | DialectType::Doris
8296 | DialectType::StarRocks
8297 | DialectType::SQLite
8298 ) {
8299 Action::StringAggConvert
8300 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
8301 Action::StringAggConvert
8302 } else {
8303 Action::None
8304 }
8305 }
8306 Expression::CombinedParameterizedAgg(_) => Action::GenericFunctionNormalize,
8307 // GROUP_CONCAT -> STRING_AGG for PostgreSQL/Presto/etc.
8308 // Also handles GROUP_CONCAT normalization for MySQL/SQLite targets
8309 Expression::GroupConcat(_) => Action::GroupConcatConvert,
8310 // CARDINALITY/ARRAY_LENGTH/ARRAY_SIZE -> target-specific array length
8311 // DuckDB CARDINALITY -> keep as CARDINALITY for DuckDB target (used for maps)
8312 Expression::Cardinality(_)
8313 if matches!(source, DialectType::DuckDB)
8314 && matches!(target, DialectType::DuckDB) =>
8315 {
8316 Action::None
8317 }
8318 Expression::Cardinality(_) | Expression::ArrayLength(_) => {
8319 Action::ArrayLengthConvert
8320 }
8321 Expression::ArraySize(_) => {
8322 if matches!(target, DialectType::Drill) {
8323 Action::ArraySizeDrill
8324 } else {
8325 Action::ArrayLengthConvert
8326 }
8327 }
8328 // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter/ARRAY subquery
8329 Expression::ArrayRemove(_) => match target {
8330 DialectType::DuckDB | DialectType::ClickHouse | DialectType::BigQuery => {
8331 Action::ArrayRemoveConvert
8332 }
8333 _ => Action::None,
8334 },
8335 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse
8336 Expression::ArrayReverse(_) => match target {
8337 DialectType::ClickHouse => Action::ArrayReverseConvert,
8338 _ => Action::None,
8339 },
8340 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS for Spark/Databricks/Snowflake
8341 Expression::JsonKeys(_) => match target {
8342 DialectType::Spark | DialectType::Databricks | DialectType::Snowflake => {
8343 Action::JsonKeysConvert
8344 }
8345 _ => Action::None,
8346 },
8347 // PARSE_JSON(x) -> strip for SQLite/Doris/MySQL/StarRocks
8348 Expression::ParseJson(_) => match target {
8349 DialectType::SQLite
8350 | DialectType::Doris
8351 | DialectType::MySQL
8352 | DialectType::StarRocks => Action::ParseJsonStrip,
8353 _ => Action::None,
8354 },
8355 // WeekOfYear -> WEEKISO for Snowflake (cross-dialect only)
8356 Expression::WeekOfYear(_)
8357 if matches!(target, DialectType::Snowflake)
8358 && !matches!(source, DialectType::Snowflake) =>
8359 {
8360 Action::WeekOfYearToWeekIso
8361 }
8362 // NVL: clear original_name so generator uses dialect-specific function names
8363 Expression::Nvl(f) if f.original_name.is_some() => Action::NvlClearOriginal,
8364 // XOR: expand for dialects that don't support the XOR keyword
8365 Expression::Xor(_) => {
8366 let target_supports_xor = matches!(
8367 target,
8368 DialectType::MySQL
8369 | DialectType::SingleStore
8370 | DialectType::Doris
8371 | DialectType::StarRocks
8372 );
8373 if !target_supports_xor {
8374 Action::XorExpand
8375 } else {
8376 Action::None
8377 }
8378 }
8379 // TSQL #table -> temp table normalization (CREATE TABLE)
8380 Expression::CreateTable(ct)
8381 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8382 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8383 && ct.name.name.name.starts_with('#') =>
8384 {
8385 Action::TempTableHash
8386 }
8387 // TSQL #table -> strip # from table references in SELECT/etc.
8388 Expression::Table(tr)
8389 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8390 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8391 && tr.name.name.starts_with('#') =>
8392 {
8393 Action::TempTableHash
8394 }
8395 // TSQL #table -> strip # from DROP TABLE names
8396 Expression::DropTable(ref dt)
8397 if matches!(source, DialectType::TSQL | DialectType::Fabric)
8398 && !matches!(target, DialectType::TSQL | DialectType::Fabric)
8399 && dt.names.iter().any(|n| n.name.name.starts_with('#')) =>
8400 {
8401 Action::TempTableHash
8402 }
8403 // JSON_EXTRACT -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8404 Expression::JsonExtract(_)
8405 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8406 {
8407 Action::JsonExtractToTsql
8408 }
8409 // JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY, JSON_VALUE) for TSQL
8410 Expression::JsonExtractScalar(_)
8411 if matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8412 {
8413 Action::JsonExtractToTsql
8414 }
8415 // JSON_EXTRACT -> JSONExtractString for ClickHouse
8416 Expression::JsonExtract(_) if matches!(target, DialectType::ClickHouse) => {
8417 Action::JsonExtractToClickHouse
8418 }
8419 // JSON_EXTRACT_SCALAR -> JSONExtractString for ClickHouse
8420 Expression::JsonExtractScalar(_)
8421 if matches!(target, DialectType::ClickHouse) =>
8422 {
8423 Action::JsonExtractToClickHouse
8424 }
8425 // JSON_EXTRACT -> arrow syntax for SQLite/DuckDB
8426 Expression::JsonExtract(ref f)
8427 if !f.arrow_syntax
8428 && matches!(target, DialectType::SQLite | DialectType::DuckDB) =>
8429 {
8430 Action::JsonExtractToArrow
8431 }
8432 // JSON_EXTRACT with JSONPath -> JSON_EXTRACT_PATH for PostgreSQL (non-PG sources only)
8433 Expression::JsonExtract(ref f)
8434 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift)
8435 && !matches!(
8436 source,
8437 DialectType::PostgreSQL
8438 | DialectType::Redshift
8439 | DialectType::Materialize
8440 )
8441 && matches!(&f.path, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$'))) =>
8442 {
8443 Action::JsonExtractToGetJsonObject
8444 }
8445 // JSON_EXTRACT -> GET_JSON_OBJECT for Hive/Spark
8446 Expression::JsonExtract(_)
8447 if matches!(
8448 target,
8449 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8450 ) =>
8451 {
8452 Action::JsonExtractToGetJsonObject
8453 }
8454 // JSON_EXTRACT_SCALAR -> target-specific for PostgreSQL, Snowflake, SQLite
8455 // Skip if already in arrow/hash_arrow syntax (same-dialect identity case)
8456 Expression::JsonExtractScalar(ref f)
8457 if !f.arrow_syntax
8458 && !f.hash_arrow_syntax
8459 && matches!(
8460 target,
8461 DialectType::PostgreSQL
8462 | DialectType::Redshift
8463 | DialectType::Snowflake
8464 | DialectType::SQLite
8465 | DialectType::DuckDB
8466 ) =>
8467 {
8468 Action::JsonExtractScalarConvert
8469 }
8470 // JSON_EXTRACT_SCALAR -> GET_JSON_OBJECT for Hive/Spark
8471 Expression::JsonExtractScalar(_)
8472 if matches!(
8473 target,
8474 DialectType::Hive | DialectType::Spark | DialectType::Databricks
8475 ) =>
8476 {
8477 Action::JsonExtractScalarToGetJsonObject
8478 }
8479 // JSON_EXTRACT path normalization for BigQuery, MySQL (bracket/wildcard handling)
8480 Expression::JsonExtract(ref f)
8481 if !f.arrow_syntax
8482 && matches!(target, DialectType::BigQuery | DialectType::MySQL) =>
8483 {
8484 Action::JsonPathNormalize
8485 }
8486 // JsonQuery (parsed JSON_QUERY) -> target-specific
8487 Expression::JsonQuery(_) => Action::JsonQueryValueConvert,
8488 // JsonValue (parsed JSON_VALUE) -> target-specific
8489 Expression::JsonValue(_) => Action::JsonQueryValueConvert,
8490 // AT TIME ZONE -> AT_TIMEZONE for Presto, FROM_UTC_TIMESTAMP for Spark,
8491 // TIMESTAMP(DATETIME(...)) for BigQuery, CONVERT_TIMEZONE for Snowflake
8492 Expression::AtTimeZone(_)
8493 if matches!(
8494 target,
8495 DialectType::Presto
8496 | DialectType::Trino
8497 | DialectType::Athena
8498 | DialectType::Spark
8499 | DialectType::Databricks
8500 | DialectType::BigQuery
8501 | DialectType::Snowflake
8502 ) =>
8503 {
8504 Action::AtTimeZoneConvert
8505 }
8506 // DAY_OF_WEEK -> dialect-specific
8507 Expression::DayOfWeek(_)
8508 if matches!(
8509 target,
8510 DialectType::DuckDB | DialectType::Spark | DialectType::Databricks
8511 ) =>
8512 {
8513 Action::DayOfWeekConvert
8514 }
8515 // CURRENT_USER -> CURRENT_USER() for Snowflake
8516 Expression::CurrentUser(_) if matches!(target, DialectType::Snowflake) => {
8517 Action::CurrentUserParens
8518 }
8519 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
8520 Expression::ElementAt(_)
8521 if matches!(target, DialectType::PostgreSQL | DialectType::BigQuery) =>
8522 {
8523 Action::ElementAtConvert
8524 }
8525 // ARRAY[...] (ArrayFunc bracket_notation=false) -> convert for target dialect
8526 Expression::ArrayFunc(ref arr)
8527 if !arr.bracket_notation
8528 && matches!(
8529 target,
8530 DialectType::Spark
8531 | DialectType::Databricks
8532 | DialectType::Hive
8533 | DialectType::BigQuery
8534 | DialectType::DuckDB
8535 | DialectType::Snowflake
8536 | DialectType::Presto
8537 | DialectType::Trino
8538 | DialectType::Athena
8539 | DialectType::ClickHouse
8540 | DialectType::StarRocks
8541 ) =>
8542 {
8543 Action::ArraySyntaxConvert
8544 }
8545 // VARIANCE expression -> varSamp for ClickHouse
8546 Expression::Variance(_) if matches!(target, DialectType::ClickHouse) => {
8547 Action::VarianceToClickHouse
8548 }
8549 // STDDEV expression -> stddevSamp for ClickHouse
8550 Expression::Stddev(_) if matches!(target, DialectType::ClickHouse) => {
8551 Action::StddevToClickHouse
8552 }
8553 // ApproxQuantile -> APPROX_PERCENTILE for Snowflake
8554 Expression::ApproxQuantile(_) if matches!(target, DialectType::Snowflake) => {
8555 Action::ApproxQuantileConvert
8556 }
8557 // MonthsBetween -> target-specific
8558 Expression::MonthsBetween(_)
8559 if !matches!(
8560 target,
8561 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8562 ) =>
8563 {
8564 Action::MonthsBetweenConvert
8565 }
8566 // AddMonths -> target-specific DATEADD/DATE_ADD
8567 Expression::AddMonths(_) => Action::AddMonthsConvert,
8568 // MapFromArrays -> target-specific (MAP, OBJECT_CONSTRUCT, MAP_FROM_ARRAYS)
8569 Expression::MapFromArrays(_)
8570 if !matches!(target, DialectType::Spark | DialectType::Databricks) =>
8571 {
8572 Action::MapFromArraysConvert
8573 }
8574 // CURRENT_USER -> CURRENT_USER() for Spark
8575 Expression::CurrentUser(_)
8576 if matches!(target, DialectType::Spark | DialectType::Databricks) =>
8577 {
8578 Action::CurrentUserSparkParens
8579 }
8580 // MONTH/YEAR/DAY('string') from Spark -> cast string to DATE for DuckDB/Presto
8581 Expression::Month(ref f) | Expression::Year(ref f) | Expression::Day(ref f)
8582 if matches!(
8583 source,
8584 DialectType::Spark | DialectType::Databricks | DialectType::Hive
8585 ) && matches!(&f.this, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
8586 && matches!(
8587 target,
8588 DialectType::DuckDB
8589 | DialectType::Presto
8590 | DialectType::Trino
8591 | DialectType::Athena
8592 | DialectType::PostgreSQL
8593 | DialectType::Redshift
8594 ) =>
8595 {
8596 Action::SparkDateFuncCast
8597 }
8598 // $parameter -> @parameter for BigQuery
8599 Expression::Parameter(ref p)
8600 if matches!(target, DialectType::BigQuery)
8601 && matches!(source, DialectType::DuckDB)
8602 && (p.style == crate::expressions::ParameterStyle::Dollar
8603 || p.style == crate::expressions::ParameterStyle::DoubleDollar) =>
8604 {
8605 Action::DollarParamConvert
8606 }
8607 // EscapeString literal: normalize literal newlines to \n
8608 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::EscapeString(ref s) if s.contains('\n') || s.contains('\r') || s.contains('\t'))
8609 =>
8610 {
8611 Action::EscapeStringNormalize
8612 }
8613 // straight_join: keep lowercase for DuckDB, quote for MySQL
8614 Expression::Column(ref col)
8615 if col.name.name == "STRAIGHT_JOIN"
8616 && col.table.is_none()
8617 && matches!(source, DialectType::DuckDB)
8618 && matches!(target, DialectType::DuckDB | DialectType::MySQL) =>
8619 {
8620 Action::StraightJoinCase
8621 }
8622 // DATE and TIMESTAMP literal type conversions are now handled in the generator directly
8623 // Snowflake INTERVAL format: INTERVAL '2' HOUR -> INTERVAL '2 HOUR'
8624 Expression::Interval(ref iv)
8625 if matches!(
8626 target,
8627 DialectType::Snowflake
8628 | DialectType::PostgreSQL
8629 | DialectType::Redshift
8630 ) && iv.unit.is_some()
8631 && iv.this.as_ref().map_or(false, |t| matches!(t, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))) =>
8632 {
8633 Action::SnowflakeIntervalFormat
8634 }
8635 // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB target
8636 Expression::TableSample(ref ts) if matches!(target, DialectType::DuckDB) => {
8637 if let Some(ref sample) = ts.sample {
8638 if !sample.explicit_method {
8639 Action::TablesampleReservoir
8640 } else {
8641 Action::None
8642 }
8643 } else {
8644 Action::None
8645 }
8646 }
8647 // TABLESAMPLE from non-Snowflake source to Snowflake: strip method and PERCENT
8648 // Handles both Expression::TableSample wrapper and Expression::Table with table_sample
8649 Expression::TableSample(ref ts)
8650 if matches!(target, DialectType::Snowflake)
8651 && !matches!(source, DialectType::Snowflake)
8652 && ts.sample.is_some() =>
8653 {
8654 if let Some(ref sample) = ts.sample {
8655 if !sample.explicit_method {
8656 Action::TablesampleSnowflakeStrip
8657 } else {
8658 Action::None
8659 }
8660 } else {
8661 Action::None
8662 }
8663 }
8664 Expression::Table(ref t)
8665 if matches!(target, DialectType::Snowflake)
8666 && !matches!(source, DialectType::Snowflake)
8667 && t.table_sample.is_some() =>
8668 {
8669 if let Some(ref sample) = t.table_sample {
8670 if !sample.explicit_method {
8671 Action::TablesampleSnowflakeStrip
8672 } else {
8673 Action::None
8674 }
8675 } else {
8676 Action::None
8677 }
8678 }
8679 // ALTER TABLE RENAME -> EXEC sp_rename for TSQL
8680 Expression::AlterTable(ref at)
8681 if matches!(target, DialectType::TSQL | DialectType::Fabric)
8682 && !at.actions.is_empty()
8683 && matches!(
8684 at.actions.first(),
8685 Some(crate::expressions::AlterTableAction::RenameTable(_))
8686 ) =>
8687 {
8688 Action::AlterTableToSpRename
8689 }
8690 // Subscript index: 1-based to 0-based for BigQuery/Hive/Spark
8691 Expression::Subscript(ref sub)
8692 if matches!(
8693 target,
8694 DialectType::BigQuery
8695 | DialectType::Hive
8696 | DialectType::Spark
8697 | DialectType::Databricks
8698 ) && matches!(
8699 source,
8700 DialectType::DuckDB
8701 | DialectType::PostgreSQL
8702 | DialectType::Presto
8703 | DialectType::Trino
8704 | DialectType::Redshift
8705 | DialectType::ClickHouse
8706 ) && matches!(&sub.index, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(ref n) if n.parse::<i64>().unwrap_or(0) > 0)) =>
8707 {
8708 Action::ArrayIndexConvert
8709 }
8710 // ANY_VALUE IGNORE NULLS detection moved to the AnyValue arm above
8711 // MysqlNullsOrdering for Ordered is now handled in the Ordered arm above
8712 // RESPECT NULLS handling for SQLite (strip it, add NULLS LAST to ORDER BY)
8713 // and for MySQL (rewrite ORDER BY with CASE WHEN for null ordering)
8714 Expression::WindowFunction(ref wf) => {
8715 // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY
8716 // EXCEPT for ROW_NUMBER which keeps NULLS LAST
8717 let is_row_number = matches!(wf.this, Expression::RowNumber(_));
8718 if matches!(target, DialectType::BigQuery)
8719 && !is_row_number
8720 && !wf.over.order_by.is_empty()
8721 && wf.over.order_by.iter().any(|o| o.nulls_first.is_some())
8722 {
8723 Action::BigQueryNullsOrdering
8724 // DuckDB -> MySQL: Add CASE WHEN for NULLS LAST simulation in window ORDER BY
8725 // But NOT when frame is RANGE/GROUPS, since adding CASE WHEN would break value-based frames
8726 } else {
8727 let source_nulls_last = matches!(source, DialectType::DuckDB);
8728 let has_range_frame = wf.over.frame.as_ref().map_or(false, |f| {
8729 matches!(
8730 f.kind,
8731 crate::expressions::WindowFrameKind::Range
8732 | crate::expressions::WindowFrameKind::Groups
8733 )
8734 });
8735 if source_nulls_last
8736 && matches!(target, DialectType::MySQL)
8737 && !wf.over.order_by.is_empty()
8738 && wf.over.order_by.iter().any(|o| !o.desc)
8739 && !has_range_frame
8740 {
8741 Action::MysqlNullsLastRewrite
8742 } else {
8743 // Check for Snowflake window frame handling for FIRST_VALUE/LAST_VALUE/NTH_VALUE
8744 let is_ranking_window_func = matches!(
8745 &wf.this,
8746 Expression::FirstValue(_)
8747 | Expression::LastValue(_)
8748 | Expression::NthValue(_)
8749 );
8750 let has_full_unbounded_frame = wf.over.frame.as_ref().map_or(false, |f| {
8751 matches!(f.kind, crate::expressions::WindowFrameKind::Rows)
8752 && matches!(f.start, crate::expressions::WindowFrameBound::UnboundedPreceding)
8753 && matches!(f.end, Some(crate::expressions::WindowFrameBound::UnboundedFollowing))
8754 && f.exclude.is_none()
8755 });
8756 if is_ranking_window_func && matches!(source, DialectType::Snowflake) {
8757 if has_full_unbounded_frame && matches!(target, DialectType::Snowflake) {
8758 // Strip the default frame for Snowflake target
8759 Action::SnowflakeWindowFrameStrip
8760 } else if !has_full_unbounded_frame && wf.over.frame.is_none() && !matches!(target, DialectType::Snowflake) {
8761 // Add default frame for non-Snowflake target
8762 Action::SnowflakeWindowFrameAdd
8763 } else {
8764 match &wf.this {
8765 Expression::FirstValue(ref vf)
8766 | Expression::LastValue(ref vf)
8767 if vf.ignore_nulls == Some(false) =>
8768 {
8769 match target {
8770 DialectType::SQLite => Action::RespectNullsConvert,
8771 _ => Action::None,
8772 }
8773 }
8774 _ => Action::None,
8775 }
8776 }
8777 } else {
8778 match &wf.this {
8779 Expression::FirstValue(ref vf)
8780 | Expression::LastValue(ref vf)
8781 if vf.ignore_nulls == Some(false) =>
8782 {
8783 // RESPECT NULLS
8784 match target {
8785 DialectType::SQLite | DialectType::PostgreSQL => {
8786 Action::RespectNullsConvert
8787 }
8788 _ => Action::None,
8789 }
8790 }
8791 _ => Action::None,
8792 }
8793 }
8794 }
8795 }
8796 }
8797 // CREATE TABLE a LIKE b -> dialect-specific transformations
8798 Expression::CreateTable(ref ct)
8799 if ct.columns.is_empty()
8800 && ct.constraints.iter().any(|c| {
8801 matches!(c, crate::expressions::TableConstraint::Like { .. })
8802 })
8803 && matches!(
8804 target,
8805 DialectType::DuckDB | DialectType::SQLite | DialectType::Drill
8806 ) =>
8807 {
8808 Action::CreateTableLikeToCtas
8809 }
8810 Expression::CreateTable(ref ct)
8811 if ct.columns.is_empty()
8812 && ct.constraints.iter().any(|c| {
8813 matches!(c, crate::expressions::TableConstraint::Like { .. })
8814 })
8815 && matches!(target, DialectType::TSQL | DialectType::Fabric) =>
8816 {
8817 Action::CreateTableLikeToSelectInto
8818 }
8819 Expression::CreateTable(ref ct)
8820 if ct.columns.is_empty()
8821 && ct.constraints.iter().any(|c| {
8822 matches!(c, crate::expressions::TableConstraint::Like { .. })
8823 })
8824 && matches!(target, DialectType::ClickHouse) =>
8825 {
8826 Action::CreateTableLikeToAs
8827 }
8828 // CREATE TABLE: strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB
8829 Expression::CreateTable(ref ct)
8830 if matches!(target, DialectType::DuckDB)
8831 && matches!(
8832 source,
8833 DialectType::DuckDB
8834 | DialectType::Spark
8835 | DialectType::Databricks
8836 | DialectType::Hive
8837 ) =>
8838 {
8839 let has_comment = ct.columns.iter().any(|c| {
8840 c.comment.is_some()
8841 || c.constraints.iter().any(|con| {
8842 matches!(con, crate::expressions::ColumnConstraint::Comment(_))
8843 })
8844 });
8845 let has_props = !ct.properties.is_empty();
8846 if has_comment || has_props {
8847 Action::CreateTableStripComment
8848 } else {
8849 Action::None
8850 }
8851 }
8852 // Array conversion: Expression::Array -> Expression::ArrayFunc for PostgreSQL
8853 Expression::Array(_)
8854 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) =>
8855 {
8856 Action::ArrayConcatBracketConvert
8857 }
8858 // ArrayFunc (bracket notation) -> Function("ARRAY") for Redshift (from BigQuery source)
8859 Expression::ArrayFunc(ref arr)
8860 if arr.bracket_notation
8861 && matches!(source, DialectType::BigQuery)
8862 && matches!(target, DialectType::Redshift) =>
8863 {
8864 Action::ArrayConcatBracketConvert
8865 }
8866 // BIT_OR/BIT_AND/BIT_XOR: float/decimal arg cast for DuckDB, or rename for Snowflake
8867 Expression::BitwiseOrAgg(ref f)
8868 | Expression::BitwiseAndAgg(ref f)
8869 | Expression::BitwiseXorAgg(ref f) => {
8870 if matches!(target, DialectType::DuckDB) {
8871 // Check if the arg is CAST(val AS FLOAT/DOUBLE/DECIMAL/REAL)
8872 if let Expression::Cast(ref c) = f.this {
8873 match &c.to {
8874 DataType::Float { .. }
8875 | DataType::Double { .. }
8876 | DataType::Decimal { .. } => Action::BitAggFloatCast,
8877 DataType::Custom { ref name }
8878 if name.eq_ignore_ascii_case("REAL") =>
8879 {
8880 Action::BitAggFloatCast
8881 }
8882 _ => Action::None,
8883 }
8884 } else {
8885 Action::None
8886 }
8887 } else if matches!(target, DialectType::Snowflake) {
8888 Action::BitAggSnowflakeRename
8889 } else {
8890 Action::None
8891 }
8892 }
8893 // FILTER -> IFF for Snowflake (aggregate functions with FILTER clause)
8894 Expression::Filter(ref _f) if matches!(target, DialectType::Snowflake) => {
8895 Action::FilterToIff
8896 }
8897 // AggFunc.filter -> IFF wrapping for Snowflake (e.g., AVG(x) FILTER(WHERE cond))
8898 Expression::Avg(ref f)
8899 | Expression::Sum(ref f)
8900 | Expression::Min(ref f)
8901 | Expression::Max(ref f)
8902 | Expression::CountIf(ref f)
8903 | Expression::Stddev(ref f)
8904 | Expression::StddevPop(ref f)
8905 | Expression::StddevSamp(ref f)
8906 | Expression::Variance(ref f)
8907 | Expression::VarPop(ref f)
8908 | Expression::VarSamp(ref f)
8909 | Expression::Median(ref f)
8910 | Expression::Mode(ref f)
8911 | Expression::First(ref f)
8912 | Expression::Last(ref f)
8913 | Expression::ApproxDistinct(ref f)
8914 if f.filter.is_some() && matches!(target, DialectType::Snowflake) =>
8915 {
8916 Action::AggFilterToIff
8917 }
8918 Expression::Count(ref c)
8919 if c.filter.is_some() && matches!(target, DialectType::Snowflake) =>
8920 {
8921 Action::AggFilterToIff
8922 }
8923 // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN ... END) for dialects that don't support multi-arg DISTINCT
8924 Expression::Count(ref c)
8925 if c.distinct
8926 && matches!(&c.this, Some(Expression::Tuple(_)))
8927 && matches!(
8928 target,
8929 DialectType::Presto
8930 | DialectType::Trino
8931 | DialectType::DuckDB
8932 | DialectType::PostgreSQL
8933 ) =>
8934 {
8935 Action::CountDistinctMultiArg
8936 }
8937 // JSON arrow -> GET_PATH/PARSE_JSON for Snowflake
8938 Expression::JsonExtract(_) if matches!(target, DialectType::Snowflake) => {
8939 Action::JsonToGetPath
8940 }
8941 // DuckDB struct/dict -> BigQuery STRUCT / Presto ROW
8942 Expression::Struct(_)
8943 if matches!(
8944 target,
8945 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
8946 ) && matches!(source, DialectType::DuckDB) =>
8947 {
8948 Action::StructToRow
8949 }
8950 // DuckDB curly-brace dict {'key': value} -> BigQuery STRUCT / Presto ROW
8951 Expression::MapFunc(ref m)
8952 if m.curly_brace_syntax
8953 && matches!(
8954 target,
8955 DialectType::BigQuery | DialectType::Presto | DialectType::Trino
8956 )
8957 && matches!(source, DialectType::DuckDB) =>
8958 {
8959 Action::StructToRow
8960 }
8961 // APPROX_COUNT_DISTINCT -> APPROX_DISTINCT for Presto/Trino
8962 Expression::ApproxCountDistinct(_)
8963 if matches!(
8964 target,
8965 DialectType::Presto | DialectType::Trino | DialectType::Athena
8966 ) =>
8967 {
8968 Action::ApproxCountDistinctToApproxDistinct
8969 }
8970 // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val) for Presto, ARRAY_CONTAINS(CAST(val AS VARIANT), arr) for Snowflake
8971 Expression::ArrayContains(_)
8972 if matches!(
8973 target,
8974 DialectType::Presto | DialectType::Trino | DialectType::Snowflake
8975 ) && !(matches!(source, DialectType::Snowflake) && matches!(target, DialectType::Snowflake)) =>
8976 {
8977 Action::ArrayContainsConvert
8978 }
                // ARRAY_CONTAINS -> DuckDB NULL-aware CASE (from Snowflake source with check_null semantics)
                Expression::ArrayContains(_)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Snowflake) =>
                {
                    Action::ArrayContainsDuckDBConvert
                }
                // ARRAY_EXCEPT -> target-specific conversion
                Expression::ArrayExcept(_)
                    if matches!(
                        target,
                        DialectType::DuckDB | DialectType::Snowflake | DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) =>
                {
                    Action::ArrayExceptConvert
                }
                // ARRAY_POSITION -> swap args for Snowflake target (only when source is not Snowflake)
                // (Snowflake takes (value, array); most other dialects take (array, value).)
                Expression::ArrayPosition(_)
                    if matches!(target, DialectType::Snowflake)
                        && !matches!(source, DialectType::Snowflake) =>
                {
                    Action::ArrayPositionSnowflakeSwap
                }
                // ARRAY_POSITION(val, arr) -> ARRAY_POSITION(arr, val) - 1 for DuckDB from Snowflake source
                // (Snowflake positions are 0-based, DuckDB's are 1-based, hence the -1.)
                Expression::ArrayPosition(_)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Snowflake) =>
                {
                    Action::SnowflakeArrayPositionToDuckDB
                }
                // ARRAY_DISTINCT -> arrayDistinct for ClickHouse
                // (Guards on this arm and the next are disjoint: ClickHouse target
                // vs. DuckDB target, so arm order is not load-bearing here.)
                Expression::ArrayDistinct(_)
                    if matches!(target, DialectType::ClickHouse) =>
                {
                    Action::ArrayDistinctClickHouse
                }
                // ARRAY_DISTINCT -> DuckDB LIST_DISTINCT with NULL-aware CASE
                Expression::ArrayDistinct(_)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Snowflake) =>
                {
                    Action::ArrayDistinctConvert
                }
                // StrPosition with position -> complex expansion for Presto/DuckDB
                // STRPOS doesn't support a position arg in these dialects
                Expression::StrPosition(ref sp)
                    if sp.position.is_some()
                        && matches!(
                            target,
                            DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena
                                | DialectType::DuckDB
                        ) =>
                {
                    Action::StrPositionExpand
                }
                // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB
                Expression::First(ref f)
                    if f.ignore_nulls == Some(true)
                        && matches!(target, DialectType::DuckDB) =>
                {
                    Action::FirstToAnyValue
                }
                // BEGIN -> START TRANSACTION for Presto/Trino
                Expression::Command(ref cmd)
                    if cmd.this.eq_ignore_ascii_case("BEGIN")
                        && matches!(
                            target,
                            DialectType::Presto | DialectType::Trino | DialectType::Athena
                        ) =>
                {
                    // Handled inline below
                    // NOTE(review): the inline handler in the Action::None arm matches
                    // Expression::Transaction, not Expression::Command -- confirm this
                    // Command path is still reachable/needed, or whether it is dead.
                    Action::None // We'll handle it directly
                }
                // Note: PostgreSQL ^ is now parsed as Power directly (not BitwiseXor).
                // PostgreSQL # is parsed as BitwiseXor (which is correct).
                // a || b (Concat operator) -> CONCAT function for Presto/Trino
                Expression::Concat(ref _op)
                    if matches!(source, DialectType::PostgreSQL | DialectType::Redshift)
                        && matches!(target, DialectType::Presto | DialectType::Trino) =>
                {
                    Action::PipeConcatToConcat
                }
                // Default: no dedicated action; inline transforms may still apply below.
                _ => Action::None,
            }
        };
9066
9067 match action {
            Action::None => {
                // Handle inline transforms that don't need a dedicated action

                // BETWEEN SYMMETRIC/ASYMMETRIC expansion for non-PostgreSQL/Dremio targets
                if let Expression::Between(ref b) = e {
                    if let Some(sym) = b.symmetric {
                        let keeps_symmetric =
                            matches!(target, DialectType::PostgreSQL | DialectType::Dremio);
                        if !keeps_symmetric {
                            if sym {
                                // SYMMETRIC: expand to (x BETWEEN a AND b OR x BETWEEN b AND a)
                                // Re-match by value: the borrow above only inspected fields.
                                let b = if let Expression::Between(b) = e {
                                    *b
                                } else {
                                    unreachable!()
                                };
                                let between1 = Expression::Between(Box::new(
                                    crate::expressions::Between {
                                        this: b.this.clone(),
                                        low: b.low.clone(),
                                        high: b.high.clone(),
                                        not: b.not,
                                        symmetric: None,
                                    },
                                ));
                                // Second leg swaps the low/high bounds.
                                let between2 = Expression::Between(Box::new(
                                    crate::expressions::Between {
                                        this: b.this,
                                        low: b.high,
                                        high: b.low,
                                        not: b.not,
                                        symmetric: None,
                                    },
                                ));
                                return Ok(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: Expression::Or(Box::new(
                                            crate::expressions::BinaryOp::new(
                                                between1, between2,
                                            ),
                                        )),
                                        trailing_comments: vec![],
                                    },
                                )));
                            } else {
                                // ASYMMETRIC: strip qualifier, keep as regular BETWEEN
                                let b = if let Expression::Between(b) = e {
                                    *b
                                } else {
                                    unreachable!()
                                };
                                return Ok(Expression::Between(Box::new(
                                    crate::expressions::Between {
                                        this: b.this,
                                        low: b.low,
                                        high: b.high,
                                        not: b.not,
                                        symmetric: None,
                                    },
                                )));
                            }
                        }
                    }
                }

                // ILIKE -> LOWER(x) LIKE LOWER(y) for StarRocks/Doris
                if let Expression::ILike(ref _like) = e {
                    if matches!(target, DialectType::StarRocks | DialectType::Doris) {
                        let like = if let Expression::ILike(l) = e {
                            *l
                        } else {
                            unreachable!()
                        };
                        let lower_left = Expression::Function(Box::new(Function::new(
                            "LOWER".to_string(),
                            vec![like.left],
                        )));
                        let lower_right = Expression::Function(Box::new(Function::new(
                            "LOWER".to_string(),
                            vec![like.right],
                        )));
                        // ESCAPE clause and ANY/ALL quantifier carry over unchanged.
                        return Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
                            left: lower_left,
                            right: lower_right,
                            escape: like.escape,
                            quantifier: like.quantifier,
                            inferred_type: None,
                        })));
                    }
                }

                // Oracle DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL, RAND() for others
                if let Expression::MethodCall(ref mc) = e {
                    if matches!(source, DialectType::Oracle)
                        && mc.method.name.eq_ignore_ascii_case("VALUE")
                        && mc.args.is_empty()
                    {
                        // The receiver may have been parsed either as a bare identifier
                        // or as an unqualified column reference.
                        let is_dbms_random = match &mc.this {
                            Expression::Identifier(id) => {
                                id.name.eq_ignore_ascii_case("DBMS_RANDOM")
                            }
                            Expression::Column(col) => {
                                col.table.is_none()
                                    && col.name.name.eq_ignore_ascii_case("DBMS_RANDOM")
                            }
                            _ => false,
                        };
                        if is_dbms_random {
                            let func_name = match target {
                                DialectType::PostgreSQL
                                | DialectType::Redshift
                                | DialectType::DuckDB
                                | DialectType::SQLite => "RANDOM",
                                DialectType::Oracle => "DBMS_RANDOM.VALUE",
                                _ => "RAND",
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                func_name.to_string(),
                                vec![],
                            ))));
                        }
                    }
                }
                // TRIM without explicit position -> add BOTH for ClickHouse
                if let Expression::Trim(ref trim) = e {
                    if matches!(target, DialectType::ClickHouse)
                        && trim.sql_standard_syntax
                        && trim.characters.is_some()
                        && !trim.position_explicit
                    {
                        // ClickHouse requires an explicit position keyword when trim
                        // characters are supplied; marking it explicit makes the
                        // generator emit BOTH.
                        let mut new_trim = (**trim).clone();
                        new_trim.position_explicit = true;
                        return Ok(Expression::Trim(Box::new(new_trim)));
                    }
                }
                // BEGIN -> START TRANSACTION for Presto/Trino
                if let Expression::Transaction(ref txn) = e {
                    if matches!(
                        target,
                        DialectType::Presto | DialectType::Trino | DialectType::Athena
                    ) {
                        // Convert BEGIN to START TRANSACTION by setting mark to "START"
                        let mut txn = txn.clone();
                        txn.mark = Some(Box::new(Expression::Identifier(Identifier::new(
                            "START".to_string(),
                        ))));
                        return Ok(Expression::Transaction(Box::new(*txn)));
                    }
                }
                // IS TRUE/FALSE -> simplified forms for Presto/Trino
                if matches!(
                    target,
                    DialectType::Presto | DialectType::Trino | DialectType::Athena
                ) {
                    match &e {
                        Expression::IsTrue(itf) if !itf.not => {
                            // x IS TRUE -> x
                            return Ok(itf.this.clone());
                        }
                        Expression::IsTrue(itf) if itf.not => {
                            // x IS NOT TRUE -> NOT x
                            return Ok(Expression::Not(Box::new(
                                crate::expressions::UnaryOp {
                                    this: itf.this.clone(),
                                    inferred_type: None,
                                },
                            )));
                        }
                        Expression::IsFalse(itf) if !itf.not => {
                            // x IS FALSE -> NOT x
                            return Ok(Expression::Not(Box::new(
                                crate::expressions::UnaryOp {
                                    this: itf.this.clone(),
                                    inferred_type: None,
                                },
                            )));
                        }
                        Expression::IsFalse(itf) if itf.not => {
                            // x IS NOT FALSE -> NOT NOT x
                            // NOTE(review): NOT NOT x yields NULL for NULL input while
                            // IS NOT FALSE yields TRUE -- confirm this matches the
                            // reference transpiler's intended output.
                            let not_x =
                                Expression::Not(Box::new(crate::expressions::UnaryOp {
                                    this: itf.this.clone(),
                                    inferred_type: None,
                                }));
                            return Ok(Expression::Not(Box::new(
                                crate::expressions::UnaryOp {
                                    this: not_x,
                                    inferred_type: None,
                                },
                            )));
                        }
                        _ => {}
                    }
                }
                // x IS NOT FALSE -> NOT x IS FALSE for Redshift
                if matches!(target, DialectType::Redshift) {
                    if let Expression::IsFalse(ref itf) = e {
                        if itf.not {
                            return Ok(Expression::Not(Box::new(
                                crate::expressions::UnaryOp {
                                    this: Expression::IsFalse(Box::new(
                                        crate::expressions::IsTrueFalse {
                                            this: itf.this.clone(),
                                            not: false,
                                        },
                                    )),
                                    inferred_type: None,
                                },
                            )));
                        }
                    }
                }
9280 // REGEXP_REPLACE: add 'g' flag when source defaults to global replacement
9281 // Snowflake default is global, PostgreSQL/DuckDB default is first-match-only
9282 if let Expression::Function(ref f) = e {
9283 if f.name.eq_ignore_ascii_case("REGEXP_REPLACE")
9284 && matches!(source, DialectType::Snowflake)
9285 && matches!(target, DialectType::PostgreSQL | DialectType::DuckDB)
9286 {
9287 if f.args.len() == 3 {
9288 let mut args = f.args.clone();
9289 args.push(Expression::string("g"));
9290 return Ok(Expression::Function(Box::new(Function::new(
9291 "REGEXP_REPLACE".to_string(),
9292 args,
9293 ))));
9294 } else if f.args.len() == 4 {
9295 // 4th arg might be position, add 'g' as 5th
9296 let mut args = f.args.clone();
9297 args.push(Expression::string("g"));
9298 return Ok(Expression::Function(Box::new(Function::new(
9299 "REGEXP_REPLACE".to_string(),
9300 args,
9301 ))));
9302 }
9303 }
9304 }
9305 Ok(e)
9306 }
9307
9308 Action::GreatestLeastNull => {
9309 let f = if let Expression::Function(f) = e {
9310 *f
9311 } else {
9312 unreachable!("action only triggered for Function expressions")
9313 };
9314 let mut null_checks: Vec<Expression> = f
9315 .args
9316 .iter()
9317 .map(|a| {
9318 Expression::IsNull(Box::new(IsNull {
9319 this: a.clone(),
9320 not: false,
9321 postfix_form: false,
9322 }))
9323 })
9324 .collect();
9325 let condition = if null_checks.len() == 1 {
9326 null_checks.remove(0)
9327 } else {
9328 let first = null_checks.remove(0);
9329 null_checks.into_iter().fold(first, |acc, check| {
9330 Expression::Or(Box::new(BinaryOp::new(acc, check)))
9331 })
9332 };
9333 Ok(Expression::Case(Box::new(Case {
9334 operand: None,
9335 whens: vec![(condition, Expression::Null(Null))],
9336 else_: Some(Expression::Function(Box::new(Function::new(
9337 f.name, f.args,
9338 )))),
9339 comments: Vec::new(),
9340 inferred_type: None,
9341 })))
9342 }
9343
            Action::ArrayGenerateRange => {
                // Rewrite a range-generating function call into the target dialect's
                // equivalent, converting exclusive-end semantics to inclusive where
                // the target requires it.
                let f = if let Expression::Function(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for Function expressions")
                };
                let start = f.args[0].clone();
                let end = f.args[1].clone();
                // Optional third argument: step.
                let step = f.args.get(2).cloned();

                // Helper: compute end - 1 for converting exclusive→inclusive end.
                // When end is a literal number, simplify to a computed literal.
                fn exclusive_to_inclusive_end(end: &Expression) -> Expression {
                    // Try to simplify literal numbers
                    match end {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                        {
                            let Literal::Number(n) = lit.as_ref() else {
                                unreachable!()
                            };
                            if let Ok(val) = n.parse::<i64>() {
                                return Expression::number(val - 1);
                            }
                        }
                        // Negative literals arrive as unary negation; fold those too.
                        Expression::Neg(u) => {
                            if let Expression::Literal(lit) = &u.this {
                                if let Literal::Number(n) = lit.as_ref() {
                                    if let Ok(val) = n.parse::<i64>() {
                                        return Expression::number(-val - 1);
                                    }
                                }
                            }
                        }
                        _ => {}
                    }
                    // Non-literal: produce end - 1 expression
                    Expression::Sub(Box::new(BinaryOp::new(end.clone(), Expression::number(1))))
                }

                match target {
                    // Snowflake ARRAY_GENERATE_RANGE and DuckDB RANGE both use exclusive end,
                    // so no adjustment needed — just rename the function.
                    DialectType::Snowflake => {
                        let mut args = vec![start, end];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_GENERATE_RANGE".to_string(),
                            args,
                        ))))
                    }
                    DialectType::DuckDB => {
                        let mut args = vec![start, end];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "RANGE".to_string(),
                            args,
                        ))))
                    }
                    // These dialects use inclusive end, so convert exclusive→inclusive.
                    // Presto/Trino: simplify literal numbers (3 → 2).
                    DialectType::Presto | DialectType::Trino => {
                        let end_inclusive = exclusive_to_inclusive_end(&end);
                        let mut args = vec![start, end_inclusive];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "SEQUENCE".to_string(),
                            args,
                        ))))
                    }
                    // PostgreSQL, Redshift, BigQuery: keep as end - 1 expression form.
                    // NOTE(review): unlike the Presto branch these do not fold literal
                    // ends via exclusive_to_inclusive_end -- presumably to match the
                    // reference transpiler's expected output; confirm.
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
                            end.clone(),
                            Expression::number(1),
                        )));
                        let mut args = vec![start, end_minus_1];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_SERIES".to_string(),
                            args,
                        ))))
                    }
                    DialectType::BigQuery => {
                        let end_minus_1 = Expression::Sub(Box::new(BinaryOp::new(
                            end.clone(),
                            Expression::number(1),
                        )));
                        let mut args = vec![start, end_minus_1];
                        if let Some(s) = step {
                            args.push(s);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                    // Any other target: re-emit the original call untouched.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        f.name, f.args,
                    )))),
                }
            }
9454
            Action::Div0TypedDivision => {
                // DIV0-style IF(cond, x, a / b): cast the dividend to a floating
                // type so the target performs float (not integer) division.
                let if_func = if let Expression::IfFunc(f) = e {
                    *f
                } else {
                    unreachable!("action only triggered for IfFunc expressions")
                };
                if let Some(Expression::Div(div)) = if_func.false_value {
                    // SQLite spells the floating type REAL; others use DOUBLE.
                    let cast_type = if matches!(target, DialectType::SQLite) {
                        DataType::Float {
                            precision: None,
                            scale: None,
                            real_spelling: true,
                        }
                    } else {
                        DataType::Double {
                            precision: None,
                            scale: None,
                        }
                    };
                    let casted_left = Expression::Cast(Box::new(Cast {
                        this: div.left,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    // Rebuild the IF with CAST(left) / right in the false branch.
                    Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                        condition: if_func.condition,
                        true_value: if_func.true_value,
                        false_value: Some(Expression::Div(Box::new(BinaryOp::new(
                            casted_left,
                            div.right,
                        )))),
                        original_name: if_func.original_name,
                        inferred_type: None,
                    })))
                } else {
                    // Not actually a Div, reconstruct
                    Ok(Expression::IfFunc(Box::new(if_func)))
                }
            }
9498
9499 Action::ArrayAggCollectList => {
9500 let agg = if let Expression::ArrayAgg(a) = e {
9501 *a
9502 } else {
9503 unreachable!("action only triggered for ArrayAgg expressions")
9504 };
9505 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9506 name: Some("COLLECT_LIST".to_string()),
9507 ..agg
9508 })))
9509 }
9510
9511 Action::ArrayAggToGroupConcat => {
9512 let agg = if let Expression::ArrayAgg(a) = e {
9513 *a
9514 } else {
9515 unreachable!("action only triggered for ArrayAgg expressions")
9516 };
9517 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9518 name: Some("GROUP_CONCAT".to_string()),
9519 ..agg
9520 })))
9521 }
9522
            Action::ArrayAggWithinGroupFilter => {
                // ARRAY_AGG(x) WITHIN GROUP (ORDER BY ...) ->
                // ARRAY_AGG(x ORDER BY ...) FILTER (WHERE x IS NOT NULL):
                // fold the WITHIN GROUP ordering into the aggregate and add a
                // NULL-excluding filter.
                let wg = if let Expression::WithinGroup(w) = e {
                    *w
                } else {
                    unreachable!("action only triggered for WithinGroup expressions")
                };
                if let Expression::ArrayAgg(inner_agg) = wg.this {
                    let col = inner_agg.this.clone();
                    let filter = Expression::IsNull(Box::new(IsNull {
                        this: col,
                        not: true,
                        postfix_form: false,
                    }));
                    // For DuckDB, add explicit NULLS FIRST for DESC ordering
                    let order_by = if matches!(target, DialectType::DuckDB) {
                        wg.order_by
                            .into_iter()
                            .map(|mut o| {
                                // Only fill in the default when not already explicit.
                                if o.desc && o.nulls_first.is_none() {
                                    o.nulls_first = Some(true);
                                }
                                o
                            })
                            .collect()
                    } else {
                        wg.order_by
                    };
                    Ok(Expression::ArrayAgg(Box::new(AggFunc {
                        this: inner_agg.this,
                        distinct: inner_agg.distinct,
                        filter: Some(filter),
                        order_by,
                        name: inner_agg.name,
                        ignore_nulls: inner_agg.ignore_nulls,
                        having_max: inner_agg.having_max,
                        limit: inner_agg.limit,
                        inferred_type: None,
                    })))
                } else {
                    // Not an ARRAY_AGG inside WITHIN GROUP: leave untouched.
                    Ok(Expression::WithinGroup(Box::new(wg)))
                }
            }
9565
9566 Action::ArrayAggFilter => {
9567 let agg = if let Expression::ArrayAgg(a) = e {
9568 *a
9569 } else {
9570 unreachable!("action only triggered for ArrayAgg expressions")
9571 };
9572 let col = agg.this.clone();
9573 let filter = Expression::IsNull(Box::new(IsNull {
9574 this: col,
9575 not: true,
9576 postfix_form: false,
9577 }));
9578 Ok(Expression::ArrayAgg(Box::new(AggFunc {
9579 filter: Some(filter),
9580 ..agg
9581 })))
9582 }
9583
            Action::ArrayAggNullFilter => {
                // ARRAY_AGG(x) FILTER(WHERE cond) -> ARRAY_AGG(x) FILTER(WHERE cond AND NOT x IS NULL)
                // For source dialects that exclude NULLs (Spark/Hive) targeting DuckDB which includes them
                let agg = if let Expression::ArrayAgg(a) = e {
                    *a
                } else {
                    unreachable!("action only triggered for ArrayAgg expressions")
                };
                let col = agg.this.clone();
                let not_null = Expression::IsNull(Box::new(IsNull {
                    this: col,
                    not: true,
                    postfix_form: true, // Use "NOT x IS NULL" form (prefix NOT)
                }));
                // Merge with any pre-existing FILTER predicate rather than replacing it.
                let new_filter = if let Some(existing_filter) = agg.filter {
                    // AND the NOT IS NULL with existing filter
                    Expression::And(Box::new(crate::expressions::BinaryOp::new(
                        existing_filter,
                        not_null,
                    )))
                } else {
                    not_null
                };
                Ok(Expression::ArrayAgg(Box::new(AggFunc {
                    filter: Some(new_filter),
                    ..agg
                })))
            }
9612
            Action::BigQueryArraySelectAsStructToSnowflake => {
                // ARRAY(SELECT AS STRUCT x1 AS x1, x2 AS x2 FROM t)
                // -> (SELECT ARRAY_AGG(OBJECT_CONSTRUCT('x1', x1, 'x2', x2)) FROM t)
                if let Expression::Function(mut f) = e {
                    // Only fire on ARRAY(<SELECT AS STRUCT ...>) with one argument.
                    let is_match = f.args.len() == 1
                        && matches!(&f.args[0], Expression::Select(s) if s.kind.as_deref() == Some("STRUCT"));
                    if is_match {
                        let inner_select = match f.args.remove(0) {
                            Expression::Select(s) => *s,
                            _ => unreachable!(
                                "argument already verified to be a Select expression"
                            ),
                        };
                        // Build OBJECT_CONSTRUCT args from SELECT expressions
                        let mut oc_args = Vec::new();
                        for expr in &inner_select.expressions {
                            match expr {
                                // Aliased projection: the alias becomes the object key.
                                Expression::Alias(a) => {
                                    let key = Expression::Literal(Box::new(Literal::String(
                                        a.alias.name.clone(),
                                    )));
                                    let value = a.this.clone();
                                    oc_args.push(key);
                                    oc_args.push(value);
                                }
                                // Bare column: the column name doubles as the key.
                                Expression::Column(c) => {
                                    let key = Expression::Literal(Box::new(Literal::String(
                                        c.name.name.clone(),
                                    )));
                                    oc_args.push(key);
                                    oc_args.push(expr.clone());
                                }
                                // Anything else is passed through positionally
                                // (no key is synthesized for it).
                                _ => {
                                    oc_args.push(expr.clone());
                                }
                            }
                        }
                        let object_construct = Expression::Function(Box::new(Function::new(
                            "OBJECT_CONSTRUCT".to_string(),
                            oc_args,
                        )));
                        let array_agg = Expression::Function(Box::new(Function::new(
                            "ARRAY_AGG".to_string(),
                            vec![object_construct],
                        )));
                        // Rebuild the SELECT around ARRAY_AGG, keeping the original
                        // FROM/WHERE/GROUP BY/HAVING/JOIN clauses.
                        let mut new_select = crate::expressions::Select::new();
                        new_select.expressions = vec![array_agg];
                        new_select.from = inner_select.from.clone();
                        new_select.where_clause = inner_select.where_clause.clone();
                        new_select.group_by = inner_select.group_by.clone();
                        new_select.having = inner_select.having.clone();
                        new_select.joins = inner_select.joins.clone();
                        Ok(Expression::Subquery(Box::new(
                            crate::expressions::Subquery {
                                this: Expression::Select(Box::new(new_select)),
                                alias: None,
                                column_aliases: Vec::new(),
                                order_by: None,
                                limit: None,
                                offset: None,
                                distribute_by: None,
                                sort_by: None,
                                cluster_by: None,
                                lateral: false,
                                modifiers_inside: false,
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            },
                        )))
                    } else {
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
9689
9690 Action::BigQueryPercentileContToDuckDB => {
9691 // PERCENTILE_CONT(x, frac [RESPECT NULLS]) -> QUANTILE_CONT(x, frac) for DuckDB
9692 if let Expression::AggregateFunction(mut af) = e {
9693 af.name = "QUANTILE_CONT".to_string();
9694 af.ignore_nulls = None; // Strip RESPECT/IGNORE NULLS
9695 // Keep only first 2 args
9696 if af.args.len() > 2 {
9697 af.args.truncate(2);
9698 }
9699 Ok(Expression::AggregateFunction(af))
9700 } else {
9701 Ok(e)
9702 }
9703 }
9704
9705 Action::ArrayAggIgnoreNullsDuckDB => {
9706 // ARRAY_AGG(x IGNORE NULLS ORDER BY a, b DESC) -> ARRAY_AGG(x ORDER BY a NULLS FIRST, b DESC)
9707 // Strip IGNORE NULLS, add NULLS FIRST to first ORDER BY column
9708 let mut agg = if let Expression::ArrayAgg(a) = e {
9709 *a
9710 } else {
9711 unreachable!("action only triggered for ArrayAgg expressions")
9712 };
9713 agg.ignore_nulls = None; // Strip IGNORE NULLS
9714 if !agg.order_by.is_empty() {
9715 agg.order_by[0].nulls_first = Some(true);
9716 }
9717 Ok(Expression::ArrayAgg(Box::new(agg)))
9718 }
9719
            Action::CountDistinctMultiArg => {
                // COUNT(DISTINCT a, b) -> COUNT(DISTINCT CASE WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END)
                if let Expression::Count(c) = e {
                    if let Some(Expression::Tuple(t)) = c.this {
                        let args = t.expressions;
                        // Build CASE expression:
                        // WHEN a IS NULL THEN NULL WHEN b IS NULL THEN NULL ELSE (a, b) END
                        let mut whens = Vec::new();
                        for arg in &args {
                            whens.push((
                                Expression::IsNull(Box::new(IsNull {
                                    this: arg.clone(),
                                    not: false,
                                    postfix_form: false,
                                })),
                                Expression::Null(crate::expressions::Null),
                            ));
                        }
                        // Build the tuple for ELSE
                        let tuple_expr =
                            Expression::Tuple(Box::new(crate::expressions::Tuple {
                                expressions: args,
                            }));
                        let case_expr = Expression::Case(Box::new(crate::expressions::Case {
                            operand: None,
                            whens,
                            else_: Some(tuple_expr),
                            comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // Reassemble COUNT around the CASE, preserving its modifiers.
                        Ok(Expression::Count(Box::new(crate::expressions::CountFunc {
                            this: Some(case_expr),
                            star: false,
                            distinct: true,
                            filter: c.filter,
                            ignore_nulls: c.ignore_nulls,
                            original_name: c.original_name,
                            inferred_type: None,
                        })))
                    } else {
                        // Single-argument COUNT: nothing to rewrite.
                        Ok(Expression::Count(c))
                    }
                } else {
                    Ok(e)
                }
            }
9766
9767 Action::CastTimestampToDatetime => {
9768 let c = if let Expression::Cast(c) = e {
9769 *c
9770 } else {
9771 unreachable!("action only triggered for Cast expressions")
9772 };
9773 Ok(Expression::Cast(Box::new(Cast {
9774 to: DataType::Custom {
9775 name: "DATETIME".to_string(),
9776 },
9777 ..c
9778 })))
9779 }
9780
9781 Action::CastTimestampStripTz => {
9782 // CAST(x AS TIMESTAMP(n) WITH TIME ZONE) -> CAST(x AS TIMESTAMP) for Hive/Spark/BigQuery
9783 let c = if let Expression::Cast(c) = e {
9784 *c
9785 } else {
9786 unreachable!("action only triggered for Cast expressions")
9787 };
9788 Ok(Expression::Cast(Box::new(Cast {
9789 to: DataType::Timestamp {
9790 precision: None,
9791 timezone: false,
9792 },
9793 ..c
9794 })))
9795 }
9796
9797 Action::CastTimestamptzToFunc => {
9798 // CAST(x AS TIMESTAMPTZ) -> TIMESTAMP(x) function for MySQL/StarRocks
9799 let c = if let Expression::Cast(c) = e {
9800 *c
9801 } else {
9802 unreachable!("action only triggered for Cast expressions")
9803 };
9804 Ok(Expression::Function(Box::new(Function::new(
9805 "TIMESTAMP".to_string(),
9806 vec![c.this],
9807 ))))
9808 }
9809
            Action::ToDateToCast => {
                // Convert TO_DATE(x) -> CAST(x AS DATE) for DuckDB
                if let Expression::Function(f) = e {
                    // NOTE(review): the unwrap assumes this action is only selected
                    // for TO_DATE calls with at least one argument -- confirm the
                    // upstream guard; it panics on an empty argument list.
                    let arg = f.args.into_iter().next().unwrap();
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Date,
                        double_colon_syntax: false,
                        trailing_comments: vec![],
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
            Action::DateTruncWrapCast => {
                // Handle both Expression::DateTrunc/TimestampTrunc and
                // Expression::Function("DATE_TRUNC", [unit, expr])
                // NOTE(review): the two match arms below duplicate the same
                // TIME-anchoring logic; kept as-is because this code is
                // order-sensitive -- consider extracting a helper later.
                match e {
                    Expression::DateTrunc(d) | Expression::TimestampTrunc(d) => {
                        // Only rewrap when the operand is itself an explicit CAST.
                        let input_type = match &d.this {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // DATE_TRUNC needs a date-bearing operand: anchor the
                                // TIME value to the epoch date, truncate, then cast the
                                // result back to the original TIME type.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let add_expr =
                                    Expression::Add(Box::new(BinaryOp::new(date_expr, d.this)));
                                // NOTE(review): a TimestampTrunc input is re-emitted as
                                // DateTrunc here -- confirm that is intentional.
                                let inner = Expression::DateTrunc(Box::new(DateTruncFunc {
                                    this: add_expr,
                                    unit: d.unit,
                                }));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Non-TIME cast: just re-apply the original cast type
                                // around the truncation result.
                                let inner = Expression::DateTrunc(Box::new(*d));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            Ok(Expression::DateTrunc(d))
                        }
                    }
                    Expression::Function(f) if f.args.len() == 2 => {
                        // Function-based DATE_TRUNC(unit, expr)
                        let input_type = match &f.args[1] {
                            Expression::Cast(c) => Some(c.to.clone()),
                            _ => None,
                        };
                        if let Some(cast_type) = input_type {
                            let is_time = matches!(cast_type, DataType::Time { .. });
                            if is_time {
                                // Same epoch-anchoring trick as the AST-node arm above.
                                let date_expr = Expression::Cast(Box::new(Cast {
                                    this: Expression::Literal(Box::new(
                                        crate::expressions::Literal::String(
                                            "1970-01-01".to_string(),
                                        ),
                                    )),
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let mut args = f.args;
                                let unit_arg = args.remove(0);
                                let time_expr = args.remove(0);
                                let add_expr = Expression::Add(Box::new(BinaryOp::new(
                                    date_expr, time_expr,
                                )));
                                let inner = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![unit_arg, add_expr],
                                )));
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: inner,
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                // Wrap the function in CAST
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: Expression::Function(f),
                                    to: cast_type,
                                    double_colon_syntax: false,
                                    trailing_comments: vec![],
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            }
                        } else {
                            Ok(Expression::Function(f))
                        }
                    }
                    // Anything else passes through unchanged.
                    other => Ok(other),
                }
            }
9943
            Action::RegexpReplaceSnowflakeToDuckDB => {
                // Snowflake REGEXP_REPLACE(s, p, r, position) -> REGEXP_REPLACE(s, p, r, 'g')
                if let Expression::Function(f) = e {
                    // Keep subject/pattern/replacement, drop the trailing position
                    // argument (left behind in `args`), and append DuckDB's 'g' flag
                    // to restore Snowflake's replace-all default.
                    // NOTE(review): assumes the upstream guard selected calls with
                    // >= 3 args (remove(0) panics otherwise) and that the discarded
                    // position is 1 -- confirm.
                    let mut args = f.args;
                    let subject = args.remove(0);
                    let pattern = args.remove(0);
                    let replacement = args.remove(0);
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_REPLACE".to_string(),
                        vec![
                            subject,
                            pattern,
                            replacement,
                            Expression::Literal(Box::new(crate::expressions::Literal::String(
                                "g".to_string(),
                            ))),
                        ],
                    ))))
                } else {
                    Ok(e)
                }
            }
9966
9967 Action::RegexpReplacePositionSnowflakeToDuckDB => {
9968 // Snowflake REGEXP_REPLACE(s, p, r, pos, occ) -> DuckDB form
9969 // pos=1, occ=1 -> REGEXP_REPLACE(s, p, r) (single replace, no 'g')
9970 // pos>1, occ=0 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r, 'g')
9971 // pos>1, occ=1 -> SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r)
9972 // pos=1, occ=0 -> REGEXP_REPLACE(s, p, r, 'g') (replace all)
9973 if let Expression::Function(f) = e {
9974 let mut args = f.args;
9975 let subject = args.remove(0);
9976 let pattern = args.remove(0);
9977 let replacement = args.remove(0);
9978 let position = args.remove(0);
9979 let occurrence = args.remove(0);
9980
9981 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
9982 let is_occ_0 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
9983 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
9984
9985 if is_pos_1 && is_occ_1 {
9986 // REGEXP_REPLACE(s, p, r) - single replace, no flags
9987 Ok(Expression::Function(Box::new(Function::new(
9988 "REGEXP_REPLACE".to_string(),
9989 vec![subject, pattern, replacement],
9990 ))))
9991 } else if is_pos_1 && is_occ_0 {
9992 // REGEXP_REPLACE(s, p, r, 'g') - global replace
9993 Ok(Expression::Function(Box::new(Function::new(
9994 "REGEXP_REPLACE".to_string(),
9995 vec![
9996 subject,
9997 pattern,
9998 replacement,
9999 Expression::Literal(Box::new(Literal::String("g".to_string()))),
10000 ],
10001 ))))
10002 } else {
10003 // pos>1: SUBSTRING(s, 1, pos-1) || REGEXP_REPLACE(SUBSTRING(s, pos), p, r[, 'g'])
10004 // Pre-compute pos-1 when position is a numeric literal
10005 let pos_minus_1 = if let Expression::Literal(ref lit) = position {
10006 if let Literal::Number(ref n) = lit.as_ref() {
10007 if let Ok(val) = n.parse::<i64>() {
10008 Expression::number(val - 1)
10009 } else {
10010 Expression::Sub(Box::new(BinaryOp::new(
10011 position.clone(),
10012 Expression::number(1),
10013 )))
10014 }
10015 } else {
10016 position.clone()
10017 }
10018 } else {
10019 Expression::Sub(Box::new(BinaryOp::new(
10020 position.clone(),
10021 Expression::number(1),
10022 )))
10023 };
10024 let prefix = Expression::Function(Box::new(Function::new(
10025 "SUBSTRING".to_string(),
10026 vec![subject.clone(), Expression::number(1), pos_minus_1],
10027 )));
10028 let suffix_subject = Expression::Function(Box::new(Function::new(
10029 "SUBSTRING".to_string(),
10030 vec![subject, position],
10031 )));
10032 let mut replace_args = vec![suffix_subject, pattern, replacement];
10033 if is_occ_0 {
10034 replace_args.push(Expression::Literal(Box::new(Literal::String(
10035 "g".to_string(),
10036 ))));
10037 }
10038 let replace_expr = Expression::Function(Box::new(Function::new(
10039 "REGEXP_REPLACE".to_string(),
10040 replace_args,
10041 )));
10042 Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
10043 this: Box::new(prefix),
10044 expression: Box::new(replace_expr),
10045 safe: None,
10046 })))
10047 }
10048 } else {
10049 Ok(e)
10050 }
10051 }
10052
            Action::RegexpSubstrSnowflakeToDuckDB => {
                // Snowflake REGEXP_SUBSTR -> DuckDB REGEXP_EXTRACT variants
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    let arg_count = args.len();
                    match arg_count {
                        // REGEXP_SUBSTR(s, p) -> REGEXP_EXTRACT(s, p)
                        0..=2 => Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT".to_string(),
                            args,
                        )))),
                        // REGEXP_SUBSTR(s, p, pos) -> REGEXP_EXTRACT(NULLIF(SUBSTRING(s, pos), ''), p)
                        3 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            if is_pos_1 {
                                // pos=1 is Snowflake's default: plain extract.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                // NULLIF(..., '') turns an out-of-range offset into
                                // NULL instead of matching against an empty string.
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                let nullif_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "NULLIF".to_string(),
                                        vec![
                                            substring_expr,
                                            Expression::Literal(Box::new(Literal::String(
                                                String::new(),
                                            ))),
                                        ],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![nullif_expr, pattern],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, pos, occ) -> depends on pos and occ
                        4 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let position = args.remove(0);
                            let occurrence = args.remove(0);
                            let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                            let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                            // Apply the position offset (with the same NULLIF guard
                            // as the 3-arg case) before selecting the occurrence.
                            let effective_subject = if is_pos_1 {
                                subject
                            } else {
                                let substring_expr =
                                    Expression::Function(Box::new(Function::new(
                                        "SUBSTRING".to_string(),
                                        vec![subject, position],
                                    )));
                                Expression::Function(Box::new(Function::new(
                                    "NULLIF".to_string(),
                                    vec![
                                        substring_expr,
                                        Expression::Literal(Box::new(Literal::String(
                                            String::new(),
                                        ))),
                                    ],
                                )))
                            };

                            if is_occ_1 {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![effective_subject, pattern],
                                ))))
                            } else {
                                // ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, p), occ)
                                // NOTE(review): relies on DuckDB list indexing being
                                // 1-based to line up with Snowflake's 1-based
                                // occurrence -- confirm behavior for occ=0.
                                let extract_all =
                                    Expression::Function(Box::new(Function::new(
                                        "REGEXP_EXTRACT_ALL".to_string(),
                                        vec![effective_subject, pattern],
                                    )));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ARRAY_EXTRACT".to_string(),
                                    vec![extract_all, occurrence],
                                ))))
                            }
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e') -> REGEXP_EXTRACT(s, p)
                        5 => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            // Strip 'e' flag, convert to REGEXP_EXTRACT
                            // NOTE(review): position/occurrence/flags are discarded;
                            // assumes they are the defaults (1, 1, 'e') -- confirm the
                            // upstream guard enforces that.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT".to_string(),
                                vec![subject, pattern],
                            ))))
                        }
                        // REGEXP_SUBSTR(s, p, 1, 1, 'e', group) -> REGEXP_EXTRACT(s, p[, group])
                        _ => {
                            let subject = args.remove(0);
                            let pattern = args.remove(0);
                            let _position = args.remove(0);
                            let _occurrence = args.remove(0);
                            let _flags = args.remove(0);
                            let group = args.remove(0);
                            let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                            if is_group_0 {
                                // Strip group=0 (default)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern],
                                ))))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "REGEXP_EXTRACT".to_string(),
                                    vec![subject, pattern, group],
                                ))))
                            }
                        }
                    }
                } else {
                    Ok(e)
                }
            }
10183
10184 Action::RegexpSubstrSnowflakeIdentity => {
10185 // Snowflake→Snowflake: REGEXP_SUBSTR/REGEXP_SUBSTR_ALL with 6 args
10186 // Strip trailing group=0
10187 if let Expression::Function(f) = e {
10188 let func_name = f.name.clone();
10189 let mut args = f.args;
10190 if args.len() == 6 {
10191 let is_group_0 = matches!(&args[5], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
10192 if is_group_0 {
10193 args.truncate(5);
10194 }
10195 }
10196 Ok(Expression::Function(Box::new(Function::new(
10197 func_name, args,
10198 ))))
10199 } else {
10200 Ok(e)
10201 }
10202 }
10203
Action::RegexpSubstrAllSnowflakeToDuckDB => {
    // Snowflake REGEXP_SUBSTR_ALL -> DuckDB REGEXP_EXTRACT_ALL variants
    //
    // Snowflake's signature is REGEXP_SUBSTR_ALL(s, p[, pos[, occ[, flags[, group]]]]).
    // Each arity is lowered separately; literal defaults (pos = 1, occ = 1,
    // group = 0) are detected so the simplest DuckDB form can be emitted.
    if let Expression::Function(f) = e {
        let mut args = f.args;
        let arg_count = args.len();
        match arg_count {
            // REGEXP_SUBSTR_ALL(s, p) -> REGEXP_EXTRACT_ALL(s, p)
            0..=2 => Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_EXTRACT_ALL".to_string(),
                args,
            )))),
            // REGEXP_SUBSTR_ALL(s, p, pos) -> REGEXP_EXTRACT_ALL(SUBSTRING(s, pos), p)
            3 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                // A literal position of 1 is the default; no SUBSTRING wrapper needed.
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                if is_pos_1 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    let substring_expr =
                        Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![subject, position],
                        )));
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![substring_expr, pattern],
                    ))))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, occ) -> REGEXP_EXTRACT_ALL(s, p)[occ:]
            4 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let position = args.remove(0);
                let occurrence = args.remove(0);
                let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
                let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));

                // Non-default position: shift the subject before matching.
                let effective_subject = if is_pos_1 {
                    subject
                } else {
                    Expression::Function(Box::new(Function::new(
                        "SUBSTRING".to_string(),
                        vec![subject, position],
                    )))
                };

                if is_occ_1 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![effective_subject, pattern],
                    ))))
                } else {
                    // REGEXP_EXTRACT_ALL(s, p)[occ:]
                    // Skipping the first occ-1 matches is expressed as an
                    // open-ended array slice starting at `occ`.
                    let extract_all =
                        Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![effective_subject, pattern],
                        )));
                    Ok(Expression::ArraySlice(Box::new(
                        crate::expressions::ArraySlice {
                            this: extract_all,
                            start: Some(occurrence),
                            end: None,
                        },
                    )))
                }
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e') -> REGEXP_EXTRACT_ALL(s, p)
            5 => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                // NOTE(review): non-default pos/occ and regex flags are
                // silently discarded in the 5-arg form, although the
                // 3-/4-arg forms above do honor pos/occ — confirm intended.
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                Ok(Expression::Function(Box::new(Function::new(
                    "REGEXP_EXTRACT_ALL".to_string(),
                    vec![subject, pattern],
                ))))
            }
            // REGEXP_SUBSTR_ALL(s, p, 1, 1, 'e', 0) -> REGEXP_EXTRACT_ALL(s, p)
            _ => {
                let subject = args.remove(0);
                let pattern = args.remove(0);
                let _position = args.remove(0);
                let _occurrence = args.remove(0);
                let _flags = args.remove(0);
                let group = args.remove(0);
                // group = 0 is the default; only a non-zero group is
                // forwarded as a third argument.
                let is_group_0 = matches!(&group, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "0"));
                if is_group_0 {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern],
                    ))))
                } else {
                    Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![subject, pattern, group],
                    ))))
                }
            }
        }
    } else {
        Ok(e)
    }
}
10315
10316 Action::RegexpCountSnowflakeToDuckDB => {
10317 // Snowflake REGEXP_COUNT(s, p[, pos[, flags]]) ->
10318 // DuckDB: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10319 if let Expression::Function(f) = e {
10320 let mut args = f.args;
10321 let arg_count = args.len();
10322 let subject = args.remove(0);
10323 let pattern = args.remove(0);
10324
10325 // Handle position arg
10326 let effective_subject = if arg_count >= 3 {
10327 let position = args.remove(0);
10328 Expression::Function(Box::new(Function::new(
10329 "SUBSTRING".to_string(),
10330 vec![subject, position],
10331 )))
10332 } else {
10333 subject
10334 };
10335
10336 // Handle flags arg -> embed as (?flags) prefix in pattern
10337 let effective_pattern = if arg_count >= 4 {
10338 let flags = args.remove(0);
10339 match &flags {
10340 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(f_str) if !f_str.is_empty()) =>
10341 {
10342 let Literal::String(f_str) = lit.as_ref() else {
10343 unreachable!()
10344 };
10345 // Always use concatenation: '(?flags)' || pattern
10346 let prefix = Expression::Literal(Box::new(Literal::String(
10347 format!("(?{})", f_str),
10348 )));
10349 Expression::DPipe(Box::new(crate::expressions::DPipe {
10350 this: Box::new(prefix),
10351 expression: Box::new(pattern.clone()),
10352 safe: None,
10353 }))
10354 }
10355 _ => pattern.clone(),
10356 }
10357 } else {
10358 pattern.clone()
10359 };
10360
10361 // Build: CASE WHEN p = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, p)) END
10362 let extract_all = Expression::Function(Box::new(Function::new(
10363 "REGEXP_EXTRACT_ALL".to_string(),
10364 vec![effective_subject, effective_pattern.clone()],
10365 )));
10366 let length_expr =
10367 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10368 this: extract_all,
10369 original_name: None,
10370 inferred_type: None,
10371 }));
10372 let condition = Expression::Eq(Box::new(BinaryOp::new(
10373 effective_pattern,
10374 Expression::Literal(Box::new(Literal::String(String::new()))),
10375 )));
10376 Ok(Expression::Case(Box::new(Case {
10377 operand: None,
10378 whens: vec![(condition, Expression::number(0))],
10379 else_: Some(length_expr),
10380 comments: vec![],
10381 inferred_type: None,
10382 })))
10383 } else {
10384 Ok(e)
10385 }
10386 }
10387
10388 Action::RegexpInstrSnowflakeToDuckDB => {
10389 // Snowflake REGEXP_INSTR(s, p[, pos[, occ[, option[, flags[, group]]]]]) ->
10390 // DuckDB: CASE WHEN s IS NULL OR p IS NULL [OR ...] THEN NULL
10391 // WHEN p = '' THEN 0
10392 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10393 // ELSE 1 + COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(eff_s, eff_p)[1:occ], x -> LENGTH(x))), 0)
10394 // + COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(eff_s, eff_p)[1:occ - 1], x -> LENGTH(x))), 0)
10395 // + pos_offset
10396 // END
10397 if let Expression::Function(f) = e {
10398 let mut args = f.args;
10399 let subject = args.remove(0);
10400 let pattern = if !args.is_empty() {
10401 args.remove(0)
10402 } else {
10403 Expression::Literal(Box::new(Literal::String(String::new())))
10404 };
10405
10406 // Collect all original args for NULL checks
10407 let position = if !args.is_empty() {
10408 Some(args.remove(0))
10409 } else {
10410 None
10411 };
10412 let occurrence = if !args.is_empty() {
10413 Some(args.remove(0))
10414 } else {
10415 None
10416 };
10417 let option = if !args.is_empty() {
10418 Some(args.remove(0))
10419 } else {
10420 None
10421 };
10422 let flags = if !args.is_empty() {
10423 Some(args.remove(0))
10424 } else {
10425 None
10426 };
10427 let _group = if !args.is_empty() {
10428 Some(args.remove(0))
10429 } else {
10430 None
10431 };
10432
10433 let is_pos_1 = position.as_ref().map_or(true, |p| matches!(p, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1")));
10434 let occurrence_expr = occurrence.clone().unwrap_or(Expression::number(1));
10435
10436 // Build NULL check: subject IS NULL OR pattern IS NULL [OR pos IS NULL ...]
10437 let mut null_checks: Vec<Expression> = vec![
10438 Expression::Is(Box::new(BinaryOp::new(
10439 subject.clone(),
10440 Expression::Null(Null),
10441 ))),
10442 Expression::Is(Box::new(BinaryOp::new(
10443 pattern.clone(),
10444 Expression::Null(Null),
10445 ))),
10446 ];
10447 // Add NULL checks for all provided optional args
10448 for opt_arg in [&position, &occurrence, &option, &flags].iter() {
10449 if let Some(arg) = opt_arg {
10450 null_checks.push(Expression::Is(Box::new(BinaryOp::new(
10451 (*arg).clone(),
10452 Expression::Null(Null),
10453 ))));
10454 }
10455 }
10456 // Chain with OR
10457 let null_condition = null_checks
10458 .into_iter()
10459 .reduce(|a, b| Expression::Or(Box::new(BinaryOp::new(a, b))))
10460 .unwrap();
10461
10462 // Effective subject (apply position offset)
10463 let effective_subject = if is_pos_1 {
10464 subject.clone()
10465 } else {
10466 let pos = position.clone().unwrap_or(Expression::number(1));
10467 Expression::Function(Box::new(Function::new(
10468 "SUBSTRING".to_string(),
10469 vec![subject.clone(), pos],
10470 )))
10471 };
10472
10473 // Effective pattern (apply flags if present)
10474 let effective_pattern = if let Some(ref fl) = flags {
10475 if let Expression::Literal(lit) = fl {
10476 if let Literal::String(f_str) = lit.as_ref() {
10477 if !f_str.is_empty() {
10478 let prefix = Expression::Literal(Box::new(
10479 Literal::String(format!("(?{})", f_str)),
10480 ));
10481 Expression::DPipe(Box::new(crate::expressions::DPipe {
10482 this: Box::new(prefix),
10483 expression: Box::new(pattern.clone()),
10484 safe: None,
10485 }))
10486 } else {
10487 pattern.clone()
10488 }
10489 } else {
10490 fl.clone()
10491 }
10492 } else {
10493 pattern.clone()
10494 }
10495 } else {
10496 pattern.clone()
10497 };
10498
10499 // WHEN pattern = '' THEN 0
10500 let empty_pattern_check = Expression::Eq(Box::new(BinaryOp::new(
10501 effective_pattern.clone(),
10502 Expression::Literal(Box::new(Literal::String(String::new()))),
10503 )));
10504
10505 // WHEN LENGTH(REGEXP_EXTRACT_ALL(eff_s, eff_p)) < occ THEN 0
10506 let match_count_check = Expression::Lt(Box::new(BinaryOp::new(
10507 Expression::Length(Box::new(crate::expressions::UnaryFunc {
10508 this: Expression::Function(Box::new(Function::new(
10509 "REGEXP_EXTRACT_ALL".to_string(),
10510 vec![effective_subject.clone(), effective_pattern.clone()],
10511 ))),
10512 original_name: None,
10513 inferred_type: None,
10514 })),
10515 occurrence_expr.clone(),
10516 )));
10517
10518 // Helper: build LENGTH lambda for LIST_TRANSFORM
10519 let make_len_lambda = || {
10520 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
10521 parameters: vec![crate::expressions::Identifier::new("x")],
10522 body: Expression::Length(Box::new(crate::expressions::UnaryFunc {
10523 this: Expression::Identifier(
10524 crate::expressions::Identifier::new("x"),
10525 ),
10526 original_name: None,
10527 inferred_type: None,
10528 })),
10529 colon: false,
10530 parameter_types: vec![],
10531 }))
10532 };
10533
10534 // COALESCE(LIST_SUM(LIST_TRANSFORM(STRING_SPLIT_REGEX(s, p)[1:occ], x -> LENGTH(x))), 0)
10535 let split_sliced =
10536 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10537 this: Expression::Function(Box::new(Function::new(
10538 "STRING_SPLIT_REGEX".to_string(),
10539 vec![effective_subject.clone(), effective_pattern.clone()],
10540 ))),
10541 start: Some(Expression::number(1)),
10542 end: Some(occurrence_expr.clone()),
10543 }));
10544 let split_sum = Expression::Function(Box::new(Function::new(
10545 "COALESCE".to_string(),
10546 vec![
10547 Expression::Function(Box::new(Function::new(
10548 "LIST_SUM".to_string(),
10549 vec![Expression::Function(Box::new(Function::new(
10550 "LIST_TRANSFORM".to_string(),
10551 vec![split_sliced, make_len_lambda()],
10552 )))],
10553 ))),
10554 Expression::number(0),
10555 ],
10556 )));
10557
10558 // COALESCE(LIST_SUM(LIST_TRANSFORM(REGEXP_EXTRACT_ALL(s, p)[1:occ - 1], x -> LENGTH(x))), 0)
10559 let extract_sliced =
10560 Expression::ArraySlice(Box::new(crate::expressions::ArraySlice {
10561 this: Expression::Function(Box::new(Function::new(
10562 "REGEXP_EXTRACT_ALL".to_string(),
10563 vec![effective_subject.clone(), effective_pattern.clone()],
10564 ))),
10565 start: Some(Expression::number(1)),
10566 end: Some(Expression::Sub(Box::new(BinaryOp::new(
10567 occurrence_expr.clone(),
10568 Expression::number(1),
10569 )))),
10570 }));
10571 let extract_sum = Expression::Function(Box::new(Function::new(
10572 "COALESCE".to_string(),
10573 vec![
10574 Expression::Function(Box::new(Function::new(
10575 "LIST_SUM".to_string(),
10576 vec![Expression::Function(Box::new(Function::new(
10577 "LIST_TRANSFORM".to_string(),
10578 vec![extract_sliced, make_len_lambda()],
10579 )))],
10580 ))),
10581 Expression::number(0),
10582 ],
10583 )));
10584
10585 // Position offset: pos - 1 when pos > 1, else 0
10586 let pos_offset: Expression = if !is_pos_1 {
10587 let pos = position.clone().unwrap_or(Expression::number(1));
10588 Expression::Sub(Box::new(BinaryOp::new(pos, Expression::number(1))))
10589 } else {
10590 Expression::number(0)
10591 };
10592
10593 // ELSE: 1 + split_sum + extract_sum + pos_offset
10594 let else_expr = Expression::Add(Box::new(BinaryOp::new(
10595 Expression::Add(Box::new(BinaryOp::new(
10596 Expression::Add(Box::new(BinaryOp::new(
10597 Expression::number(1),
10598 split_sum,
10599 ))),
10600 extract_sum,
10601 ))),
10602 pos_offset,
10603 )));
10604
10605 Ok(Expression::Case(Box::new(Case {
10606 operand: None,
10607 whens: vec![
10608 (null_condition, Expression::Null(Null)),
10609 (empty_pattern_check, Expression::number(0)),
10610 (match_count_check, Expression::number(0)),
10611 ],
10612 else_: Some(else_expr),
10613 comments: vec![],
10614 inferred_type: None,
10615 })))
10616 } else {
10617 Ok(e)
10618 }
10619 }
10620
10621 Action::RlikeSnowflakeToDuckDB => {
10622 // Snowflake RLIKE(a, b[, flags]) -> DuckDB REGEXP_FULL_MATCH(a, b[, flags])
10623 // Both do full-string matching, so no anchoring needed
10624 let (subject, pattern, flags) = match e {
10625 Expression::RegexpLike(ref rl) => {
10626 (rl.this.clone(), rl.pattern.clone(), rl.flags.clone())
10627 }
10628 Expression::Function(ref f) if f.args.len() >= 2 => {
10629 let s = f.args[0].clone();
10630 let p = f.args[1].clone();
10631 let fl = f.args.get(2).cloned();
10632 (s, p, fl)
10633 }
10634 _ => return Ok(e),
10635 };
10636
10637 let mut result_args = vec![subject, pattern];
10638 if let Some(fl) = flags {
10639 result_args.push(fl);
10640 }
10641 Ok(Expression::Function(Box::new(Function::new(
10642 "REGEXP_FULL_MATCH".to_string(),
10643 result_args,
10644 ))))
10645 }
10646
10647 Action::RegexpExtractAllToSnowflake => {
10648 // BigQuery REGEXP_EXTRACT_ALL(s, p) -> Snowflake REGEXP_SUBSTR_ALL(s, p)
10649 // With capture group: REGEXP_SUBSTR_ALL(s, p, 1, 1, 'c', 1)
10650 if let Expression::Function(f) = e {
10651 let mut args = f.args;
10652 if args.len() >= 2 {
10653 let str_expr = args.remove(0);
10654 let pattern = args.remove(0);
10655
10656 let has_groups = match &pattern {
10657 Expression::Literal(lit)
10658 if matches!(lit.as_ref(), Literal::String(_)) =>
10659 {
10660 let Literal::String(s) = lit.as_ref() else {
10661 unreachable!()
10662 };
10663 s.contains('(') && s.contains(')')
10664 }
10665 _ => false,
10666 };
10667
10668 if has_groups {
10669 Ok(Expression::Function(Box::new(Function::new(
10670 "REGEXP_SUBSTR_ALL".to_string(),
10671 vec![
10672 str_expr,
10673 pattern,
10674 Expression::number(1),
10675 Expression::number(1),
10676 Expression::Literal(Box::new(Literal::String(
10677 "c".to_string(),
10678 ))),
10679 Expression::number(1),
10680 ],
10681 ))))
10682 } else {
10683 Ok(Expression::Function(Box::new(Function::new(
10684 "REGEXP_SUBSTR_ALL".to_string(),
10685 vec![str_expr, pattern],
10686 ))))
10687 }
10688 } else {
10689 Ok(Expression::Function(Box::new(Function::new(
10690 "REGEXP_SUBSTR_ALL".to_string(),
10691 args,
10692 ))))
10693 }
10694 } else {
10695 Ok(e)
10696 }
10697 }
10698
10699 Action::SetToVariable => {
10700 // For DuckDB: SET a = 1 -> SET VARIABLE a = 1
10701 if let Expression::SetStatement(mut s) = e {
10702 for item in &mut s.items {
10703 if item.kind.is_none() {
10704 // Check if name already has VARIABLE prefix (from DuckDB source parsing)
10705 let already_variable = match &item.name {
10706 Expression::Identifier(id) => id.name.starts_with("VARIABLE "),
10707 _ => false,
10708 };
10709 if already_variable {
10710 // Extract the actual name and set kind
10711 if let Expression::Identifier(ref mut id) = item.name {
10712 let actual_name = id.name["VARIABLE ".len()..].to_string();
10713 id.name = actual_name;
10714 }
10715 }
10716 item.kind = Some("VARIABLE".to_string());
10717 }
10718 }
10719 Ok(Expression::SetStatement(s))
10720 } else {
10721 Ok(e)
10722 }
10723 }
10724
10725 Action::ConvertTimezoneToExpr => {
10726 // Convert Function("CONVERT_TIMEZONE", args) to Expression::ConvertTimezone
10727 // This prevents Redshift's transform_expr from expanding 2-arg to 3-arg with 'UTC'
10728 if let Expression::Function(f) = e {
10729 if f.args.len() == 2 {
10730 let mut args = f.args;
10731 let target_tz = args.remove(0);
10732 let timestamp = args.remove(0);
10733 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10734 source_tz: None,
10735 target_tz: Some(Box::new(target_tz)),
10736 timestamp: Some(Box::new(timestamp)),
10737 options: vec![],
10738 })))
10739 } else if f.args.len() == 3 {
10740 let mut args = f.args;
10741 let source_tz = args.remove(0);
10742 let target_tz = args.remove(0);
10743 let timestamp = args.remove(0);
10744 Ok(Expression::ConvertTimezone(Box::new(ConvertTimezone {
10745 source_tz: Some(Box::new(source_tz)),
10746 target_tz: Some(Box::new(target_tz)),
10747 timestamp: Some(Box::new(timestamp)),
10748 options: vec![],
10749 })))
10750 } else {
10751 Ok(Expression::Function(f))
10752 }
10753 } else {
10754 Ok(e)
10755 }
10756 }
10757
10758 Action::BigQueryCastType => {
10759 // Convert BigQuery types to standard SQL types
10760 if let Expression::DataType(dt) = e {
10761 match dt {
10762 DataType::Custom { ref name } if name.eq_ignore_ascii_case("INT64") => {
10763 Ok(Expression::DataType(DataType::BigInt { length: None }))
10764 }
10765 DataType::Custom { ref name }
10766 if name.eq_ignore_ascii_case("FLOAT64") =>
10767 {
10768 Ok(Expression::DataType(DataType::Double {
10769 precision: None,
10770 scale: None,
10771 }))
10772 }
10773 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BOOL") => {
10774 Ok(Expression::DataType(DataType::Boolean))
10775 }
10776 DataType::Custom { ref name } if name.eq_ignore_ascii_case("BYTES") => {
10777 Ok(Expression::DataType(DataType::VarBinary { length: None }))
10778 }
10779 DataType::Custom { ref name }
10780 if name.eq_ignore_ascii_case("NUMERIC") =>
10781 {
10782 // For DuckDB target, use Custom("DECIMAL") to avoid DuckDB's
10783 // default precision (18, 3) being added to bare DECIMAL
10784 if matches!(target, DialectType::DuckDB) {
10785 Ok(Expression::DataType(DataType::Custom {
10786 name: "DECIMAL".to_string(),
10787 }))
10788 } else {
10789 Ok(Expression::DataType(DataType::Decimal {
10790 precision: None,
10791 scale: None,
10792 }))
10793 }
10794 }
10795 DataType::Custom { ref name }
10796 if name.eq_ignore_ascii_case("STRING") =>
10797 {
10798 Ok(Expression::DataType(DataType::String { length: None }))
10799 }
10800 DataType::Custom { ref name }
10801 if name.eq_ignore_ascii_case("DATETIME") =>
10802 {
10803 Ok(Expression::DataType(DataType::Timestamp {
10804 precision: None,
10805 timezone: false,
10806 }))
10807 }
10808 _ => Ok(Expression::DataType(dt)),
10809 }
10810 } else {
10811 Ok(e)
10812 }
10813 }
10814
10815 Action::BigQuerySafeDivide => {
10816 // Convert SafeDivide expression to IF/CASE form for most targets
10817 if let Expression::SafeDivide(sd) = e {
10818 let x = *sd.this;
10819 let y = *sd.expression;
10820 // Wrap x and y in parens if they're complex expressions
10821 let y_ref = match &y {
10822 Expression::Column(_)
10823 | Expression::Literal(_)
10824 | Expression::Identifier(_) => y.clone(),
10825 _ => Expression::Paren(Box::new(Paren {
10826 this: y.clone(),
10827 trailing_comments: vec![],
10828 })),
10829 };
10830 let x_ref = match &x {
10831 Expression::Column(_)
10832 | Expression::Literal(_)
10833 | Expression::Identifier(_) => x.clone(),
10834 _ => Expression::Paren(Box::new(Paren {
10835 this: x.clone(),
10836 trailing_comments: vec![],
10837 })),
10838 };
10839 let condition = Expression::Neq(Box::new(BinaryOp::new(
10840 y_ref.clone(),
10841 Expression::number(0),
10842 )));
10843 let div_expr = Expression::Div(Box::new(BinaryOp::new(x_ref, y_ref)));
10844
10845 if matches!(target, DialectType::Spark | DialectType::Databricks) {
10846 Ok(Expression::Function(Box::new(Function::new(
10847 "TRY_DIVIDE".to_string(),
10848 vec![x, y],
10849 ))))
10850 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
10851 // Presto/Trino: IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
10852 let cast_x = Expression::Cast(Box::new(Cast {
10853 this: match &x {
10854 Expression::Column(_)
10855 | Expression::Literal(_)
10856 | Expression::Identifier(_) => x,
10857 _ => Expression::Paren(Box::new(Paren {
10858 this: x,
10859 trailing_comments: vec![],
10860 })),
10861 },
10862 to: DataType::Double {
10863 precision: None,
10864 scale: None,
10865 },
10866 trailing_comments: vec![],
10867 double_colon_syntax: false,
10868 format: None,
10869 default: None,
10870 inferred_type: None,
10871 }));
10872 let cast_div = Expression::Div(Box::new(BinaryOp::new(
10873 cast_x,
10874 match &y {
10875 Expression::Column(_)
10876 | Expression::Literal(_)
10877 | Expression::Identifier(_) => y,
10878 _ => Expression::Paren(Box::new(Paren {
10879 this: y,
10880 trailing_comments: vec![],
10881 })),
10882 },
10883 )));
10884 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10885 condition,
10886 true_value: cast_div,
10887 false_value: Some(Expression::Null(Null)),
10888 original_name: None,
10889 inferred_type: None,
10890 })))
10891 } else if matches!(target, DialectType::PostgreSQL) {
10892 // PostgreSQL: CASE WHEN y <> 0 THEN CAST(x AS DOUBLE PRECISION) / y ELSE NULL END
10893 let cast_x = Expression::Cast(Box::new(Cast {
10894 this: match &x {
10895 Expression::Column(_)
10896 | Expression::Literal(_)
10897 | Expression::Identifier(_) => x,
10898 _ => Expression::Paren(Box::new(Paren {
10899 this: x,
10900 trailing_comments: vec![],
10901 })),
10902 },
10903 to: DataType::Custom {
10904 name: "DOUBLE PRECISION".to_string(),
10905 },
10906 trailing_comments: vec![],
10907 double_colon_syntax: false,
10908 format: None,
10909 default: None,
10910 inferred_type: None,
10911 }));
10912 let y_paren = match &y {
10913 Expression::Column(_)
10914 | Expression::Literal(_)
10915 | Expression::Identifier(_) => y,
10916 _ => Expression::Paren(Box::new(Paren {
10917 this: y,
10918 trailing_comments: vec![],
10919 })),
10920 };
10921 let cast_div =
10922 Expression::Div(Box::new(BinaryOp::new(cast_x, y_paren)));
10923 Ok(Expression::Case(Box::new(Case {
10924 operand: None,
10925 whens: vec![(condition, cast_div)],
10926 else_: Some(Expression::Null(Null)),
10927 comments: Vec::new(),
10928 inferred_type: None,
10929 })))
10930 } else if matches!(target, DialectType::DuckDB) {
10931 // DuckDB: CASE WHEN y <> 0 THEN x / y ELSE NULL END
10932 Ok(Expression::Case(Box::new(Case {
10933 operand: None,
10934 whens: vec![(condition, div_expr)],
10935 else_: Some(Expression::Null(Null)),
10936 comments: Vec::new(),
10937 inferred_type: None,
10938 })))
10939 } else if matches!(target, DialectType::Snowflake) {
10940 // Snowflake: IFF(y <> 0, x / y, NULL)
10941 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10942 condition,
10943 true_value: div_expr,
10944 false_value: Some(Expression::Null(Null)),
10945 original_name: Some("IFF".to_string()),
10946 inferred_type: None,
10947 })))
10948 } else {
10949 // All others: IF(y <> 0, x / y, NULL)
10950 Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
10951 condition,
10952 true_value: div_expr,
10953 false_value: Some(Expression::Null(Null)),
10954 original_name: None,
10955 inferred_type: None,
10956 })))
10957 }
10958 } else {
10959 Ok(e)
10960 }
10961 }
10962
10963 Action::BigQueryLastDayStripUnit => {
10964 if let Expression::LastDay(mut ld) = e {
10965 ld.unit = None; // Strip the unit (MONTH is default)
10966 match target {
10967 DialectType::PostgreSQL => {
10968 // LAST_DAY(date) -> CAST(DATE_TRUNC('MONTH', date) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
10969 let date_trunc = Expression::Function(Box::new(Function::new(
10970 "DATE_TRUNC".to_string(),
10971 vec![
10972 Expression::Literal(Box::new(
10973 crate::expressions::Literal::String(
10974 "MONTH".to_string(),
10975 ),
10976 )),
10977 ld.this.clone(),
10978 ],
10979 )));
10980 let plus_month =
10981 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
10982 date_trunc,
10983 Expression::Interval(Box::new(
10984 crate::expressions::Interval {
10985 this: Some(Expression::Literal(Box::new(
10986 crate::expressions::Literal::String(
10987 "1 MONTH".to_string(),
10988 ),
10989 ))),
10990 unit: None,
10991 },
10992 )),
10993 )));
10994 let minus_day =
10995 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
10996 plus_month,
10997 Expression::Interval(Box::new(
10998 crate::expressions::Interval {
10999 this: Some(Expression::Literal(Box::new(
11000 crate::expressions::Literal::String(
11001 "1 DAY".to_string(),
11002 ),
11003 ))),
11004 unit: None,
11005 },
11006 )),
11007 )));
11008 Ok(Expression::Cast(Box::new(Cast {
11009 this: minus_day,
11010 to: DataType::Date,
11011 trailing_comments: vec![],
11012 double_colon_syntax: false,
11013 format: None,
11014 default: None,
11015 inferred_type: None,
11016 })))
11017 }
11018 DialectType::Presto => {
11019 // LAST_DAY(date) -> LAST_DAY_OF_MONTH(date)
11020 Ok(Expression::Function(Box::new(Function::new(
11021 "LAST_DAY_OF_MONTH".to_string(),
11022 vec![ld.this],
11023 ))))
11024 }
11025 DialectType::ClickHouse => {
11026 // ClickHouse LAST_DAY(CAST(x AS Nullable(DATE)))
11027 // Need to wrap the DATE type in Nullable
11028 let nullable_date = match ld.this {
11029 Expression::Cast(mut c) => {
11030 c.to = DataType::Nullable {
11031 inner: Box::new(DataType::Date),
11032 };
11033 Expression::Cast(c)
11034 }
11035 other => other,
11036 };
11037 ld.this = nullable_date;
11038 Ok(Expression::LastDay(ld))
11039 }
11040 _ => Ok(Expression::LastDay(ld)),
11041 }
11042 } else {
11043 Ok(e)
11044 }
11045 }
11046
Action::BigQueryCastFormat => {
    // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE('%m/%d/%Y', x) for BigQuery
    // CAST(x AS TIMESTAMP FORMAT 'fmt') -> PARSE_TIMESTAMP(...) for BigQuery
    // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, ...) AS DATE) for DuckDB
    //
    // Only Cast/SafeCast nodes that actually carry a FORMAT clause are
    // rewritten; everything else falls through untouched.
    let (this, to, format_expr, is_safe) = match e {
        Expression::Cast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            false,
        ),
        Expression::SafeCast(ref c) if c.format.is_some() => (
            c.this.clone(),
            c.to.clone(),
            c.format.as_ref().unwrap().as_ref().clone(),
            // is_safe selects TRY_STRPTIME over STRPTIME for DuckDB below.
            true,
        ),
        _ => return Ok(e),
    };
    // For CAST(x AS STRING FORMAT ...) when target is BigQuery, keep as-is
    if matches!(target, DialectType::BigQuery) {
        match &to {
            DataType::String { .. } | DataType::VarChar { .. } | DataType::Text => {
                // CAST(x AS STRING FORMAT 'fmt') stays as CAST expression for BigQuery
                return Ok(e);
            }
            _ => {}
        }
    }
    // Extract timezone from format if AT TIME ZONE is present
    let (actual_format_expr, timezone) = match &format_expr {
        Expression::AtTimeZone(ref atz) => {
            (atz.this.clone(), Some(atz.zone.clone()))
        }
        _ => (format_expr.clone(), None),
    };
    // Translate the FORMAT model into a strftime-style string shared by
    // both target branches below.
    let strftime_fmt = Self::bq_cast_format_to_strftime(&actual_format_expr);
    match target {
        DialectType::BigQuery => {
            // CAST(x AS DATE FORMAT 'fmt') -> PARSE_DATE(strftime_fmt, x)
            // CAST(x AS TIMESTAMP FORMAT 'fmt' AT TIME ZONE 'tz') -> PARSE_TIMESTAMP(strftime_fmt, x, tz)
            let func_name = match &to {
                DataType::Date => "PARSE_DATE",
                DataType::Timestamp { .. } => "PARSE_TIMESTAMP",
                // NOTE(review): TIME also maps to PARSE_TIMESTAMP (not
                // PARSE_TIME) — confirm this is intentional.
                DataType::Time { .. } => "PARSE_TIMESTAMP",
                _ => "PARSE_TIMESTAMP",
            };
            let mut func_args = vec![strftime_fmt, this];
            if let Some(tz) = timezone {
                func_args.push(tz);
            }
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                func_args,
            ))))
        }
        DialectType::DuckDB => {
            // SAFE_CAST(x AS DATE FORMAT 'fmt') -> CAST(TRY_STRPTIME(x, fmt) AS DATE)
            // CAST(x AS DATE FORMAT 'fmt') -> CAST(STRPTIME(x, fmt) AS DATE)
            let duck_fmt = Self::bq_format_to_duckdb(&strftime_fmt);
            let parse_fn_name = if is_safe { "TRY_STRPTIME" } else { "STRPTIME" };
            let parse_call = Expression::Function(Box::new(Function::new(
                parse_fn_name.to_string(),
                vec![this, duck_fmt],
            )));
            // The FORMAT clause is consumed by STRPTIME, so the outer CAST
            // carries no format.
            Ok(Expression::Cast(Box::new(Cast {
                this: parse_call,
                to,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets keep the original CAST ... FORMAT expression.
        _ => Ok(e),
    }
}
11125
Action::BigQueryFunctionNormalize => {
    // Delegation: the full per-function mapping lives in
    // `normalize_bigquery_function`, which needs both dialect endpoints.
    Self::normalize_bigquery_function(e, source, target)
}
11129
11130 Action::BigQueryToHexBare => {
11131 // Not used anymore - handled directly in normalize_bigquery_function
11132 Ok(e)
11133 }
11134
Action::BigQueryToHexLower => {
    // Normalizes LOWER(...) wrappers that this pipeline introduces around
    // hex-conversion calls:
    //   - target BigQuery, LOWER(TO_HEX(x))   -> TO_HEX(x)      (drop redundant LOWER)
    //   - target BigQuery, LOWER(LOWER(F(x))) -> TO_HEX(F.args) (restore the TO_HEX name)
    //   - other targets,   LOWER(LOWER(x))    -> LOWER(x)       (flatten the double wrap)
    //   - anything else is rebuilt unchanged.
    if let Expression::Lower(uf) = e {
        match uf.this {
            // BQ->BQ: LOWER(TO_HEX(x)) -> TO_HEX(x)
            Expression::Function(f)
                if matches!(target, DialectType::BigQuery)
                    && f.name == "TO_HEX" =>
            {
                Ok(Expression::Function(f))
            }
            // LOWER(LOWER(HEX/TO_HEX(x))) patterns
            Expression::Lower(inner_uf) => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: extract TO_HEX, discarding whatever name the
                    // inner function currently carries.
                    if let Expression::Function(f) = inner_uf.this {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_HEX".to_string(),
                            f.args,
                        ))))
                    } else {
                        // Inner operand is not a call: keep a single LOWER.
                        Ok(Expression::Lower(inner_uf))
                    }
                } else {
                    // Flatten: LOWER(LOWER(x)) -> LOWER(x)
                    Ok(Expression::Lower(inner_uf))
                }
            }
            other => {
                // Not a recognized hex pattern: the outer UnaryFunc was
                // destructured above, so rebuild LOWER around the operand
                // unchanged.
                Ok(Expression::Lower(Box::new(crate::expressions::UnaryFunc {
                    this: other,
                    original_name: None,
                    inferred_type: None,
                })))
            }
        }
    } else {
        Ok(e)
    }
}
11174
Action::BigQueryToHexUpper => {
    // UPPER(LOWER(HEX(x))) -> HEX(x) (UPPER cancels LOWER, HEX is already uppercase)
    // UPPER(LOWER(TO_HEX(x))) -> TO_HEX(x) for Presto/Trino
    if let Expression::Upper(uf) = e {
        if let Expression::Lower(inner_uf) = uf.this {
            // For BQ->BQ: UPPER(TO_HEX(x)) should stay as UPPER(TO_HEX(x))
            if matches!(target, DialectType::BigQuery) {
                // Restore TO_HEX name in inner function
                if let Expression::Function(f) = inner_uf.this {
                    let restored = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        f.args,
                    )));
                    Ok(Expression::Upper(Box::new(
                        crate::expressions::UnaryFunc::new(restored),
                    )))
                } else {
                    // Inner operand is not a call: keep a single UPPER around
                    // whatever LOWER wrapped.
                    Ok(Expression::Upper(inner_uf))
                }
            } else {
                // Extract the inner HEX/TO_HEX function (UPPER(LOWER(x)) = x when HEX is uppercase)
                Ok(inner_uf.this)
            }
        } else {
            // No LOWER directly underneath UPPER: rebuild unchanged.
            Ok(Expression::Upper(uf))
        }
    } else {
        Ok(e)
    }
}
11205
11206 Action::BigQueryAnyValueHaving => {
11207 // ANY_VALUE(x HAVING MAX y) -> ARG_MAX_NULL(x, y)
11208 // ANY_VALUE(x HAVING MIN y) -> ARG_MIN_NULL(x, y)
11209 if let Expression::AnyValue(agg) = e {
11210 if let Some((having_expr, is_max)) = agg.having_max {
11211 let func_name = if is_max {
11212 "ARG_MAX_NULL"
11213 } else {
11214 "ARG_MIN_NULL"
11215 };
11216 Ok(Expression::Function(Box::new(Function::new(
11217 func_name.to_string(),
11218 vec![agg.this, *having_expr],
11219 ))))
11220 } else {
11221 Ok(Expression::AnyValue(agg))
11222 }
11223 } else {
11224 Ok(e)
11225 }
11226 }
11227
11228 Action::BigQueryApproxQuantiles => {
11229 // APPROX_QUANTILES(x, n) -> APPROX_QUANTILE(x, [0, 1/n, 2/n, ..., 1])
11230 // APPROX_QUANTILES(DISTINCT x, n) -> APPROX_QUANTILE(DISTINCT x, [0, 1/n, ..., 1])
11231 if let Expression::AggregateFunction(agg) = e {
11232 if agg.args.len() >= 2 {
11233 let x_expr = agg.args[0].clone();
11234 let n_expr = &agg.args[1];
11235
11236 // Extract the numeric value from n_expr
11237 let n = match n_expr {
11238 Expression::Literal(lit)
11239 if matches!(
11240 lit.as_ref(),
11241 crate::expressions::Literal::Number(_)
11242 ) =>
11243 {
11244 let crate::expressions::Literal::Number(s) = lit.as_ref()
11245 else {
11246 unreachable!()
11247 };
11248 s.parse::<usize>().unwrap_or(2)
11249 }
11250 _ => 2,
11251 };
11252
11253 // Generate quantile array: [0, 1/n, 2/n, ..., 1]
11254 let mut quantiles = Vec::new();
11255 for i in 0..=n {
11256 let q = i as f64 / n as f64;
11257 // Format nicely: 0 -> 0, 0.25 -> 0.25, 1 -> 1
11258 if q == 0.0 {
11259 quantiles.push(Expression::number(0));
11260 } else if q == 1.0 {
11261 quantiles.push(Expression::number(1));
11262 } else {
11263 quantiles.push(Expression::Literal(Box::new(
11264 crate::expressions::Literal::Number(format!("{}", q)),
11265 )));
11266 }
11267 }
11268
11269 let array_expr =
11270 Expression::Array(Box::new(crate::expressions::Array {
11271 expressions: quantiles,
11272 }));
11273
11274 // Preserve DISTINCT modifier
11275 let mut new_func = Function::new(
11276 "APPROX_QUANTILE".to_string(),
11277 vec![x_expr, array_expr],
11278 );
11279 new_func.distinct = agg.distinct;
11280 Ok(Expression::Function(Box::new(new_func)))
11281 } else {
11282 Ok(Expression::AggregateFunction(agg))
11283 }
11284 } else {
11285 Ok(e)
11286 }
11287 }
11288
11289 Action::GenericFunctionNormalize => {
11290 // Helper closure to convert ARBITRARY to target-specific function
11291 fn convert_arbitrary(arg: Expression, target: DialectType) -> Expression {
11292 let name = match target {
11293 DialectType::ClickHouse => "any",
11294 DialectType::TSQL | DialectType::SQLite => "MAX",
11295 DialectType::Hive => "FIRST",
11296 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
11297 "ARBITRARY"
11298 }
11299 _ => "ANY_VALUE",
11300 };
11301 Expression::Function(Box::new(Function::new(name.to_string(), vec![arg])))
11302 }
11303
11304 if let Expression::Function(f) = e {
11305 let name = f.name.to_ascii_uppercase();
11306 match name.as_str() {
11307 "ARBITRARY" if f.args.len() == 1 => {
11308 let arg = f.args.into_iter().next().unwrap();
11309 Ok(convert_arbitrary(arg, target))
11310 }
11311 "TO_NUMBER" if f.args.len() == 1 => {
11312 let arg = f.args.into_iter().next().unwrap();
11313 match target {
11314 DialectType::Oracle | DialectType::Snowflake => {
11315 Ok(Expression::Function(Box::new(Function::new(
11316 "TO_NUMBER".to_string(),
11317 vec![arg],
11318 ))))
11319 }
11320 _ => Ok(Expression::Cast(Box::new(crate::expressions::Cast {
11321 this: arg,
11322 to: crate::expressions::DataType::Double {
11323 precision: None,
11324 scale: None,
11325 },
11326 double_colon_syntax: false,
11327 trailing_comments: Vec::new(),
11328 format: None,
11329 default: None,
11330 inferred_type: None,
11331 }))),
11332 }
11333 }
11334 "AGGREGATE" if f.args.len() >= 3 => match target {
11335 DialectType::DuckDB
11336 | DialectType::Hive
11337 | DialectType::Presto
11338 | DialectType::Trino => Ok(Expression::Function(Box::new(
11339 Function::new("REDUCE".to_string(), f.args),
11340 ))),
11341 _ => Ok(Expression::Function(f)),
11342 },
11343 // REGEXP_MATCHES(x, y) -> RegexpLike for most targets, keep as-is for DuckDB
11344 "REGEXP_MATCHES" if f.args.len() >= 2 => {
11345 if matches!(target, DialectType::DuckDB) {
11346 Ok(Expression::Function(f))
11347 } else {
11348 let mut args = f.args;
11349 let this = args.remove(0);
11350 let pattern = args.remove(0);
11351 let flags = if args.is_empty() {
11352 None
11353 } else {
11354 Some(args.remove(0))
11355 };
11356 Ok(Expression::RegexpLike(Box::new(
11357 crate::expressions::RegexpFunc {
11358 this,
11359 pattern,
11360 flags,
11361 },
11362 )))
11363 }
11364 }
11365 // REGEXP_FULL_MATCH (Hive REGEXP) -> RegexpLike
11366 "REGEXP_FULL_MATCH" if f.args.len() >= 2 => {
11367 if matches!(target, DialectType::DuckDB) {
11368 Ok(Expression::Function(f))
11369 } else {
11370 let mut args = f.args;
11371 let this = args.remove(0);
11372 let pattern = args.remove(0);
11373 let flags = if args.is_empty() {
11374 None
11375 } else {
11376 Some(args.remove(0))
11377 };
11378 Ok(Expression::RegexpLike(Box::new(
11379 crate::expressions::RegexpFunc {
11380 this,
11381 pattern,
11382 flags,
11383 },
11384 )))
11385 }
11386 }
11387 // STRUCT_EXTRACT(x, 'field') -> x.field (StructExtract expression)
11388 "STRUCT_EXTRACT" if f.args.len() == 2 => {
11389 let mut args = f.args;
11390 let this = args.remove(0);
11391 let field_expr = args.remove(0);
11392 // Extract string literal to get field name
11393 let field_name = match &field_expr {
11394 Expression::Literal(lit)
11395 if matches!(
11396 lit.as_ref(),
11397 crate::expressions::Literal::String(_)
11398 ) =>
11399 {
11400 let crate::expressions::Literal::String(s) = lit.as_ref()
11401 else {
11402 unreachable!()
11403 };
11404 s.clone()
11405 }
11406 Expression::Identifier(id) => id.name.clone(),
11407 _ => {
11408 return Ok(Expression::Function(Box::new(Function::new(
11409 "STRUCT_EXTRACT".to_string(),
11410 vec![this, field_expr],
11411 ))))
11412 }
11413 };
11414 Ok(Expression::StructExtract(Box::new(
11415 crate::expressions::StructExtractFunc {
11416 this,
11417 field: crate::expressions::Identifier::new(field_name),
11418 },
11419 )))
11420 }
11421 // LIST_FILTER([4,5,6], x -> x > 4) -> FILTER(ARRAY(4,5,6), x -> x > 4)
11422 "LIST_FILTER" if f.args.len() == 2 => {
11423 let name = match target {
11424 DialectType::DuckDB => "LIST_FILTER",
11425 _ => "FILTER",
11426 };
11427 Ok(Expression::Function(Box::new(Function::new(
11428 name.to_string(),
11429 f.args,
11430 ))))
11431 }
11432 // LIST_TRANSFORM(x, y -> y + 1) -> TRANSFORM(x, y -> y + 1)
11433 "LIST_TRANSFORM" if f.args.len() == 2 => {
11434 let name = match target {
11435 DialectType::DuckDB => "LIST_TRANSFORM",
11436 _ => "TRANSFORM",
11437 };
11438 Ok(Expression::Function(Box::new(Function::new(
11439 name.to_string(),
11440 f.args,
11441 ))))
11442 }
11443 // LIST_SORT(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, SORT_ARRAY(x) for others
11444 "LIST_SORT" if f.args.len() >= 1 => {
11445 let name = match target {
11446 DialectType::DuckDB => "LIST_SORT",
11447 DialectType::Presto | DialectType::Trino => "ARRAY_SORT",
11448 _ => "SORT_ARRAY",
11449 };
11450 Ok(Expression::Function(Box::new(Function::new(
11451 name.to_string(),
11452 f.args,
11453 ))))
11454 }
11455 // LIST_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11456 "LIST_REVERSE_SORT" if f.args.len() >= 1 => {
11457 match target {
11458 DialectType::DuckDB => Ok(Expression::Function(Box::new(
11459 Function::new("ARRAY_REVERSE_SORT".to_string(), f.args),
11460 ))),
11461 DialectType::Spark
11462 | DialectType::Databricks
11463 | DialectType::Hive => {
11464 let mut args = f.args;
11465 args.push(Expression::Identifier(
11466 crate::expressions::Identifier::new("FALSE"),
11467 ));
11468 Ok(Expression::Function(Box::new(Function::new(
11469 "SORT_ARRAY".to_string(),
11470 args,
11471 ))))
11472 }
11473 DialectType::Presto
11474 | DialectType::Trino
11475 | DialectType::Athena => {
11476 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11477 let arr = f.args.into_iter().next().unwrap();
11478 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
11479 parameters: vec![
11480 crate::expressions::Identifier::new("a"),
11481 crate::expressions::Identifier::new("b"),
11482 ],
11483 body: Expression::Case(Box::new(Case {
11484 operand: None,
11485 whens: vec![
11486 (
11487 Expression::Lt(Box::new(BinaryOp::new(
11488 Expression::Identifier(crate::expressions::Identifier::new("a")),
11489 Expression::Identifier(crate::expressions::Identifier::new("b")),
11490 ))),
11491 Expression::number(1),
11492 ),
11493 (
11494 Expression::Gt(Box::new(BinaryOp::new(
11495 Expression::Identifier(crate::expressions::Identifier::new("a")),
11496 Expression::Identifier(crate::expressions::Identifier::new("b")),
11497 ))),
11498 Expression::Literal(Box::new(Literal::Number("-1".to_string()))),
11499 ),
11500 ],
11501 else_: Some(Expression::number(0)),
11502 comments: Vec::new(),
11503 inferred_type: None,
11504 })),
11505 colon: false,
11506 parameter_types: Vec::new(),
11507 }));
11508 Ok(Expression::Function(Box::new(Function::new(
11509 "ARRAY_SORT".to_string(),
11510 vec![arr, lambda],
11511 ))))
11512 }
11513 _ => Ok(Expression::Function(Box::new(Function::new(
11514 "LIST_REVERSE_SORT".to_string(),
11515 f.args,
11516 )))),
11517 }
11518 }
11519 // SPLIT_TO_ARRAY(x) with 1 arg -> add default ',' separator and rename
11520 "SPLIT_TO_ARRAY" if f.args.len() == 1 => {
11521 let mut args = f.args;
11522 args.push(Expression::string(","));
11523 let name = match target {
11524 DialectType::DuckDB => "STR_SPLIT",
11525 DialectType::Presto | DialectType::Trino => "SPLIT",
11526 DialectType::Spark
11527 | DialectType::Databricks
11528 | DialectType::Hive => "SPLIT",
11529 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11530 DialectType::Redshift => "SPLIT_TO_ARRAY",
11531 _ => "SPLIT",
11532 };
11533 Ok(Expression::Function(Box::new(Function::new(
11534 name.to_string(),
11535 args,
11536 ))))
11537 }
11538 // SPLIT_TO_ARRAY(x, sep) with 2 args -> rename based on target
11539 "SPLIT_TO_ARRAY" if f.args.len() == 2 => {
11540 let name = match target {
11541 DialectType::DuckDB => "STR_SPLIT",
11542 DialectType::Presto | DialectType::Trino => "SPLIT",
11543 DialectType::Spark
11544 | DialectType::Databricks
11545 | DialectType::Hive => "SPLIT",
11546 DialectType::PostgreSQL => "STRING_TO_ARRAY",
11547 DialectType::Redshift => "SPLIT_TO_ARRAY",
11548 _ => "SPLIT",
11549 };
11550 Ok(Expression::Function(Box::new(Function::new(
11551 name.to_string(),
11552 f.args,
11553 ))))
11554 }
11555 // STRING_TO_ARRAY/STR_SPLIT -> target-specific split function
11556 "STRING_TO_ARRAY" | "STR_SPLIT" if f.args.len() >= 2 => {
11557 let name = match target {
11558 DialectType::DuckDB => "STR_SPLIT",
11559 DialectType::Presto | DialectType::Trino => "SPLIT",
11560 DialectType::Spark
11561 | DialectType::Databricks
11562 | DialectType::Hive => "SPLIT",
11563 DialectType::Doris | DialectType::StarRocks => {
11564 "SPLIT_BY_STRING"
11565 }
11566 DialectType::PostgreSQL | DialectType::Redshift => {
11567 "STRING_TO_ARRAY"
11568 }
11569 _ => "SPLIT",
11570 };
11571 // For Spark/Hive, SPLIT uses regex - need to escape literal with \Q...\E
11572 if matches!(
11573 target,
11574 DialectType::Spark
11575 | DialectType::Databricks
11576 | DialectType::Hive
11577 ) {
11578 let mut args = f.args;
11579 let x = args.remove(0);
11580 let sep = args.remove(0);
11581 // Wrap separator in CONCAT('\\Q', sep, '\\E')
11582 let escaped_sep =
11583 Expression::Function(Box::new(Function::new(
11584 "CONCAT".to_string(),
11585 vec![
11586 Expression::string("\\Q"),
11587 sep,
11588 Expression::string("\\E"),
11589 ],
11590 )));
11591 Ok(Expression::Function(Box::new(Function::new(
11592 name.to_string(),
11593 vec![x, escaped_sep],
11594 ))))
11595 } else {
11596 Ok(Expression::Function(Box::new(Function::new(
11597 name.to_string(),
11598 f.args,
11599 ))))
11600 }
11601 }
11602 // STR_SPLIT_REGEX(x, 'a') / REGEXP_SPLIT(x, 'a') -> target-specific regex split
11603 "STR_SPLIT_REGEX" | "REGEXP_SPLIT" if f.args.len() == 2 => {
11604 let name = match target {
11605 DialectType::DuckDB => "STR_SPLIT_REGEX",
11606 DialectType::Presto | DialectType::Trino => "REGEXP_SPLIT",
11607 DialectType::Spark
11608 | DialectType::Databricks
11609 | DialectType::Hive => "SPLIT",
11610 _ => "REGEXP_SPLIT",
11611 };
11612 Ok(Expression::Function(Box::new(Function::new(
11613 name.to_string(),
11614 f.args,
11615 ))))
11616 }
11617 // SPLIT(str, delim) from Snowflake -> DuckDB with CASE wrapper
11618 "SPLIT"
11619 if f.args.len() == 2
11620 && matches!(source, DialectType::Snowflake)
11621 && matches!(target, DialectType::DuckDB) =>
11622 {
11623 let mut args = f.args;
11624 let str_arg = args.remove(0);
11625 let delim_arg = args.remove(0);
11626
11627 // STR_SPLIT(str, delim) as the base
11628 let base_func = Expression::Function(Box::new(Function::new(
11629 "STR_SPLIT".to_string(),
11630 vec![str_arg.clone(), delim_arg.clone()],
11631 )));
11632
11633 // [str] - array with single element
11634 let array_with_input =
11635 Expression::Array(Box::new(crate::expressions::Array {
11636 expressions: vec![str_arg],
11637 }));
11638
11639 // CASE
11640 // WHEN delim IS NULL THEN NULL
11641 // WHEN delim = '' THEN [str]
11642 // ELSE STR_SPLIT(str, delim)
11643 // END
11644 Ok(Expression::Case(Box::new(Case {
11645 operand: None,
11646 whens: vec![
11647 (
11648 Expression::Is(Box::new(BinaryOp {
11649 left: delim_arg.clone(),
11650 right: Expression::Null(Null),
11651 left_comments: vec![],
11652 operator_comments: vec![],
11653 trailing_comments: vec![],
11654 inferred_type: None,
11655 })),
11656 Expression::Null(Null),
11657 ),
11658 (
11659 Expression::Eq(Box::new(BinaryOp {
11660 left: delim_arg,
11661 right: Expression::string(""),
11662 left_comments: vec![],
11663 operator_comments: vec![],
11664 trailing_comments: vec![],
11665 inferred_type: None,
11666 })),
11667 array_with_input,
11668 ),
11669 ],
11670 else_: Some(base_func),
11671 comments: vec![],
11672 inferred_type: None,
11673 })))
11674 }
11675 // SPLIT(x, sep) from Presto/StarRocks/Doris -> target-specific split with regex escaping for Hive/Spark
11676 "SPLIT"
11677 if f.args.len() == 2
11678 && matches!(
11679 source,
11680 DialectType::Presto
11681 | DialectType::Trino
11682 | DialectType::Athena
11683 | DialectType::StarRocks
11684 | DialectType::Doris
11685 )
11686 && matches!(
11687 target,
11688 DialectType::Spark
11689 | DialectType::Databricks
11690 | DialectType::Hive
11691 ) =>
11692 {
11693 // Presto/StarRocks SPLIT is literal, Hive/Spark SPLIT is regex
11694 let mut args = f.args;
11695 let x = args.remove(0);
11696 let sep = args.remove(0);
11697 let escaped_sep = Expression::Function(Box::new(Function::new(
11698 "CONCAT".to_string(),
11699 vec![Expression::string("\\Q"), sep, Expression::string("\\E")],
11700 )));
11701 Ok(Expression::Function(Box::new(Function::new(
11702 "SPLIT".to_string(),
11703 vec![x, escaped_sep],
11704 ))))
11705 }
11706 // SUBSTRINGINDEX -> SUBSTRING_INDEX (ClickHouse camelCase to standard)
11707 // For ClickHouse target, preserve original name to maintain camelCase
11708 "SUBSTRINGINDEX" => {
11709 let name = if matches!(target, DialectType::ClickHouse) {
11710 f.name.clone()
11711 } else {
11712 "SUBSTRING_INDEX".to_string()
11713 };
11714 Ok(Expression::Function(Box::new(Function::new(name, f.args))))
11715 }
11716 // ARRAY_LENGTH/SIZE/CARDINALITY -> target-specific array length function
11717 "ARRAY_LENGTH" | "SIZE" | "CARDINALITY" => {
11718 // DuckDB source CARDINALITY -> DuckDB target: keep as CARDINALITY (used for maps)
11719 if name == "CARDINALITY"
11720 && matches!(source, DialectType::DuckDB)
11721 && matches!(target, DialectType::DuckDB)
11722 {
11723 return Ok(Expression::Function(f));
11724 }
11725 // Get the array argument (first arg, drop dimension args)
11726 let mut args = f.args;
11727 let arr = if args.is_empty() {
11728 return Ok(Expression::Function(Box::new(Function::new(
11729 name.to_string(),
11730 args,
11731 ))));
11732 } else {
11733 args.remove(0)
11734 };
11735 let name =
11736 match target {
11737 DialectType::Spark
11738 | DialectType::Databricks
11739 | DialectType::Hive => "SIZE",
11740 DialectType::Presto | DialectType::Trino => "CARDINALITY",
11741 DialectType::BigQuery => "ARRAY_LENGTH",
11742 DialectType::DuckDB => {
11743 // DuckDB: use ARRAY_LENGTH with all args
11744 let mut all_args = vec![arr];
11745 all_args.extend(args);
11746 return Ok(Expression::Function(Box::new(
11747 Function::new("ARRAY_LENGTH".to_string(), all_args),
11748 )));
11749 }
11750 DialectType::PostgreSQL | DialectType::Redshift => {
11751 // Keep ARRAY_LENGTH with dimension arg
11752 let mut all_args = vec![arr];
11753 all_args.extend(args);
11754 return Ok(Expression::Function(Box::new(
11755 Function::new("ARRAY_LENGTH".to_string(), all_args),
11756 )));
11757 }
11758 DialectType::ClickHouse => "LENGTH",
11759 _ => "ARRAY_LENGTH",
11760 };
11761 Ok(Expression::Function(Box::new(Function::new(
11762 name.to_string(),
11763 vec![arr],
11764 ))))
11765 }
11766 // TO_VARIANT(x) -> CAST(x AS VARIANT) for DuckDB
11767 "TO_VARIANT" if f.args.len() == 1 => match target {
11768 DialectType::DuckDB => {
11769 let arg = f.args.into_iter().next().unwrap();
11770 Ok(Expression::Cast(Box::new(Cast {
11771 this: arg,
11772 to: DataType::Custom {
11773 name: "VARIANT".to_string(),
11774 },
11775 double_colon_syntax: false,
11776 trailing_comments: Vec::new(),
11777 format: None,
11778 default: None,
11779 inferred_type: None,
11780 })))
11781 }
11782 _ => Ok(Expression::Function(f)),
11783 },
11784 // JSON_GROUP_ARRAY(x) -> JSON_AGG(x) for PostgreSQL
11785 "JSON_GROUP_ARRAY" if f.args.len() == 1 => match target {
11786 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11787 Function::new("JSON_AGG".to_string(), f.args),
11788 ))),
11789 _ => Ok(Expression::Function(f)),
11790 },
11791 // JSON_GROUP_OBJECT(key, value) -> JSON_OBJECT_AGG(key, value) for PostgreSQL
11792 "JSON_GROUP_OBJECT" if f.args.len() == 2 => match target {
11793 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
11794 Function::new("JSON_OBJECT_AGG".to_string(), f.args),
11795 ))),
11796 _ => Ok(Expression::Function(f)),
11797 },
11798 // UNICODE(x) -> target-specific codepoint function
11799 "UNICODE" if f.args.len() == 1 => {
11800 match target {
11801 DialectType::SQLite | DialectType::DuckDB => {
11802 Ok(Expression::Function(Box::new(Function::new(
11803 "UNICODE".to_string(),
11804 f.args,
11805 ))))
11806 }
11807 DialectType::Oracle => {
11808 // ASCII(UNISTR(x))
11809 let inner = Expression::Function(Box::new(Function::new(
11810 "UNISTR".to_string(),
11811 f.args,
11812 )));
11813 Ok(Expression::Function(Box::new(Function::new(
11814 "ASCII".to_string(),
11815 vec![inner],
11816 ))))
11817 }
11818 DialectType::MySQL => {
11819 // ORD(CONVERT(x USING utf32))
11820 let arg = f.args.into_iter().next().unwrap();
11821 let convert_expr = Expression::ConvertToCharset(Box::new(
11822 crate::expressions::ConvertToCharset {
11823 this: Box::new(arg),
11824 dest: Some(Box::new(Expression::Identifier(
11825 crate::expressions::Identifier::new("utf32"),
11826 ))),
11827 source: None,
11828 },
11829 ));
11830 Ok(Expression::Function(Box::new(Function::new(
11831 "ORD".to_string(),
11832 vec![convert_expr],
11833 ))))
11834 }
11835 _ => Ok(Expression::Function(Box::new(Function::new(
11836 "ASCII".to_string(),
11837 f.args,
11838 )))),
11839 }
11840 }
11841 // XOR(a, b, ...) -> a XOR b XOR ... for MySQL, BITWISE_XOR for Presto/Trino, # for PostgreSQL, ^ for BigQuery
11842 "XOR" if f.args.len() >= 2 => {
11843 match target {
11844 DialectType::ClickHouse => {
11845 // ClickHouse: keep as xor() function with lowercase name
11846 Ok(Expression::Function(Box::new(Function::new(
11847 "xor".to_string(),
11848 f.args,
11849 ))))
11850 }
11851 DialectType::Presto | DialectType::Trino => {
11852 if f.args.len() == 2 {
11853 Ok(Expression::Function(Box::new(Function::new(
11854 "BITWISE_XOR".to_string(),
11855 f.args,
11856 ))))
11857 } else {
11858 // Nest: BITWISE_XOR(BITWISE_XOR(a, b), c)
11859 let mut args = f.args;
11860 let first = args.remove(0);
11861 let second = args.remove(0);
11862 let mut result =
11863 Expression::Function(Box::new(Function::new(
11864 "BITWISE_XOR".to_string(),
11865 vec![first, second],
11866 )));
11867 for arg in args {
11868 result =
11869 Expression::Function(Box::new(Function::new(
11870 "BITWISE_XOR".to_string(),
11871 vec![result, arg],
11872 )));
11873 }
11874 Ok(result)
11875 }
11876 }
11877 DialectType::MySQL
11878 | DialectType::SingleStore
11879 | DialectType::Doris
11880 | DialectType::StarRocks => {
11881 // Convert XOR(a, b, c) -> Expression::Xor with expressions list
11882 let args = f.args;
11883 Ok(Expression::Xor(Box::new(crate::expressions::Xor {
11884 this: None,
11885 expression: None,
11886 expressions: args,
11887 })))
11888 }
11889 DialectType::PostgreSQL | DialectType::Redshift => {
11890 // PostgreSQL: a # b (hash operator for XOR)
11891 let mut args = f.args;
11892 let first = args.remove(0);
11893 let second = args.remove(0);
11894 let mut result = Expression::BitwiseXor(Box::new(
11895 BinaryOp::new(first, second),
11896 ));
11897 for arg in args {
11898 result = Expression::BitwiseXor(Box::new(
11899 BinaryOp::new(result, arg),
11900 ));
11901 }
11902 Ok(result)
11903 }
11904 DialectType::DuckDB => {
11905 // DuckDB: keep as XOR function (DuckDB ^ is Power, not XOR)
11906 Ok(Expression::Function(Box::new(Function::new(
11907 "XOR".to_string(),
11908 f.args,
11909 ))))
11910 }
11911 DialectType::BigQuery => {
11912 // BigQuery: a ^ b (caret operator for XOR)
11913 let mut args = f.args;
11914 let first = args.remove(0);
11915 let second = args.remove(0);
11916 let mut result = Expression::BitwiseXor(Box::new(
11917 BinaryOp::new(first, second),
11918 ));
11919 for arg in args {
11920 result = Expression::BitwiseXor(Box::new(
11921 BinaryOp::new(result, arg),
11922 ));
11923 }
11924 Ok(result)
11925 }
11926 _ => Ok(Expression::Function(Box::new(Function::new(
11927 "XOR".to_string(),
11928 f.args,
11929 )))),
11930 }
11931 }
11932 // ARRAY_REVERSE_SORT(x) -> SORT_ARRAY(x, FALSE) for Spark/Hive, ARRAY_SORT(x, lambda) for Presto
11933 "ARRAY_REVERSE_SORT" if f.args.len() >= 1 => {
11934 match target {
11935 DialectType::Spark
11936 | DialectType::Databricks
11937 | DialectType::Hive => {
11938 let mut args = f.args;
11939 args.push(Expression::Identifier(
11940 crate::expressions::Identifier::new("FALSE"),
11941 ));
11942 Ok(Expression::Function(Box::new(Function::new(
11943 "SORT_ARRAY".to_string(),
11944 args,
11945 ))))
11946 }
11947 DialectType::Presto
11948 | DialectType::Trino
11949 | DialectType::Athena => {
11950 // ARRAY_SORT(x, (a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END)
11951 let arr = f.args.into_iter().next().unwrap();
11952 let lambda = Expression::Lambda(Box::new(
11953 crate::expressions::LambdaExpr {
11954 parameters: vec![
11955 Identifier::new("a"),
11956 Identifier::new("b"),
11957 ],
11958 colon: false,
11959 parameter_types: Vec::new(),
11960 body: Expression::Case(Box::new(Case {
11961 operand: None,
11962 whens: vec![
11963 (
11964 Expression::Lt(Box::new(
11965 BinaryOp::new(
11966 Expression::Identifier(
11967 Identifier::new("a"),
11968 ),
11969 Expression::Identifier(
11970 Identifier::new("b"),
11971 ),
11972 ),
11973 )),
11974 Expression::number(1),
11975 ),
11976 (
11977 Expression::Gt(Box::new(
11978 BinaryOp::new(
11979 Expression::Identifier(
11980 Identifier::new("a"),
11981 ),
11982 Expression::Identifier(
11983 Identifier::new("b"),
11984 ),
11985 ),
11986 )),
11987 Expression::Neg(Box::new(
11988 crate::expressions::UnaryOp {
11989 this: Expression::number(1),
11990 inferred_type: None,
11991 },
11992 )),
11993 ),
11994 ],
11995 else_: Some(Expression::number(0)),
11996 comments: Vec::new(),
11997 inferred_type: None,
11998 })),
11999 },
12000 ));
12001 Ok(Expression::Function(Box::new(Function::new(
12002 "ARRAY_SORT".to_string(),
12003 vec![arr, lambda],
12004 ))))
12005 }
12006 _ => Ok(Expression::Function(Box::new(Function::new(
12007 "ARRAY_REVERSE_SORT".to_string(),
12008 f.args,
12009 )))),
12010 }
12011 }
12012 // ENCODE(x) -> ENCODE(x, 'utf-8') for Spark/Hive, TO_UTF8(x) for Presto
12013 "ENCODE" if f.args.len() == 1 => match target {
12014 DialectType::Spark
12015 | DialectType::Databricks
12016 | DialectType::Hive => {
12017 let mut args = f.args;
12018 args.push(Expression::string("utf-8"));
12019 Ok(Expression::Function(Box::new(Function::new(
12020 "ENCODE".to_string(),
12021 args,
12022 ))))
12023 }
12024 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12025 Ok(Expression::Function(Box::new(Function::new(
12026 "TO_UTF8".to_string(),
12027 f.args,
12028 ))))
12029 }
12030 _ => Ok(Expression::Function(Box::new(Function::new(
12031 "ENCODE".to_string(),
12032 f.args,
12033 )))),
12034 },
12035 // DECODE(x) -> DECODE(x, 'utf-8') for Spark/Hive, FROM_UTF8(x) for Presto
12036 "DECODE" if f.args.len() == 1 => match target {
12037 DialectType::Spark
12038 | DialectType::Databricks
12039 | DialectType::Hive => {
12040 let mut args = f.args;
12041 args.push(Expression::string("utf-8"));
12042 Ok(Expression::Function(Box::new(Function::new(
12043 "DECODE".to_string(),
12044 args,
12045 ))))
12046 }
12047 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
12048 Ok(Expression::Function(Box::new(Function::new(
12049 "FROM_UTF8".to_string(),
12050 f.args,
12051 ))))
12052 }
12053 _ => Ok(Expression::Function(Box::new(Function::new(
12054 "DECODE".to_string(),
12055 f.args,
12056 )))),
12057 },
12058 // QUANTILE(x, p) -> PERCENTILE(x, p) for Spark/Hive
12059 "QUANTILE" if f.args.len() == 2 => {
12060 let name = match target {
12061 DialectType::Spark
12062 | DialectType::Databricks
12063 | DialectType::Hive => "PERCENTILE",
12064 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
12065 DialectType::BigQuery => "PERCENTILE_CONT",
12066 _ => "QUANTILE",
12067 };
12068 Ok(Expression::Function(Box::new(Function::new(
12069 name.to_string(),
12070 f.args,
12071 ))))
12072 }
12073 // QUANTILE_CONT(x, q) -> PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
12074 "QUANTILE_CONT" if f.args.len() == 2 => {
12075 let mut args = f.args;
12076 let column = args.remove(0);
12077 let quantile = args.remove(0);
12078 match target {
12079 DialectType::DuckDB => {
12080 Ok(Expression::Function(Box::new(Function::new(
12081 "QUANTILE_CONT".to_string(),
12082 vec![column, quantile],
12083 ))))
12084 }
12085 DialectType::PostgreSQL
12086 | DialectType::Redshift
12087 | DialectType::Snowflake => {
12088 // PERCENTILE_CONT(q) WITHIN GROUP (ORDER BY x)
12089 let inner = Expression::PercentileCont(Box::new(
12090 crate::expressions::PercentileFunc {
12091 this: column.clone(),
12092 percentile: quantile,
12093 order_by: None,
12094 filter: None,
12095 },
12096 ));
12097 Ok(Expression::WithinGroup(Box::new(
12098 crate::expressions::WithinGroup {
12099 this: inner,
12100 order_by: vec![crate::expressions::Ordered {
12101 this: column,
12102 desc: false,
12103 nulls_first: None,
12104 explicit_asc: false,
12105 with_fill: None,
12106 }],
12107 },
12108 )))
12109 }
12110 _ => Ok(Expression::Function(Box::new(Function::new(
12111 "QUANTILE_CONT".to_string(),
12112 vec![column, quantile],
12113 )))),
12114 }
12115 }
12116 // QUANTILE_DISC(x, q) -> PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x) for PostgreSQL/Snowflake
12117 "QUANTILE_DISC" if f.args.len() == 2 => {
12118 let mut args = f.args;
12119 let column = args.remove(0);
12120 let quantile = args.remove(0);
12121 match target {
12122 DialectType::DuckDB => {
12123 Ok(Expression::Function(Box::new(Function::new(
12124 "QUANTILE_DISC".to_string(),
12125 vec![column, quantile],
12126 ))))
12127 }
12128 DialectType::PostgreSQL
12129 | DialectType::Redshift
12130 | DialectType::Snowflake => {
12131 // PERCENTILE_DISC(q) WITHIN GROUP (ORDER BY x)
12132 let inner = Expression::PercentileDisc(Box::new(
12133 crate::expressions::PercentileFunc {
12134 this: column.clone(),
12135 percentile: quantile,
12136 order_by: None,
12137 filter: None,
12138 },
12139 ));
12140 Ok(Expression::WithinGroup(Box::new(
12141 crate::expressions::WithinGroup {
12142 this: inner,
12143 order_by: vec![crate::expressions::Ordered {
12144 this: column,
12145 desc: false,
12146 nulls_first: None,
12147 explicit_asc: false,
12148 with_fill: None,
12149 }],
12150 },
12151 )))
12152 }
12153 _ => Ok(Expression::Function(Box::new(Function::new(
12154 "QUANTILE_DISC".to_string(),
12155 vec![column, quantile],
12156 )))),
12157 }
12158 }
12159 // PERCENTILE_APPROX(x, p) / APPROX_PERCENTILE(x, p) -> target-specific
12160 "PERCENTILE_APPROX" | "APPROX_PERCENTILE" if f.args.len() >= 2 => {
12161 let name = match target {
12162 DialectType::Presto
12163 | DialectType::Trino
12164 | DialectType::Athena => "APPROX_PERCENTILE",
12165 DialectType::Spark
12166 | DialectType::Databricks
12167 | DialectType::Hive => "PERCENTILE_APPROX",
12168 DialectType::DuckDB => "APPROX_QUANTILE",
12169 DialectType::PostgreSQL | DialectType::Redshift => {
12170 "PERCENTILE_CONT"
12171 }
12172 _ => &f.name,
12173 };
12174 Ok(Expression::Function(Box::new(Function::new(
12175 name.to_string(),
12176 f.args,
12177 ))))
12178 }
12179 // EPOCH(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
12180 "EPOCH" if f.args.len() == 1 => {
12181 let name = match target {
12182 DialectType::Spark
12183 | DialectType::Databricks
12184 | DialectType::Hive => "UNIX_TIMESTAMP",
12185 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
12186 _ => "EPOCH",
12187 };
12188 Ok(Expression::Function(Box::new(Function::new(
12189 name.to_string(),
12190 f.args,
12191 ))))
12192 }
12193 // EPOCH_MS(x) -> target-specific epoch milliseconds conversion
12194 "EPOCH_MS" if f.args.len() == 1 => {
12195 match target {
12196 DialectType::Spark | DialectType::Databricks => {
12197 Ok(Expression::Function(Box::new(Function::new(
12198 "TIMESTAMP_MILLIS".to_string(),
12199 f.args,
12200 ))))
12201 }
12202 DialectType::Hive => {
12203 // Hive: FROM_UNIXTIME(x / 1000)
12204 let arg = f.args.into_iter().next().unwrap();
12205 let div_expr = Expression::Div(Box::new(
12206 crate::expressions::BinaryOp::new(
12207 arg,
12208 Expression::number(1000),
12209 ),
12210 ));
12211 Ok(Expression::Function(Box::new(Function::new(
12212 "FROM_UNIXTIME".to_string(),
12213 vec![div_expr],
12214 ))))
12215 }
12216 DialectType::Presto | DialectType::Trino => {
12217 Ok(Expression::Function(Box::new(Function::new(
12218 "FROM_UNIXTIME".to_string(),
12219 vec![Expression::Div(Box::new(
12220 crate::expressions::BinaryOp::new(
12221 f.args.into_iter().next().unwrap(),
12222 Expression::number(1000),
12223 ),
12224 ))],
12225 ))))
12226 }
12227 _ => Ok(Expression::Function(Box::new(Function::new(
12228 "EPOCH_MS".to_string(),
12229 f.args,
12230 )))),
12231 }
12232 }
12233 // HASHBYTES('algorithm', x) -> target-specific hash function
12234 "HASHBYTES" if f.args.len() == 2 => {
12235 // Keep HASHBYTES as-is for TSQL target
12236 if matches!(target, DialectType::TSQL) {
12237 return Ok(Expression::Function(f));
12238 }
12239 let algo_expr = &f.args[0];
12240 let algo = match algo_expr {
12241 Expression::Literal(lit)
12242 if matches!(
12243 lit.as_ref(),
12244 crate::expressions::Literal::String(_)
12245 ) =>
12246 {
12247 let crate::expressions::Literal::String(s) = lit.as_ref()
12248 else {
12249 unreachable!()
12250 };
12251 s.to_ascii_uppercase()
12252 }
12253 _ => return Ok(Expression::Function(f)),
12254 };
12255 let data_arg = f.args.into_iter().nth(1).unwrap();
12256 match algo.as_str() {
12257 "SHA1" => {
12258 let name = match target {
12259 DialectType::Spark | DialectType::Databricks => "SHA",
12260 DialectType::Hive => "SHA1",
12261 _ => "SHA1",
12262 };
12263 Ok(Expression::Function(Box::new(Function::new(
12264 name.to_string(),
12265 vec![data_arg],
12266 ))))
12267 }
12268 "SHA2_256" => {
12269 Ok(Expression::Function(Box::new(Function::new(
12270 "SHA2".to_string(),
12271 vec![data_arg, Expression::number(256)],
12272 ))))
12273 }
12274 "SHA2_512" => {
12275 Ok(Expression::Function(Box::new(Function::new(
12276 "SHA2".to_string(),
12277 vec![data_arg, Expression::number(512)],
12278 ))))
12279 }
12280 "MD5" => Ok(Expression::Function(Box::new(Function::new(
12281 "MD5".to_string(),
12282 vec![data_arg],
12283 )))),
12284 _ => Ok(Expression::Function(Box::new(Function::new(
12285 "HASHBYTES".to_string(),
12286 vec![Expression::string(&algo), data_arg],
12287 )))),
12288 }
12289 }
// JSON_EXTRACT_PATH(json, key1, key2, ...) -> target-specific JSON extraction
// The variadic key list is first folded into a JSONPath string
// ("$.key1.key2" / "$.key1[0]"); targets then get either a function call
// taking (json, path), the original variadic key form, or an arrow-operator
// AST node. The _TEXT variant maps to the scalar/text-returning spelling
// where the target distinguishes the two.
"JSON_EXTRACT_PATH" | "JSON_EXTRACT_PATH_TEXT" if f.args.len() >= 2 => {
    let is_text = name == "JSON_EXTRACT_PATH_TEXT";
    let mut args = f.args;
    // First argument is the JSON document; the rest are path keys.
    let json_expr = args.remove(0);
    // Build JSON path from remaining keys: $.key1.key2 or $.key1[0]
    let mut json_path = "$".to_string();
    for a in &args {
        match a {
            Expression::Literal(lit)
                if matches!(
                    lit.as_ref(),
                    crate::expressions::Literal::String(_)
                ) =>
            {
                let crate::expressions::Literal::String(s) =
                    lit.as_ref()
                else {
                    unreachable!()
                };
                // Numeric string keys become array indices: [0]
                if s.chars().all(|c| c.is_ascii_digit()) {
                    json_path.push('[');
                    json_path.push_str(s);
                    json_path.push(']');
                } else {
                    json_path.push('.');
                    json_path.push_str(s);
                }
            }
            _ => {
                // Non-literal key: emit a ".?" placeholder segment.
                // NOTE(review): this drops the dynamic key expression —
                // confirm downstream treats "?" acceptably.
                json_path.push_str(".?");
            }
        }
    }
    match target {
        // Spark/Hive family: GET_JSON_OBJECT(json, '$.path').
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::BigQuery | DialectType::MySQL => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
        DialectType::PostgreSQL | DialectType::Materialize => {
            // Keep as JSON_EXTRACT_PATH_TEXT / JSON_EXTRACT_PATH for PostgreSQL/Materialize
            // (these targets take the original variadic key list, not a path string).
            let func_name = if is_text {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // Use -> for JSON_EXTRACT_PATH, ->> for JSON_EXTRACT_PATH_TEXT
            if is_text {
                Ok(Expression::JsonExtractScalar(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            } else {
                Ok(Expression::JsonExtract(Box::new(
                    crate::expressions::JsonExtractFunc {
                        this: json_expr,
                        path: Expression::string(&json_path),
                        returning: None,
                        arrow_syntax: true,
                        hash_arrow_syntax: false,
                        wrapper_option: None,
                        quotes_option: None,
                        on_scalar_string: false,
                        on_error: None,
                    },
                )))
            }
        }
        DialectType::Redshift => {
            // Keep as JSON_EXTRACT_PATH_TEXT for Redshift
            // (text form regardless of which variant the source used).
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_PATH_TEXT".to_string(),
                new_args,
            ))))
        }
        DialectType::TSQL => {
            // ISNULL(JSON_QUERY(json, '$.path'), JSON_VALUE(json, '$.path'))
            // JSON_QUERY handles objects/arrays, JSON_VALUE scalars; ISNULL
            // falls through from the former to the latter.
            let jq = Expression::Function(Box::new(Function::new(
                "JSON_QUERY".to_string(),
                vec![json_expr.clone(), Expression::string(&json_path)],
            )));
            let jv = Expression::Function(Box::new(Function::new(
                "JSON_VALUE".to_string(),
                vec![json_expr, Expression::string(&json_path)],
            )));
            Ok(Expression::Function(Box::new(Function::new(
                "ISNULL".to_string(),
                vec![jq, jv],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse takes the variadic key list, not a path string.
            let func_name = if is_text {
                "JSONExtractString"
            } else {
                "JSONExtractRaw"
            };
            let mut new_args = vec![json_expr];
            new_args.extend(args);
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                new_args,
            ))))
        }
        // Default: generic JSON_EXTRACT[_SCALAR](json, '$.path').
        _ => {
            let func_name = if is_text {
                "JSON_EXTRACT_SCALAR"
            } else {
                "JSON_EXTRACT"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                vec![json_expr, Expression::string(&json_path)],
            ))))
        }
    }
}
12452 // APPROX_DISTINCT(x) -> APPROX_COUNT_DISTINCT(x) for Spark/Hive/BigQuery
12453 "APPROX_DISTINCT" if f.args.len() >= 1 => {
12454 let name = match target {
12455 DialectType::Spark
12456 | DialectType::Databricks
12457 | DialectType::Hive
12458 | DialectType::BigQuery => "APPROX_COUNT_DISTINCT",
12459 _ => "APPROX_DISTINCT",
12460 };
12461 let mut args = f.args;
12462 // Hive doesn't support the accuracy parameter
12463 if name == "APPROX_COUNT_DISTINCT"
12464 && matches!(target, DialectType::Hive)
12465 {
12466 args.truncate(1);
12467 }
12468 Ok(Expression::Function(Box::new(Function::new(
12469 name.to_string(),
12470 args,
12471 ))))
12472 }
// REGEXP_EXTRACT(x, pattern) - normalize default group index
// When the function is called without an explicit group index, dialects
// disagree on the default: Presto/Trino/DuckDB return the whole match
// (group 0), Hive/Spark return capture group 1. If the defaults differ,
// the source's implicit index is appended explicitly so the target
// preserves the original semantics.
"REGEXP_EXTRACT" if f.args.len() == 2 => {
    // Determine source default group index
    let source_default = match source {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB => 0,
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    // Determine target default group index
    let target_default = match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::DuckDB
        | DialectType::BigQuery => 0,
        DialectType::Snowflake => {
            // Snowflake uses REGEXP_SUBSTR
            return Ok(Expression::Function(Box::new(Function::new(
                "REGEXP_SUBSTR".to_string(),
                f.args,
            ))));
        }
        _ => 1, // Hive/Spark/Databricks default = 1
    };
    if source_default != target_default {
        // Append the source's implicit group index as an explicit third
        // argument.
        // NOTE(review): for BigQuery the third REGEXP_EXTRACT argument is
        // a position, not a group index — confirm this mapping.
        let mut args = f.args;
        args.push(Expression::number(source_default));
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            args,
        ))))
    } else {
        // Defaults agree — no explicit index needed.
        Ok(Expression::Function(Box::new(Function::new(
            "REGEXP_EXTRACT".to_string(),
            f.args,
        ))))
    }
}
12511 // RLIKE(str, pattern) -> RegexpLike expression (generates as target-specific form)
12512 "RLIKE" if f.args.len() == 2 => {
12513 let mut args = f.args;
12514 let str_expr = args.remove(0);
12515 let pattern = args.remove(0);
12516 match target {
12517 DialectType::DuckDB => {
12518 // REGEXP_MATCHES(str, pattern)
12519 Ok(Expression::Function(Box::new(Function::new(
12520 "REGEXP_MATCHES".to_string(),
12521 vec![str_expr, pattern],
12522 ))))
12523 }
12524 _ => {
12525 // Convert to RegexpLike which generates as RLIKE/~/REGEXP_LIKE per dialect
12526 Ok(Expression::RegexpLike(Box::new(
12527 crate::expressions::RegexpFunc {
12528 this: str_expr,
12529 pattern,
12530 flags: None,
12531 },
12532 )))
12533 }
12534 }
12535 }
// EOMONTH(date[, month_offset]) -> target-specific
// End-of-month with an optional month offset. Each target branch applies
// its own date coercion (CAST/TO_DATE/DATE()) and its own month-addition
// mechanism (DATEADD/DATE_ADD/ADD_MONTHS/INTERVAL arithmetic) before the
// final last-day function.
"EOMONTH" if f.args.len() >= 1 => {
    let mut args = f.args;
    let date_arg = args.remove(0);
    // Optional second argument: number of months to shift before taking
    // the end of month.
    let month_offset = if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };

    // Helper: wrap date in CAST to DATE
    let cast_to_date = |e: Expression| -> Expression {
        Expression::Cast(Box::new(Cast {
            this: e,
            to: DataType::Date,
            trailing_comments: vec![],
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        }))
    };

    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL: EOMONTH(CAST(date AS DATE)) or EOMONTH(DATEADD(MONTH, offset, CAST(date AS DATE)))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        // DATEADD's unit is a bare keyword, hence an
                        // identifier rather than a string literal.
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "EOMONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // Presto: LAST_DAY_OF_MONTH(CAST(CAST(date AS TIMESTAMP) AS DATE))
            // or with offset: LAST_DAY_OF_MONTH(DATE_ADD('MONTH', offset, CAST(CAST(date AS TIMESTAMP) AS DATE)))
            let cast_ts = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Timestamp {
                    timezone: false,
                    precision: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = cast_to_date(cast_ts);
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    // Presto's DATE_ADD takes the unit as a string literal.
                    vec![Expression::string("MONTH"), offset, date],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY_OF_MONTH".to_string(),
                vec![date],
            ))))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: CAST(DATE_TRUNC('MONTH', CAST(date AS DATE) [+ INTERVAL 'offset MONTH']) + INTERVAL '1 MONTH' - INTERVAL '1 DAY' AS DATE)
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // The offset is rendered into the interval's string body.
                // NOTE(review): expr_to_string_static on a non-literal
                // offset may produce an odd interval string — confirm.
                let interval_str = format!(
                    "{} MONTH",
                    Self::expr_to_string_static(&offset)
                );
                Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(
                        date,
                        Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(
                                    &interval_str,
                                )),
                                unit: None,
                            },
                        )),
                    ),
                ))
            } else {
                date
            };
            // Truncate to the first of the month, step one month forward,
            // then back one day to land on the last day of the month.
            let truncated =
                Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![Expression::string("MONTH"), date],
                )));
            let plus_month = Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(
                    truncated,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 MONTH")),
                            unit: None,
                        },
                    )),
                ),
            ));
            let minus_day = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    plus_month,
                    Expression::Interval(Box::new(
                        crate::expressions::Interval {
                            this: Some(Expression::string("1 DAY")),
                            unit: None,
                        },
                    )),
                ),
            ));
            Ok(Expression::Cast(Box::new(Cast {
                this: minus_day,
                to: DataType::Date,
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::DuckDB => {
            // DuckDB: LAST_DAY(CAST(date AS DATE) [+ INTERVAL (offset) MONTH])
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                // Wrap negative numbers in parentheses for DuckDB INTERVAL
                let interval_val =
                    if matches!(&offset, Expression::Neg(_)) {
                        Expression::Paren(Box::new(
                            crate::expressions::Paren {
                                this: offset,
                                trailing_comments: Vec::new(),
                            },
                        ))
                    } else {
                        offset
                    };
                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                    date,
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(interval_val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: crate::expressions::IntervalUnit::Month,
                            use_plural: false,
                        }),
                    })),
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Snowflake | DialectType::Redshift => {
            // Snowflake/Redshift: LAST_DAY(TO_DATE(date) or CAST(date AS DATE))
            // With offset: LAST_DAY(DATEADD(MONTH, offset, TO_DATE(date)))
            // Snowflake prefers TO_DATE; Redshift uses a plain CAST.
            let date = if matches!(target, DialectType::Snowflake) {
                Expression::Function(Box::new(Function::new(
                    "TO_DATE".to_string(),
                    vec![date_arg],
                )))
            } else {
                cast_to_date(date_arg)
            };
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: LAST_DAY(TO_DATE(date))
            // With offset: LAST_DAY(ADD_MONTHS(TO_DATE(date), offset))
            let date = Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![date_arg],
            )));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, offset],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::MySQL => {
            // MySQL: LAST_DAY(DATE(date)) - no offset
            // With offset: LAST_DAY(DATE_ADD(date, INTERVAL offset MONTH)) - no DATE() wrapper
            let date = if let Some(offset) = month_offset {
                let iu = crate::expressions::IntervalUnit::Month;
                Expression::DateAdd(Box::new(
                    crate::expressions::DateAddFunc {
                        this: date_arg,
                        interval: offset,
                        unit: iu,
                    },
                ))
            } else {
                Expression::Function(Box::new(Function::new(
                    "DATE".to_string(),
                    vec![date_arg],
                )))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: LAST_DAY(CAST(date AS DATE))
            // With offset: LAST_DAY(DATE_ADD(CAST(date AS DATE), INTERVAL offset MONTH))
            let date = cast_to_date(date_arg);
            let date = if let Some(offset) = month_offset {
                let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(offset),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: crate::expressions::IntervalUnit::Month,
                        use_plural: false,
                    }),
                }));
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, interval],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::ClickHouse => {
            // ClickHouse: LAST_DAY(CAST(date AS Nullable(DATE)))
            let date = Expression::Cast(Box::new(Cast {
                this: date_arg,
                to: DataType::Nullable {
                    inner: Box::new(DataType::Date),
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(
                            "MONTH",
                        )),
                        offset,
                        date,
                    ],
                )))
            } else {
                date
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        DialectType::Hive => {
            // Hive: LAST_DAY(date)
            // With offset: LAST_DAY(ADD_MONTHS(date, offset)); no cast.
            let date = if let Some(offset) = month_offset {
                Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date_arg, offset],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
        _ => {
            // Default: LAST_DAY(date), with DATEADD(MONTH, offset, date)
            // when an offset was supplied.
            let date = if let Some(offset) = month_offset {
                let unit =
                    Expression::Identifier(Identifier::new("MONTH"));
                Expression::Function(Box::new(Function::new(
                    "DATEADD".to_string(),
                    vec![unit, offset, date_arg],
                )))
            } else {
                date_arg
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LAST_DAY".to_string(),
                vec![date],
            ))))
        }
    }
}
12871 // LAST_DAY(x) / LAST_DAY_OF_MONTH(x) -> target-specific
12872 "LAST_DAY" | "LAST_DAY_OF_MONTH"
12873 if !matches!(source, DialectType::BigQuery)
12874 && f.args.len() >= 1 =>
12875 {
12876 let first_arg = f.args.into_iter().next().unwrap();
12877 match target {
12878 DialectType::TSQL | DialectType::Fabric => {
12879 Ok(Expression::Function(Box::new(Function::new(
12880 "EOMONTH".to_string(),
12881 vec![first_arg],
12882 ))))
12883 }
12884 DialectType::Presto
12885 | DialectType::Trino
12886 | DialectType::Athena => {
12887 Ok(Expression::Function(Box::new(Function::new(
12888 "LAST_DAY_OF_MONTH".to_string(),
12889 vec![first_arg],
12890 ))))
12891 }
12892 _ => Ok(Expression::Function(Box::new(Function::new(
12893 "LAST_DAY".to_string(),
12894 vec![first_arg],
12895 )))),
12896 }
12897 }
12898 // MAP(keys_array, vals_array) from Presto (2-arg form) -> target-specific
12899 "MAP"
12900 if f.args.len() == 2
12901 && matches!(
12902 source,
12903 DialectType::Presto
12904 | DialectType::Trino
12905 | DialectType::Athena
12906 ) =>
12907 {
12908 let keys_arg = f.args[0].clone();
12909 let vals_arg = f.args[1].clone();
12910
12911 // Helper: extract array elements from Array/ArrayFunc/Function("ARRAY") expressions
12912 fn extract_array_elements(
12913 expr: &Expression,
12914 ) -> Option<&Vec<Expression>> {
12915 match expr {
12916 Expression::Array(arr) => Some(&arr.expressions),
12917 Expression::ArrayFunc(arr) => Some(&arr.expressions),
12918 Expression::Function(f)
12919 if f.name.eq_ignore_ascii_case("ARRAY") =>
12920 {
12921 Some(&f.args)
12922 }
12923 _ => None,
12924 }
12925 }
12926
12927 match target {
12928 DialectType::Spark | DialectType::Databricks => {
12929 // Presto MAP(keys, vals) -> Spark MAP_FROM_ARRAYS(keys, vals)
12930 Ok(Expression::Function(Box::new(Function::new(
12931 "MAP_FROM_ARRAYS".to_string(),
12932 f.args,
12933 ))))
12934 }
12935 DialectType::Hive => {
12936 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Hive MAP(k1, v1, k2, v2)
12937 if let (Some(keys), Some(vals)) = (
12938 extract_array_elements(&keys_arg),
12939 extract_array_elements(&vals_arg),
12940 ) {
12941 if keys.len() == vals.len() {
12942 let mut interleaved = Vec::new();
12943 for (k, v) in keys.iter().zip(vals.iter()) {
12944 interleaved.push(k.clone());
12945 interleaved.push(v.clone());
12946 }
12947 Ok(Expression::Function(Box::new(Function::new(
12948 "MAP".to_string(),
12949 interleaved,
12950 ))))
12951 } else {
12952 Ok(Expression::Function(Box::new(Function::new(
12953 "MAP".to_string(),
12954 f.args,
12955 ))))
12956 }
12957 } else {
12958 Ok(Expression::Function(Box::new(Function::new(
12959 "MAP".to_string(),
12960 f.args,
12961 ))))
12962 }
12963 }
12964 DialectType::Snowflake => {
12965 // Presto MAP(ARRAY[k1,k2], ARRAY[v1,v2]) -> Snowflake OBJECT_CONSTRUCT(k1, v1, k2, v2)
12966 if let (Some(keys), Some(vals)) = (
12967 extract_array_elements(&keys_arg),
12968 extract_array_elements(&vals_arg),
12969 ) {
12970 if keys.len() == vals.len() {
12971 let mut interleaved = Vec::new();
12972 for (k, v) in keys.iter().zip(vals.iter()) {
12973 interleaved.push(k.clone());
12974 interleaved.push(v.clone());
12975 }
12976 Ok(Expression::Function(Box::new(Function::new(
12977 "OBJECT_CONSTRUCT".to_string(),
12978 interleaved,
12979 ))))
12980 } else {
12981 Ok(Expression::Function(Box::new(Function::new(
12982 "MAP".to_string(),
12983 f.args,
12984 ))))
12985 }
12986 } else {
12987 Ok(Expression::Function(Box::new(Function::new(
12988 "MAP".to_string(),
12989 f.args,
12990 ))))
12991 }
12992 }
12993 _ => Ok(Expression::Function(f)),
12994 }
12995 }
12996 // MAP() with 0 args from Spark -> MAP(ARRAY[], ARRAY[]) for Presto/Trino
12997 "MAP"
12998 if f.args.is_empty()
12999 && matches!(
13000 source,
13001 DialectType::Hive
13002 | DialectType::Spark
13003 | DialectType::Databricks
13004 )
13005 && matches!(
13006 target,
13007 DialectType::Presto
13008 | DialectType::Trino
13009 | DialectType::Athena
13010 ) =>
13011 {
13012 let empty_keys =
13013 Expression::Array(Box::new(crate::expressions::Array {
13014 expressions: vec![],
13015 }));
13016 let empty_vals =
13017 Expression::Array(Box::new(crate::expressions::Array {
13018 expressions: vec![],
13019 }));
13020 Ok(Expression::Function(Box::new(Function::new(
13021 "MAP".to_string(),
13022 vec![empty_keys, empty_vals],
13023 ))))
13024 }
13025 // MAP(k1, v1, k2, v2, ...) from Hive/Spark -> target-specific
13026 "MAP"
13027 if f.args.len() >= 2
13028 && f.args.len() % 2 == 0
13029 && matches!(
13030 source,
13031 DialectType::Hive
13032 | DialectType::Spark
13033 | DialectType::Databricks
13034 | DialectType::ClickHouse
13035 ) =>
13036 {
13037 let args = f.args;
13038 match target {
13039 DialectType::DuckDB => {
13040 // MAP([k1, k2], [v1, v2])
13041 let mut keys = Vec::new();
13042 let mut vals = Vec::new();
13043 for (i, arg) in args.into_iter().enumerate() {
13044 if i % 2 == 0 {
13045 keys.push(arg);
13046 } else {
13047 vals.push(arg);
13048 }
13049 }
13050 let keys_arr = Expression::Array(Box::new(
13051 crate::expressions::Array { expressions: keys },
13052 ));
13053 let vals_arr = Expression::Array(Box::new(
13054 crate::expressions::Array { expressions: vals },
13055 ));
13056 Ok(Expression::Function(Box::new(Function::new(
13057 "MAP".to_string(),
13058 vec![keys_arr, vals_arr],
13059 ))))
13060 }
13061 DialectType::Presto | DialectType::Trino => {
13062 // MAP(ARRAY[k1, k2], ARRAY[v1, v2])
13063 let mut keys = Vec::new();
13064 let mut vals = Vec::new();
13065 for (i, arg) in args.into_iter().enumerate() {
13066 if i % 2 == 0 {
13067 keys.push(arg);
13068 } else {
13069 vals.push(arg);
13070 }
13071 }
13072 let keys_arr = Expression::Array(Box::new(
13073 crate::expressions::Array { expressions: keys },
13074 ));
13075 let vals_arr = Expression::Array(Box::new(
13076 crate::expressions::Array { expressions: vals },
13077 ));
13078 Ok(Expression::Function(Box::new(Function::new(
13079 "MAP".to_string(),
13080 vec![keys_arr, vals_arr],
13081 ))))
13082 }
13083 DialectType::Snowflake => Ok(Expression::Function(Box::new(
13084 Function::new("OBJECT_CONSTRUCT".to_string(), args),
13085 ))),
13086 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
13087 Function::new("map".to_string(), args),
13088 ))),
13089 _ => Ok(Expression::Function(Box::new(Function::new(
13090 "MAP".to_string(),
13091 args,
13092 )))),
13093 }
13094 }
13095 // COLLECT_LIST(x) -> ARRAY_AGG(x) for most targets
13096 "COLLECT_LIST" if f.args.len() >= 1 => {
13097 let name = match target {
13098 DialectType::Spark
13099 | DialectType::Databricks
13100 | DialectType::Hive => "COLLECT_LIST",
13101 DialectType::DuckDB
13102 | DialectType::PostgreSQL
13103 | DialectType::Redshift
13104 | DialectType::Snowflake
13105 | DialectType::BigQuery => "ARRAY_AGG",
13106 DialectType::Presto | DialectType::Trino => "ARRAY_AGG",
13107 _ => "ARRAY_AGG",
13108 };
13109 Ok(Expression::Function(Box::new(Function::new(
13110 name.to_string(),
13111 f.args,
13112 ))))
13113 }
13114 // COLLECT_SET(x) -> target-specific distinct array aggregation
13115 "COLLECT_SET" if f.args.len() >= 1 => {
13116 let name = match target {
13117 DialectType::Spark
13118 | DialectType::Databricks
13119 | DialectType::Hive => "COLLECT_SET",
13120 DialectType::Presto
13121 | DialectType::Trino
13122 | DialectType::Athena => "SET_AGG",
13123 DialectType::Snowflake => "ARRAY_UNIQUE_AGG",
13124 _ => "ARRAY_AGG",
13125 };
13126 Ok(Expression::Function(Box::new(Function::new(
13127 name.to_string(),
13128 f.args,
13129 ))))
13130 }
13131 // ISNAN(x) / IS_NAN(x) - normalize
13132 "ISNAN" | "IS_NAN" => {
13133 let name = match target {
13134 DialectType::Spark
13135 | DialectType::Databricks
13136 | DialectType::Hive => "ISNAN",
13137 DialectType::Presto
13138 | DialectType::Trino
13139 | DialectType::Athena => "IS_NAN",
13140 DialectType::BigQuery
13141 | DialectType::PostgreSQL
13142 | DialectType::Redshift => "IS_NAN",
13143 DialectType::ClickHouse => "IS_NAN",
13144 _ => "ISNAN",
13145 };
13146 Ok(Expression::Function(Box::new(Function::new(
13147 name.to_string(),
13148 f.args,
13149 ))))
13150 }
// SPLIT_PART(str, delim, index) -> target-specific
// NOTE: arm order matters below — the DuckDB-with-Snowflake-source guard
// must precede the general DuckDB arm, since the general arm would
// otherwise shadow it.
"SPLIT_PART" if f.args.len() == 3 => {
    match target {
        DialectType::Spark | DialectType::Databricks => {
            // Keep as SPLIT_PART (Spark 3.4+)
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        DialectType::DuckDB
            if matches!(source, DialectType::Snowflake) =>
        {
            // Snowflake SPLIT_PART -> DuckDB with CASE wrapper:
            // - part_index 0 treated as 1
            // - empty delimiter: return whole string if index 1 or -1, else ''
            // This emulates Snowflake's documented edge-case behavior,
            // which plain DuckDB SPLIT_PART does not share.
            let mut args = f.args;
            let str_arg = args.remove(0);
            let delim_arg = args.remove(0);
            let idx_arg = args.remove(0);

            // (CASE WHEN idx = 0 THEN 1 ELSE idx END)
            // Snowflake treats index 0 as the first part.
            let adjusted_idx = Expression::Paren(Box::new(Paren {
                this: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::Eq(Box::new(BinaryOp {
                            left: idx_arg.clone(),
                            right: Expression::number(0),
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        })),
                        Expression::number(1),
                    )],
                    else_: Some(idx_arg.clone()),
                    comments: vec![],
                    inferred_type: None,
                })),
                trailing_comments: vec![],
            }));

            // SPLIT_PART(str, delim, adjusted_idx)
            // The normal (non-empty delimiter) path.
            let base_func =
                Expression::Function(Box::new(Function::new(
                    "SPLIT_PART".to_string(),
                    vec![
                        str_arg.clone(),
                        delim_arg.clone(),
                        adjusted_idx.clone(),
                    ],
                )));

            // (CASE WHEN adjusted_idx = 1 OR adjusted_idx = -1 THEN str ELSE '' END)
            // Empty-delimiter semantics: the whole string counts as the
            // single part, addressable as 1 or -1; any other index is ''.
            let empty_delim_case = Expression::Paren(Box::new(Paren {
                this: Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Eq(Box::new(BinaryOp {
                                left: adjusted_idx.clone(),
                                right: Expression::number(1),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            right: Expression::Eq(Box::new(BinaryOp {
                                left: adjusted_idx,
                                right: Expression::number(-1),
                                left_comments: vec![],
                                operator_comments: vec![],
                                trailing_comments: vec![],
                                inferred_type: None,
                            })),
                            left_comments: vec![],
                            operator_comments: vec![],
                            trailing_comments: vec![],
                            inferred_type: None,
                        })),
                        str_arg,
                    )],
                    else_: Some(Expression::string("")),
                    comments: vec![],
                    inferred_type: None,
                })),
                trailing_comments: vec![],
            }));

            // CASE WHEN delim = '' THEN (empty case) ELSE SPLIT_PART(...) END
            Ok(Expression::Case(Box::new(Case {
                operand: None,
                whens: vec![(
                    Expression::Eq(Box::new(BinaryOp {
                        left: delim_arg,
                        right: Expression::string(""),
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    })),
                    empty_delim_case,
                )],
                else_: Some(base_func),
                comments: vec![],
                inferred_type: None,
            })))
        }
        // These targets support SPLIT_PART natively with matching
        // semantics — pass through unchanged.
        DialectType::DuckDB
        | DialectType::PostgreSQL
        | DialectType::Snowflake
        | DialectType::Redshift
        | DialectType::Trino
        | DialectType::Presto => Ok(Expression::Function(Box::new(
            Function::new("SPLIT_PART".to_string(), f.args),
        ))),
        DialectType::Hive => {
            // SPLIT(str, delim)[index]
            // Complex conversion, just keep as-is for now
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_PART".to_string(),
                f.args,
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "SPLIT_PART".to_string(),
            f.args,
        )))),
    }
}
// JSON_EXTRACT(json, path) -> target-specific JSON extraction
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" if f.args.len() == 2 => {
    // Scalar form extracts a bare value; non-scalar form keeps JSON.
    let is_scalar = name == "JSON_EXTRACT_SCALAR";
    match target {
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => {
            let mut args = f.args;
            // Spark/Hive don't support Presto's TRY(expr) wrapper form here.
            // Mirror sqlglot by unwrapping TRY(expr) to expr before GET_JSON_OBJECT.
            if let Some(Expression::Function(inner)) = args.first() {
                if inner.name.eq_ignore_ascii_case("TRY")
                    && inner.args.len() == 1
                {
                    let mut inner_args = inner.args.clone();
                    args[0] = inner_args.remove(0);
                }
            }
            Ok(Expression::Function(Box::new(Function::new(
                "GET_JSON_OBJECT".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB | DialectType::SQLite => {
            // json -> path syntax
            // NOTE(review): `is_scalar` is not consulted on this branch;
            // DuckDB/SQLite distinguish `->` (JSON) from `->>` (text), so
            // JSON_EXTRACT_SCALAR may need the text form — confirm against
            // JsonExtractFunc's generator semantics.
            let mut args = f.args;
            let json_expr = args.remove(0);
            let path = args.remove(0);
            Ok(Expression::JsonExtract(Box::new(
                crate::expressions::JsonExtractFunc {
                    this: json_expr,
                    path,
                    returning: None,
                    arrow_syntax: true,
                    hash_arrow_syntax: false,
                    wrapper_option: None,
                    quotes_option: None,
                    on_scalar_string: false,
                    on_error: None,
                },
            )))
        }
        DialectType::TSQL => {
            // TSQL: JSON_VALUE returns scalars, JSON_QUERY returns objects/arrays.
            let func_name = if is_scalar {
                "JSON_VALUE"
            } else {
                "JSON_QUERY"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        DialectType::PostgreSQL | DialectType::Redshift => {
            // Postgres-family path functions; _TEXT variant yields text (scalar).
            let func_name = if is_scalar {
                "JSON_EXTRACT_PATH_TEXT"
            } else {
                "JSON_EXTRACT_PATH"
            };
            Ok(Expression::Function(Box::new(Function::new(
                func_name.to_string(),
                f.args,
            ))))
        }
        // Other targets: keep the original function name unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// MySQL JSON_SEARCH(json_doc, mode, search[, escape_char[, path]]) -> DuckDB json_tree-based lookup
"JSON_SEARCH"
    if matches!(target, DialectType::DuckDB)
        && (3..=5).contains(&f.args.len()) =>
{
    let args = &f.args;

    // Only rewrite deterministic modes and NULL/no escape-char variant.
    // Bail out (return the call unchanged) for anything we can't prove safe.
    let mode = match &args[1] {
        Expression::Literal(lit)
            if matches!(
                lit.as_ref(),
                crate::expressions::Literal::String(_)
            ) =>
        {
            let crate::expressions::Literal::String(s) = lit.as_ref()
            else {
                // Guarded by the matches! above.
                unreachable!()
            };
            s.to_ascii_lowercase()
        }
        _ => return Ok(Expression::Function(f)),
    };
    if mode != "one" && mode != "all" {
        return Ok(Expression::Function(f));
    }
    // A non-NULL escape char (4th arg) changes match semantics; don't rewrite.
    if args.len() >= 4 && !matches!(&args[3], Expression::Null(_)) {
        return Ok(Expression::Function(f));
    }

    // Render sub-expressions to SQL text for splicing into the Raw rewrite;
    // any generation failure falls back to the untouched call.
    let json_doc_sql = match Generator::sql(&args[0]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    let search_sql = match Generator::sql(&args[2]) {
        Ok(sql) => sql,
        Err(_) => return Ok(Expression::Function(f)),
    };
    let path_sql = if args.len() == 5 {
        match Generator::sql(&args[4]) {
            Ok(sql) => sql,
            Err(_) => return Ok(Expression::Function(f)),
        }
    } else {
        // MySQL default search root.
        "'$'".to_string()
    };

    // 'all' -> JSON array of every matching path; 'one' -> first match only.
    let rewrite_sql = if mode == "all" {
        format!(
            "(SELECT TO_JSON(LIST(__jt.fullkey)) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}))",
            json_doc_sql, path_sql, search_sql
        )
    } else {
        format!(
            "(SELECT TO_JSON(__jt.fullkey) FROM json_tree({}, {}) AS __jt WHERE __jt.atom = TO_JSON({}) ORDER BY __jt.id LIMIT 1)",
            json_doc_sql, path_sql, search_sql
        )
    };

    Ok(Expression::Raw(crate::expressions::Raw {
        sql: rewrite_sql,
    }))
}
// SingleStore JSON_EXTRACT_JSON(json, key1, key2, ...) -> JSON_EXTRACT(json, '$.key1.key2' or '$.key1[key2]')
// BSON_EXTRACT_BSON(json, key1, ...) -> JSONB_EXTRACT(json, '$.key1')
"JSON_EXTRACT_JSON" | "BSON_EXTRACT_BSON"
    if f.args.len() >= 2
        && matches!(source, DialectType::SingleStore) =>
{
    let is_bson = name == "BSON_EXTRACT_BSON";
    let mut args = f.args;
    let json_expr = args.remove(0);

    // Build JSONPath from remaining arguments.
    // Non-string-literal keys are silently dropped from the path.
    // NOTE(review): keys are spliced verbatim — assumes they contain no
    // characters needing JSONPath quoting/escaping; verify for keys with
    // dots or brackets.
    let mut path = String::from("$");
    for arg in &args {
        if let Expression::Literal(lit) = arg {
            if let crate::expressions::Literal::String(s) = lit.as_ref()
            {
                // Check if it's a numeric string (array index)
                if s.parse::<i64>().is_ok() {
                    path.push('[');
                    path.push_str(s);
                    path.push(']');
                } else {
                    path.push('.');
                    path.push_str(s);
                }
            }
        }
    }

    let target_func = if is_bson {
        "JSONB_EXTRACT"
    } else {
        "JSON_EXTRACT"
    };
    Ok(Expression::Function(Box::new(Function::new(
        target_func.to_string(),
        vec![json_expr, Expression::string(&path)],
    ))))
}
// ARRAY_SUM(lambda, array) from Doris -> ClickHouse arraySum
"ARRAY_SUM" if matches!(target, DialectType::ClickHouse) => {
    // Rebuild the Function struct to preserve all flags; only the name
    // changes (ClickHouse uses camelCase arraySum).
    Ok(Expression::Function(Box::new(Function {
        name: "arraySum".to_string(),
        args: f.args,
        distinct: f.distinct,
        trailing_comments: f.trailing_comments,
        use_bracket_syntax: f.use_bracket_syntax,
        no_parens: f.no_parens,
        quoted: f.quoted,
        span: None,
        inferred_type: None,
    })))
}
// TSQL JSON_QUERY/JSON_VALUE -> target-specific
// Note: For TSQL->TSQL, JsonQuery stays as Expression::JsonQuery (source transform not called)
// and is handled by JsonQueryValueConvert action. This handles the case where
// TSQL read transform converted JsonQuery to Function("JSON_QUERY") for cross-dialect.
"JSON_QUERY" | "JSON_VALUE"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::TSQL | DialectType::Fabric
        ) =>
{
    match target {
        // Hive-family engines expose a single extraction primitive.
        DialectType::Spark
        | DialectType::Databricks
        | DialectType::Hive => Ok(Expression::Function(Box::new(
            Function::new("GET_JSON_OBJECT".to_string(), f.args),
        ))),
        // All other targets: pass the TSQL name through unchanged.
        _ => Ok(Expression::Function(Box::new(Function::new(
            name.to_string(),
            f.args,
        )))),
    }
}
// UNIX_TIMESTAMP(x) -> TO_UNIXTIME(x) for Presto
"UNIX_TIMESTAMP" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    // Hive-family sources parse string timestamps in 'yyyy-MM-dd HH:mm:ss'
    // format implicitly, so those targets need an explicit parse step.
    let is_hive_source = matches!(
        source,
        DialectType::Hive
            | DialectType::Spark
            | DialectType::Databricks
    );
    match target {
        DialectType::DuckDB if is_hive_source => {
            // DuckDB: EPOCH(STRPTIME(x, '%Y-%m-%d %H:%M:%S'))
            let strptime =
                Expression::Function(Box::new(Function::new(
                    "STRPTIME".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %H:%M:%S")],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "EPOCH".to_string(),
                vec![strptime],
            ))))
        }
        DialectType::Presto | DialectType::Trino if is_hive_source => {
            // Presto: TO_UNIXTIME(COALESCE(TRY(DATE_PARSE(CAST(x AS VARCHAR), '%Y-%m-%d %T')), PARSE_DATETIME(DATE_FORMAT(x, '%Y-%m-%d %T'), 'yyyy-MM-dd HH:mm:ss')))
            // TRY(DATE_PARSE(...)) handles string inputs; the PARSE_DATETIME
            // fallback handles inputs that are already timestamps.
            let cast_varchar =
                Expression::Cast(Box::new(crate::expressions::Cast {
                    this: arg.clone(),
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
            let date_parse =
                Expression::Function(Box::new(Function::new(
                    "DATE_PARSE".to_string(),
                    vec![
                        cast_varchar,
                        Expression::string("%Y-%m-%d %T"),
                    ],
                )));
            let try_expr = Expression::Function(Box::new(
                Function::new("TRY".to_string(), vec![date_parse]),
            ));
            let date_format =
                Expression::Function(Box::new(Function::new(
                    "DATE_FORMAT".to_string(),
                    vec![arg, Expression::string("%Y-%m-%d %T")],
                )));
            let parse_datetime =
                Expression::Function(Box::new(Function::new(
                    "PARSE_DATETIME".to_string(),
                    vec![
                        date_format,
                        Expression::string("yyyy-MM-dd HH:mm:ss"),
                    ],
                )));
            let coalesce =
                Expression::Function(Box::new(Function::new(
                    "COALESCE".to_string(),
                    vec![try_expr, parse_datetime],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![coalesce],
            ))))
        }
        DialectType::Presto | DialectType::Trino => {
            // Non-Hive source: a plain rename suffices.
            Ok(Expression::Function(Box::new(Function::new(
                "TO_UNIXTIME".to_string(),
                vec![arg],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "UNIX_TIMESTAMP".to_string(),
            vec![arg],
        )))),
    }
}
// TO_UNIX_TIMESTAMP(x) -> UNIX_TIMESTAMP(x) for Spark/Hive
"TO_UNIX_TIMESTAMP" if f.args.len() >= 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("UNIX_TIMESTAMP".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "TO_UNIX_TIMESTAMP".to_string(),
        f.args,
    )))),
},
// CURDATE() -> CURRENT_DATE (dedicated AST node, rendered per dialect)
"CURDATE" => {
    Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
}
// CURTIME() -> CURRENT_TIME (dedicated AST node, rendered per dialect)
"CURTIME" => {
    Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
        precision: None,
    }))
}
// ARRAY_SORT(x) or ARRAY_SORT(x, lambda) -> SORT_ARRAY(x) for Hive, LIST_SORT for DuckDB
"ARRAY_SORT" if f.args.len() >= 1 => {
    match target {
        DialectType::Hive => {
            let mut args = f.args;
            args.truncate(1); // Drop lambda comparator
            Ok(Expression::Function(Box::new(Function::new(
                "SORT_ARRAY".to_string(),
                args,
            ))))
        }
        DialectType::DuckDB
            if matches!(source, DialectType::Snowflake) =>
        {
            // Snowflake ARRAY_SORT(arr[, asc_bool[, nulls_first_bool]]) -> DuckDB LIST_SORT(arr[, 'ASC'/'DESC'[, 'NULLS FIRST']])
            let mut args_iter = f.args.into_iter();
            let arr = args_iter.next().unwrap();
            let asc_arg = args_iter.next();
            let nulls_first_arg = args_iter.next();

            // Determine which trailing args are boolean literals; only
            // those can be statically translated to DuckDB's string flags.
            let is_asc_bool = asc_arg
                .as_ref()
                .map(|a| matches!(a, Expression::Boolean(_)))
                .unwrap_or(false);
            let is_nf_bool = nulls_first_arg
                .as_ref()
                .map(|a| matches!(a, Expression::Boolean(_)))
                .unwrap_or(false);

            // No boolean args: pass through as-is
            if !is_asc_bool && !is_nf_bool {
                let mut result_args = vec![arr];
                if let Some(asc) = asc_arg {
                    result_args.push(asc);
                    if let Some(nf) = nulls_first_arg {
                        result_args.push(nf);
                    }
                }
                Ok(Expression::Function(Box::new(Function::new(
                    "LIST_SORT".to_string(),
                    result_args,
                ))))
            } else {
                // Has boolean args: convert to DuckDB LIST_SORT format
                let descending = matches!(&asc_arg, Some(Expression::Boolean(b)) if !b.value);

                // Snowflake defaults: nulls_first = TRUE for DESC, FALSE for ASC
                let nulls_are_first = match &nulls_first_arg {
                    Some(Expression::Boolean(b)) => b.value,
                    None if is_asc_bool => descending, // Snowflake default
                    _ => false,
                };
                let nulls_first_sql = if nulls_are_first {
                    Some(Expression::string("NULLS FIRST"))
                } else {
                    None
                };

                if !is_asc_bool {
                    // asc is non-boolean expression, nulls_first is boolean
                    let mut result_args = vec![arr];
                    if let Some(asc) = asc_arg {
                        result_args.push(asc);
                    }
                    if let Some(nf) = nulls_first_sql {
                        result_args.push(nf);
                    }
                    Ok(Expression::Function(Box::new(Function::new(
                        "LIST_SORT".to_string(),
                        result_args,
                    ))))
                } else {
                    if !descending && !nulls_are_first {
                        // ASC, NULLS LAST (default) -> LIST_SORT(arr)
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "LIST_SORT".to_string(),
                                vec![arr],
                            ),
                        )))
                    } else if descending && !nulls_are_first {
                        // DESC, NULLS LAST -> ARRAY_REVERSE_SORT(arr)
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "ARRAY_REVERSE_SORT".to_string(),
                                vec![arr],
                            ),
                        )))
                    } else {
                        // NULLS FIRST -> LIST_SORT(arr, 'ASC'/'DESC', 'NULLS FIRST')
                        let order_str =
                            if descending { "DESC" } else { "ASC" };
                        Ok(Expression::Function(Box::new(
                            Function::new(
                                "LIST_SORT".to_string(),
                                vec![
                                    arr,
                                    Expression::string(order_str),
                                    Expression::string("NULLS FIRST"),
                                ],
                            ),
                        )))
                    }
                }
            }
        }
        DialectType::DuckDB => {
            // Non-Snowflake source: ARRAY_SORT(x, lambda) -> ARRAY_SORT(x) (drop comparator)
            let mut args = f.args;
            args.truncate(1); // Drop lambda comparator for DuckDB
            Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                args,
            ))))
        }
        // Everything else: leave the call untouched.
        _ => Ok(Expression::Function(f)),
    }
}
// SORT_ARRAY(x) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for Presto/Trino, keep for Hive/Spark
"SORT_ARRAY" if f.args.len() == 1 => match target {
    DialectType::Hive
    | DialectType::Spark
    | DialectType::Databricks => Ok(Expression::Function(f)),
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("LIST_SORT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_SORT".to_string(),
        f.args,
    )))),
},
// SORT_ARRAY(x, FALSE) -> ARRAY_REVERSE_SORT(x) for DuckDB, ARRAY_SORT(x, lambda) for Presto
"SORT_ARRAY" if f.args.len() == 2 => {
    // Second arg FALSE means descending order in Hive's SORT_ARRAY.
    let is_desc =
        matches!(&f.args[1], Expression::Boolean(b) if !b.value);
    if is_desc {
        match target {
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_REVERSE_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            DialectType::Presto | DialectType::Trino => {
                // Presto has no descending flag; emit
                // ARRAY_SORT(arr, (a, b) -> CASE WHEN a < b THEN 1
                //                           WHEN a > b THEN -1 ELSE 0 END)
                // — a reversed comparator (positive = a after b).
                let arr_arg = f.args.into_iter().next().unwrap();
                let a = Expression::Column(Box::new(
                    crate::expressions::Column {
                        name: crate::expressions::Identifier::new("a"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    },
                ));
                let b = Expression::Column(Box::new(
                    crate::expressions::Column {
                        name: crate::expressions::Identifier::new("b"),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    },
                ));
                let case_expr = Expression::Case(Box::new(
                    crate::expressions::Case {
                        operand: None,
                        whens: vec![
                            (
                                Expression::Lt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Box::new(
                                    Literal::Number("1".to_string()),
                                )),
                            ),
                            (
                                Expression::Gt(Box::new(
                                    BinaryOp::new(a.clone(), b.clone()),
                                )),
                                Expression::Literal(Box::new(
                                    Literal::Number("-1".to_string()),
                                )),
                            ),
                        ],
                        else_: Some(Expression::Literal(Box::new(
                            Literal::Number("0".to_string()),
                        ))),
                        comments: Vec::new(),
                        inferred_type: None,
                    },
                ));
                let lambda = Expression::Lambda(Box::new(
                    crate::expressions::LambdaExpr {
                        parameters: vec![
                            crate::expressions::Identifier::new("a"),
                            crate::expressions::Identifier::new("b"),
                        ],
                        body: case_expr,
                        colon: false,
                        parameter_types: Vec::new(),
                    },
                ));
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY_SORT".to_string(),
                    vec![arr_arg, lambda],
                ))))
            }
            _ => Ok(Expression::Function(f)),
        }
    } else {
        // SORT_ARRAY(x, TRUE) -> LIST_SORT(x) for DuckDB, ARRAY_SORT(x) for others
        // (ascending flag is dropped since it is the default everywhere).
        match target {
            DialectType::Hive => Ok(Expression::Function(f)),
            DialectType::DuckDB => {
                Ok(Expression::Function(Box::new(Function::new(
                    "LIST_SORT".to_string(),
                    vec![f.args.into_iter().next().unwrap()],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "ARRAY_SORT".to_string(),
                vec![f.args.into_iter().next().unwrap()],
            )))),
        }
    }
}
// LEFT(x, n), RIGHT(x, n) -> SUBSTRING for targets without LEFT/RIGHT
"LEFT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // LEFT(x, n) == SUBSTRING(x, 1, n)
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, Expression::number(1), n],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL LEFT(x, n) -> LEFT(CAST(x AS STRING), n) for Spark
            // (TSQL implicitly coerces; Spark needs the explicit cast).
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "LEFT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
"RIGHT" if f.args.len() == 2 => {
    match target {
        DialectType::Hive
        | DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            // SUBSTRING(x, LENGTH(x) - (n - 1))
            let len_x = Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![x.clone()],
            )));
            let n_minus_1 = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    n,
                    Expression::number(1),
                ),
            ));
            // Parenthesize (n - 1) so the outer subtraction groups correctly.
            let n_minus_1_paren = Expression::Paren(Box::new(
                crate::expressions::Paren {
                    this: n_minus_1,
                    trailing_comments: Vec::new(),
                },
            ));
            let offset = Expression::Sub(Box::new(
                crate::expressions::BinaryOp::new(
                    len_x,
                    n_minus_1_paren,
                ),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "SUBSTRING".to_string(),
                vec![x, offset],
            ))))
        }
        DialectType::Spark | DialectType::Databricks
            if matches!(
                source,
                DialectType::TSQL | DialectType::Fabric
            ) =>
        {
            // TSQL RIGHT(x, n) -> RIGHT(CAST(x AS STRING), n) for Spark
            let x = f.args[0].clone();
            let n = f.args[1].clone();
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            }));
            Ok(Expression::Function(Box::new(Function::new(
                "RIGHT".to_string(),
                vec![cast_x, n],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// MAP_FROM_ARRAYS(keys, vals) -> target-specific map construction
"MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
    ))),
    DialectType::Spark | DialectType::Databricks => {
        // Spark supports MAP_FROM_ARRAYS natively.
        Ok(Expression::Function(Box::new(Function::new(
            "MAP_FROM_ARRAYS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAP".to_string(),
        f.args,
    )))),
},
// LIKE(foo, 'pat') -> foo LIKE 'pat'; LIKE(foo, 'pat', '!') -> foo LIKE 'pat' ESCAPE '!'
// SQLite uses LIKE(pattern, string[, escape]) with args in reverse order
"LIKE" if f.args.len() >= 2 => {
    let (this, pattern) = if matches!(source, DialectType::SQLite) {
        // SQLite: LIKE(pattern, string) -> string LIKE pattern
        (f.args[1].clone(), f.args[0].clone())
    } else {
        // Standard: LIKE(string, pattern) -> string LIKE pattern
        (f.args[0].clone(), f.args[1].clone())
    };
    // Optional third argument is the ESCAPE character in both orders.
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::Like(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
        inferred_type: None,
    })))
}
// ILIKE(foo, 'pat') -> foo ILIKE 'pat'
"ILIKE" if f.args.len() >= 2 => {
    let this = f.args[0].clone();
    let pattern = f.args[1].clone();
    let escape = if f.args.len() >= 3 {
        Some(f.args[2].clone())
    } else {
        None
    };
    Ok(Expression::ILike(Box::new(crate::expressions::LikeOp {
        left: this,
        right: pattern,
        escape,
        quantifier: None,
        inferred_type: None,
    })))
}
// CHAR(n) -> CHR(n) for non-MySQL/non-TSQL targets
"CHAR" if f.args.len() == 1 => match target {
    DialectType::MySQL
    | DialectType::SingleStore
    | DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "CHR".to_string(),
        f.args,
    )))),
},
// CONCAT(a, b) -> a || b for PostgreSQL
"CONCAT"
    if f.args.len() == 2
        && matches!(target, DialectType::PostgreSQL)
        && matches!(
            source,
            DialectType::ClickHouse | DialectType::MySQL
        ) =>
{
    // Pop in reverse so ownership moves without cloning.
    let mut args = f.args;
    let right = args.pop().unwrap();
    let left = args.pop().unwrap();
    Ok(Expression::DPipe(Box::new(crate::expressions::DPipe {
        this: Box::new(left),
        expression: Box::new(right),
        safe: None,
    })))
}
// ARRAY_TO_STRING(arr, delim) -> target-specific
"ARRAY_TO_STRING"
    if f.args.len() == 2
        && matches!(target, DialectType::DuckDB)
        && matches!(source, DialectType::Snowflake) =>
{
    // Snowflake semantics: NULL separator -> NULL result, and NULL
    // elements render as ''. DuckDB needs both emulated explicitly.
    let mut args = f.args;
    let arr = args.remove(0);
    let sep = args.remove(0);
    // sep IS NULL
    let sep_is_null = Expression::IsNull(Box::new(IsNull {
        this: sep.clone(),
        not: false,
        postfix_form: false,
    }));
    // COALESCE(CAST(x AS TEXT), '') — maps NULL elements to ''.
    let cast_x = Expression::Cast(Box::new(Cast {
        this: Expression::Identifier(Identifier::new("x")),
        to: DataType::Text,
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    }));
    let coalesce = Expression::Coalesce(Box::new(
        crate::expressions::VarArgFunc {
            original_name: None,
            expressions: vec![
                cast_x,
                Expression::Literal(Box::new(Literal::String(
                    String::new(),
                ))),
            ],
            inferred_type: None,
        },
    ));
    // x -> COALESCE(CAST(x AS TEXT), '') applied to every element.
    let lambda =
        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
            parameters: vec![Identifier::new("x")],
            body: coalesce,
            colon: false,
            parameter_types: Vec::new(),
        }));
    let list_transform = Expression::Function(Box::new(Function::new(
        "LIST_TRANSFORM".to_string(),
        vec![arr, lambda],
    )));
    let array_to_string =
        Expression::Function(Box::new(Function::new(
            "ARRAY_TO_STRING".to_string(),
            vec![list_transform, sep],
        )));
    // CASE WHEN sep IS NULL THEN NULL ELSE ARRAY_TO_STRING(...) END
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens: vec![(sep_is_null, Expression::Null(Null))],
        else_: Some(array_to_string),
        comments: Vec::new(),
        inferred_type: None,
    })))
}
"ARRAY_TO_STRING" if f.args.len() >= 2 => match target {
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_JOIN".to_string(),
            f.args,
        ))))
    }
    DialectType::TSQL => Ok(Expression::Function(Box::new(
        Function::new("STRING_AGG".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONCAT / LIST_CONCAT -> target-specific
"ARRAY_CONCAT" | "LIST_CONCAT" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("CONCAT".to_string(), f.args),
    ))),
    DialectType::Snowflake => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::Redshift => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CAT".to_string(), f.args),
    ))),
    DialectType::DuckDB => Ok(Expression::Function(Box::new(
        Function::new("LIST_CONCAT".to_string(), f.args),
    ))),
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            f.args,
        ))))
    }
    DialectType::BigQuery => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONCAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ARRAY_CONTAINS(arr, x) / HAS(arr, x) / CONTAINS(arr, x) normalization
"HAS" if f.args.len() == 2 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(Box::new(
        Function::new("ARRAY_CONTAINS".to_string(), f.args),
    ))),
    DialectType::Presto | DialectType::Trino => {
        Ok(Expression::Function(Box::new(Function::new(
            "CONTAINS".to_string(),
            f.args,
        ))))
    }
    _ => Ok(Expression::Function(f)),
},
// NVL(a, b, c, d) -> COALESCE(a, b, c, d) - NVL should keep all args
"NVL" if f.args.len() > 2 => Ok(Expression::Function(Box::new(
    Function::new("COALESCE".to_string(), f.args),
))),
// ISNULL(x) in MySQL -> (x IS NULL)
"ISNULL"
    if f.args.len() == 1
        && matches!(source, DialectType::MySQL)
        && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Parenthesized so the postfix IS NULL binds to the whole argument.
    Ok(Expression::Paren(Box::new(crate::expressions::Paren {
        this: Expression::IsNull(Box::new(
            crate::expressions::IsNull {
                this: arg,
                not: false,
                postfix_form: false,
            },
        )),
        trailing_comments: Vec::new(),
    })))
}
// MONTHNAME(x) -> DATE_FORMAT(x, '%M') for MySQL -> MySQL
"MONTHNAME"
    if f.args.len() == 1 && matches!(target, DialectType::MySQL) =>
{
    let arg = f.args.into_iter().next().unwrap();
    Ok(Expression::Function(Box::new(Function::new(
        "DATE_FORMAT".to_string(),
        vec![arg, Expression::string("%M")],
    ))))
}
// ClickHouse splitByString('s', x) -> DuckDB STR_SPLIT(x, 's') / Hive SPLIT(x, CONCAT('\\Q', 's', '\\E'))
// Note: ClickHouse takes the separator FIRST; most targets take it second.
"SPLITBYSTRING" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => Ok(Expression::Function(Box::new(
            Function::new("STR_SPLIT".to_string(), vec![str_arg, sep]),
        ))),
        DialectType::Doris => {
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT_BY_STRING".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // SPLIT(x, CONCAT('\\Q', sep, '\\E'))
            // \Q...\E makes the regex engine treat `sep` literally.
            let escaped =
                Expression::Function(Box::new(Function::new(
                    "CONCAT".to_string(),
                    vec![
                        Expression::string("\\Q"),
                        sep,
                        Expression::string("\\E"),
                    ],
                )));
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, escaped],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse splitByRegexp('pattern', x) -> DuckDB STR_SPLIT_REGEX(x, 'pattern')
"SPLITBYREGEXP" if f.args.len() == 2 => {
    let sep = f.args[0].clone();
    let str_arg = f.args[1].clone();
    match target {
        DialectType::DuckDB => {
            Ok(Expression::Function(Box::new(Function::new(
                "STR_SPLIT_REGEX".to_string(),
                vec![str_arg, sep],
            ))))
        }
        DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => {
            // Hive's SPLIT already treats the separator as a regex.
            Ok(Expression::Function(Box::new(Function::new(
                "SPLIT".to_string(),
                vec![str_arg, sep],
            ))))
        }
        _ => Ok(Expression::Function(f)),
    }
}
// ClickHouse toMonday(x) -> DATE_TRUNC('WEEK', x) / DATE_TRUNC(x, 'WEEK') for Doris
"TOMONDAY" => {
    if f.args.len() == 1 {
        let arg = f.args.into_iter().next().unwrap();
        match target {
            DialectType::Doris => {
                // Doris takes the unit as the SECOND argument.
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_TRUNC".to_string(),
                    vec![arg, Expression::string("WEEK")],
                ))))
            }
            _ => Ok(Expression::Function(Box::new(Function::new(
                "DATE_TRUNC".to_string(),
                vec![Expression::string("WEEK"), arg],
            )))),
        }
    } else {
        // Wrong arity: leave untouched.
        Ok(Expression::Function(f))
    }
}
// COLLECT_LIST with FILTER(WHERE x IS NOT NULL) for targets that need it
"COLLECT_LIST" if f.args.len() == 1 => match target {
    DialectType::Spark
    | DialectType::Databricks
    | DialectType::Hive => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "ARRAY_AGG".to_string(),
        f.args,
    )))),
},
// TO_CHAR(x) with 1 arg -> CAST(x AS STRING) for Doris
"TO_CHAR"
    if f.args.len() == 1 && matches!(target, DialectType::Doris) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Doris has no bare TO_CHAR; STRING is a Doris-specific type name,
    // hence the Custom data type rather than VarChar.
    Ok(Expression::Cast(Box::new(crate::expressions::Cast {
        this: arg,
        to: DataType::Custom {
            name: "STRING".to_string(),
        },
        double_colon_syntax: false,
        trailing_comments: Vec::new(),
        format: None,
        default: None,
        inferred_type: None,
    })))
}
// DBMS_RANDOM.VALUE() -> RANDOM() for PostgreSQL (Oracle package function)
"DBMS_RANDOM.VALUE" if f.args.is_empty() => match target {
    DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
        Function::new("RANDOM".to_string(), vec![]),
    ))),
    _ => Ok(Expression::Function(f)),
},
// ClickHouse formatDateTime -> target-specific
"FORMATDATETIME" if f.args.len() >= 2 => match target {
    DialectType::MySQL => Ok(Expression::Function(Box::new(
        Function::new("DATE_FORMAT".to_string(), f.args),
    ))),
    _ => Ok(Expression::Function(f)),
},
// REPLICATE('x', n) -> REPEAT('x', n) for non-TSQL targets
"REPLICATE" if f.args.len() == 2 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "REPEAT".to_string(),
        f.args,
    )))),
},
// LEN(x) -> LENGTH(x) for non-TSQL targets
// No CAST needed when arg is already a string literal
"LEN" if f.args.len() == 1 => {
    match target {
        DialectType::TSQL => Ok(Expression::Function(f)),
        DialectType::Spark | DialectType::Databricks => {
            let arg = f.args.into_iter().next().unwrap();
            // Don't wrap string literals with CAST - they're already strings
            let is_string = matches!(
                &arg,
                Expression::Literal(lit) if matches!(lit.as_ref(), crate::expressions::Literal::String(_))
            );
            let final_arg = if is_string {
                arg
            } else {
                // Non-literal: add an explicit cast so Spark's LENGTH
                // gets a string operand.
                Expression::Cast(Box::new(Cast {
                    this: arg,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            };
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![final_arg],
            ))))
        }
        _ => {
            // Plain rename for every other target.
            let arg = f.args.into_iter().next().unwrap();
            Ok(Expression::Function(Box::new(Function::new(
                "LENGTH".to_string(),
                vec![arg],
            ))))
        }
    }
}
// COUNT_BIG(x) -> COUNT(x) for non-TSQL targets
"COUNT_BIG" if f.args.len() == 1 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "COUNT".to_string(),
        f.args,
    )))),
},
// DATEFROMPARTS(y, m, d) -> MAKE_DATE(y, m, d) for non-TSQL targets
"DATEFROMPARTS" if f.args.len() == 3 => match target {
    DialectType::TSQL => Ok(Expression::Function(f)),
    _ => Ok(Expression::Function(Box::new(Function::new(
        "MAKE_DATE".to_string(),
        f.args,
    )))),
},
14350 // REGEXP_LIKE(str, pattern) -> RegexpLike expression (target-specific output)
14351 "REGEXP_LIKE" if f.args.len() >= 2 => {
14352 let str_expr = f.args[0].clone();
14353 let pattern = f.args[1].clone();
14354 let flags = if f.args.len() >= 3 {
14355 Some(f.args[2].clone())
14356 } else {
14357 None
14358 };
14359 match target {
14360 DialectType::DuckDB => {
14361 let mut new_args = vec![str_expr, pattern];
14362 if let Some(fl) = flags {
14363 new_args.push(fl);
14364 }
14365 Ok(Expression::Function(Box::new(Function::new(
14366 "REGEXP_MATCHES".to_string(),
14367 new_args,
14368 ))))
14369 }
14370 _ => Ok(Expression::RegexpLike(Box::new(
14371 crate::expressions::RegexpFunc {
14372 this: str_expr,
14373 pattern,
14374 flags,
14375 },
14376 ))),
14377 }
14378 }
14379 // ClickHouse arrayJoin -> UNNEST for PostgreSQL
14380 "ARRAYJOIN" if f.args.len() == 1 => match target {
14381 DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
14382 Function::new("UNNEST".to_string(), f.args),
14383 ))),
14384 _ => Ok(Expression::Function(f)),
14385 },
14386 // DATETIMEFROMPARTS(y, m, d, h, mi, s, ms) -> MAKE_TIMESTAMP / TIMESTAMP_FROM_PARTS
14387 "DATETIMEFROMPARTS" if f.args.len() == 7 => {
14388 match target {
14389 DialectType::TSQL => Ok(Expression::Function(f)),
14390 DialectType::DuckDB => {
14391 // MAKE_TIMESTAMP(y, m, d, h, mi, s + (ms / 1000.0))
14392 let mut args = f.args;
14393 let ms = args.pop().unwrap();
14394 let s = args.pop().unwrap();
14395 // s + (ms / 1000.0)
14396 let ms_frac = Expression::Div(Box::new(BinaryOp::new(
14397 ms,
14398 Expression::Literal(Box::new(
14399 crate::expressions::Literal::Number(
14400 "1000.0".to_string(),
14401 ),
14402 )),
14403 )));
14404 let s_with_ms = Expression::Add(Box::new(BinaryOp::new(
14405 s,
14406 Expression::Paren(Box::new(Paren {
14407 this: ms_frac,
14408 trailing_comments: vec![],
14409 })),
14410 )));
14411 args.push(s_with_ms);
14412 Ok(Expression::Function(Box::new(Function::new(
14413 "MAKE_TIMESTAMP".to_string(),
14414 args,
14415 ))))
14416 }
14417 DialectType::Snowflake => {
14418 // TIMESTAMP_FROM_PARTS(y, m, d, h, mi, s, ms * 1000000)
14419 let mut args = f.args;
14420 let ms = args.pop().unwrap();
14421 // ms * 1000000
14422 let ns = Expression::Mul(Box::new(BinaryOp::new(
14423 ms,
14424 Expression::number(1000000),
14425 )));
14426 args.push(ns);
14427 Ok(Expression::Function(Box::new(Function::new(
14428 "TIMESTAMP_FROM_PARTS".to_string(),
14429 args,
14430 ))))
14431 }
14432 _ => {
14433 // Default: keep function name for other targets
14434 Ok(Expression::Function(Box::new(Function::new(
14435 "DATETIMEFROMPARTS".to_string(),
14436 f.args,
14437 ))))
14438 }
14439 }
14440 }
14441 // CONVERT(type, expr [, style]) -> CAST(expr AS type) for non-TSQL targets
14442 // TRY_CONVERT(type, expr [, style]) -> TRY_CAST(expr AS type) for non-TSQL targets
14443 "CONVERT" | "TRY_CONVERT" if f.args.len() >= 2 => {
14444 let is_try = name == "TRY_CONVERT";
14445 let type_expr = f.args[0].clone();
14446 let value_expr = f.args[1].clone();
14447 let style = if f.args.len() >= 3 {
14448 Some(&f.args[2])
14449 } else {
14450 None
14451 };
14452
14453 // For TSQL->TSQL, normalize types and preserve CONVERT/TRY_CONVERT
14454 if matches!(target, DialectType::TSQL) {
14455 let normalized_type = match &type_expr {
14456 Expression::DataType(dt) => {
14457 let new_dt = match dt {
14458 DataType::Int { .. } => DataType::Custom {
14459 name: "INTEGER".to_string(),
14460 },
14461 _ => dt.clone(),
14462 };
14463 Expression::DataType(new_dt)
14464 }
14465 Expression::Identifier(id) => {
14466 if id.name.eq_ignore_ascii_case("INT") {
14467 Expression::Identifier(
14468 crate::expressions::Identifier::new("INTEGER"),
14469 )
14470 } else {
14471 let upper = id.name.to_ascii_uppercase();
14472 Expression::Identifier(
14473 crate::expressions::Identifier::new(upper),
14474 )
14475 }
14476 }
14477 Expression::Column(col) => {
14478 if col.name.name.eq_ignore_ascii_case("INT") {
14479 Expression::Identifier(
14480 crate::expressions::Identifier::new("INTEGER"),
14481 )
14482 } else {
14483 let upper = col.name.name.to_ascii_uppercase();
14484 Expression::Identifier(
14485 crate::expressions::Identifier::new(upper),
14486 )
14487 }
14488 }
14489 _ => type_expr.clone(),
14490 };
14491 let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
14492 let mut new_args = vec![normalized_type, value_expr];
14493 if let Some(s) = style {
14494 new_args.push(s.clone());
14495 }
14496 return Ok(Expression::Function(Box::new(Function::new(
14497 func_name.to_string(),
14498 new_args,
14499 ))));
14500 }
14501
14502 // For other targets: CONVERT(type, expr) -> CAST(expr AS type)
14503 fn expr_to_datatype(e: &Expression) -> Option<DataType> {
14504 match e {
14505 Expression::DataType(dt) => {
14506 // Convert NVARCHAR/NCHAR Custom types to standard VarChar/Char
14507 match dt {
14508 DataType::Custom { name }
14509 if name.starts_with("NVARCHAR(")
14510 || name.starts_with("NCHAR(") =>
14511 {
14512 // Extract the length from "NVARCHAR(200)" or "NCHAR(40)"
14513 let inner = &name[name.find('(').unwrap() + 1
14514 ..name.len() - 1];
14515 if inner.eq_ignore_ascii_case("MAX") {
14516 Some(DataType::Text)
14517 } else if let Ok(len) = inner.parse::<u32>() {
14518 if name.starts_with("NCHAR") {
14519 Some(DataType::Char {
14520 length: Some(len),
14521 })
14522 } else {
14523 Some(DataType::VarChar {
14524 length: Some(len),
14525 parenthesized_length: false,
14526 })
14527 }
14528 } else {
14529 Some(dt.clone())
14530 }
14531 }
14532 DataType::Custom { name } if name == "NVARCHAR" => {
14533 Some(DataType::VarChar {
14534 length: None,
14535 parenthesized_length: false,
14536 })
14537 }
14538 DataType::Custom { name } if name == "NCHAR" => {
14539 Some(DataType::Char { length: None })
14540 }
14541 DataType::Custom { name }
14542 if name == "NVARCHAR(MAX)"
14543 || name == "VARCHAR(MAX)" =>
14544 {
14545 Some(DataType::Text)
14546 }
14547 _ => Some(dt.clone()),
14548 }
14549 }
14550 Expression::Identifier(id) => {
14551 let name = id.name.to_ascii_uppercase();
14552 match name.as_str() {
14553 "INT" | "INTEGER" => Some(DataType::Int {
14554 length: None,
14555 integer_spelling: false,
14556 }),
14557 "BIGINT" => Some(DataType::BigInt { length: None }),
14558 "SMALLINT" => {
14559 Some(DataType::SmallInt { length: None })
14560 }
14561 "TINYINT" => {
14562 Some(DataType::TinyInt { length: None })
14563 }
14564 "FLOAT" => Some(DataType::Float {
14565 precision: None,
14566 scale: None,
14567 real_spelling: false,
14568 }),
14569 "REAL" => Some(DataType::Float {
14570 precision: None,
14571 scale: None,
14572 real_spelling: true,
14573 }),
14574 "DATETIME" | "DATETIME2" => {
14575 Some(DataType::Timestamp {
14576 timezone: false,
14577 precision: None,
14578 })
14579 }
14580 "DATE" => Some(DataType::Date),
14581 "BIT" => Some(DataType::Boolean),
14582 "TEXT" => Some(DataType::Text),
14583 "NUMERIC" => Some(DataType::Decimal {
14584 precision: None,
14585 scale: None,
14586 }),
14587 "MONEY" => Some(DataType::Decimal {
14588 precision: Some(15),
14589 scale: Some(4),
14590 }),
14591 "SMALLMONEY" => Some(DataType::Decimal {
14592 precision: Some(6),
14593 scale: Some(4),
14594 }),
14595 "VARCHAR" => Some(DataType::VarChar {
14596 length: None,
14597 parenthesized_length: false,
14598 }),
14599 "NVARCHAR" => Some(DataType::VarChar {
14600 length: None,
14601 parenthesized_length: false,
14602 }),
14603 "CHAR" => Some(DataType::Char { length: None }),
14604 "NCHAR" => Some(DataType::Char { length: None }),
14605 _ => Some(DataType::Custom { name }),
14606 }
14607 }
14608 Expression::Column(col) => {
14609 let name = col.name.name.to_ascii_uppercase();
14610 match name.as_str() {
14611 "INT" | "INTEGER" => Some(DataType::Int {
14612 length: None,
14613 integer_spelling: false,
14614 }),
14615 "BIGINT" => Some(DataType::BigInt { length: None }),
14616 "FLOAT" => Some(DataType::Float {
14617 precision: None,
14618 scale: None,
14619 real_spelling: false,
14620 }),
14621 "DATETIME" | "DATETIME2" => {
14622 Some(DataType::Timestamp {
14623 timezone: false,
14624 precision: None,
14625 })
14626 }
14627 "DATE" => Some(DataType::Date),
14628 "NUMERIC" => Some(DataType::Decimal {
14629 precision: None,
14630 scale: None,
14631 }),
14632 "VARCHAR" => Some(DataType::VarChar {
14633 length: None,
14634 parenthesized_length: false,
14635 }),
14636 "NVARCHAR" => Some(DataType::VarChar {
14637 length: None,
14638 parenthesized_length: false,
14639 }),
14640 "CHAR" => Some(DataType::Char { length: None }),
14641 "NCHAR" => Some(DataType::Char { length: None }),
14642 _ => Some(DataType::Custom { name }),
14643 }
14644 }
14645 // NVARCHAR(200) parsed as Function("NVARCHAR", [200])
14646 Expression::Function(f) => {
14647 let fname = f.name.to_ascii_uppercase();
14648 match fname.as_str() {
14649 "VARCHAR" | "NVARCHAR" => {
14650 let len = f.args.first().and_then(|a| {
14651 if let Expression::Literal(lit) = a
14652 {
14653 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14654 n.parse::<u32>().ok()
14655 } else { None }
14656 } else if let Expression::Identifier(id) = a
14657 {
14658 if id.name.eq_ignore_ascii_case("MAX") {
14659 None
14660 } else {
14661 None
14662 }
14663 } else {
14664 None
14665 }
14666 });
14667 // Check for VARCHAR(MAX) -> TEXT
14668 let is_max = f.args.first().map_or(false, |a| {
14669 matches!(a, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("MAX"))
14670 || matches!(a, Expression::Column(col) if col.name.name.eq_ignore_ascii_case("MAX"))
14671 });
14672 if is_max {
14673 Some(DataType::Text)
14674 } else {
14675 Some(DataType::VarChar {
14676 length: len,
14677 parenthesized_length: false,
14678 })
14679 }
14680 }
14681 "NCHAR" | "CHAR" => {
14682 let len = f.args.first().and_then(|a| {
14683 if let Expression::Literal(lit) = a
14684 {
14685 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14686 n.parse::<u32>().ok()
14687 } else { None }
14688 } else {
14689 None
14690 }
14691 });
14692 Some(DataType::Char { length: len })
14693 }
14694 "NUMERIC" | "DECIMAL" => {
14695 let precision = f.args.first().and_then(|a| {
14696 if let Expression::Literal(lit) = a
14697 {
14698 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14699 n.parse::<u32>().ok()
14700 } else { None }
14701 } else {
14702 None
14703 }
14704 });
14705 let scale = f.args.get(1).and_then(|a| {
14706 if let Expression::Literal(lit) = a
14707 {
14708 if let crate::expressions::Literal::Number(n) = lit.as_ref() {
14709 n.parse::<u32>().ok()
14710 } else { None }
14711 } else {
14712 None
14713 }
14714 });
14715 Some(DataType::Decimal { precision, scale })
14716 }
14717 _ => None,
14718 }
14719 }
14720 _ => None,
14721 }
14722 }
14723
14724 if let Some(mut dt) = expr_to_datatype(&type_expr) {
14725 // For TSQL source: VARCHAR/CHAR without length defaults to 30
14726 let is_tsql_source =
14727 matches!(source, DialectType::TSQL | DialectType::Fabric);
14728 if is_tsql_source {
14729 match &dt {
14730 DataType::VarChar { length: None, .. } => {
14731 dt = DataType::VarChar {
14732 length: Some(30),
14733 parenthesized_length: false,
14734 };
14735 }
14736 DataType::Char { length: None } => {
14737 dt = DataType::Char { length: Some(30) };
14738 }
14739 _ => {}
14740 }
14741 }
14742
14743 // Determine if this is a string type
14744 let is_string_type = matches!(
14745 dt,
14746 DataType::VarChar { .. }
14747 | DataType::Char { .. }
14748 | DataType::Text
14749 ) || matches!(&dt, DataType::Custom { name } if name == "NVARCHAR" || name == "NCHAR"
14750 || name.starts_with("NVARCHAR(") || name.starts_with("NCHAR(")
14751 || name.starts_with("VARCHAR(") || name == "VARCHAR"
14752 || name == "STRING");
14753
14754 // Determine if this is a date/time type
14755 let is_datetime_type = matches!(
14756 dt,
14757 DataType::Timestamp { .. } | DataType::Date
14758 ) || matches!(&dt, DataType::Custom { name } if name == "DATETIME"
14759 || name == "DATETIME2" || name == "SMALLDATETIME");
14760
14761 // Check for date conversion with style
14762 if style.is_some() {
14763 let style_num = style.and_then(|s| {
14764 if let Expression::Literal(lit) = s {
14765 if let crate::expressions::Literal::Number(n) =
14766 lit.as_ref()
14767 {
14768 n.parse::<u32>().ok()
14769 } else {
14770 None
14771 }
14772 } else {
14773 None
14774 }
14775 });
14776
14777 // TSQL CONVERT date styles (Java format)
14778 let format_str = style_num.and_then(|n| match n {
14779 101 => Some("MM/dd/yyyy"),
14780 102 => Some("yyyy.MM.dd"),
14781 103 => Some("dd/MM/yyyy"),
14782 104 => Some("dd.MM.yyyy"),
14783 105 => Some("dd-MM-yyyy"),
14784 108 => Some("HH:mm:ss"),
14785 110 => Some("MM-dd-yyyy"),
14786 112 => Some("yyyyMMdd"),
14787 120 | 20 => Some("yyyy-MM-dd HH:mm:ss"),
14788 121 | 21 => Some("yyyy-MM-dd HH:mm:ss.SSSSSS"),
14789 126 | 127 => Some("yyyy-MM-dd'T'HH:mm:ss.SSS"),
14790 _ => None,
14791 });
14792
14793 // Non-string, non-datetime types with style: just CAST, ignore the style
14794 if !is_string_type && !is_datetime_type {
14795 let cast_expr = if is_try {
14796 Expression::TryCast(Box::new(
14797 crate::expressions::Cast {
14798 this: value_expr,
14799 to: dt,
14800 trailing_comments: Vec::new(),
14801 double_colon_syntax: false,
14802 format: None,
14803 default: None,
14804 inferred_type: None,
14805 },
14806 ))
14807 } else {
14808 Expression::Cast(Box::new(
14809 crate::expressions::Cast {
14810 this: value_expr,
14811 to: dt,
14812 trailing_comments: Vec::new(),
14813 double_colon_syntax: false,
14814 format: None,
14815 default: None,
14816 inferred_type: None,
14817 },
14818 ))
14819 };
14820 return Ok(cast_expr);
14821 }
14822
14823 if let Some(java_fmt) = format_str {
14824 let c_fmt = java_fmt
14825 .replace("yyyy", "%Y")
14826 .replace("MM", "%m")
14827 .replace("dd", "%d")
14828 .replace("HH", "%H")
14829 .replace("mm", "%M")
14830 .replace("ss", "%S")
14831 .replace("SSSSSS", "%f")
14832 .replace("SSS", "%f")
14833 .replace("'T'", "T");
14834
14835 // For datetime target types: style is the INPUT format for parsing strings -> dates
14836 if is_datetime_type {
14837 match target {
14838 DialectType::DuckDB => {
14839 return Ok(Expression::Function(Box::new(
14840 Function::new(
14841 "STRPTIME".to_string(),
14842 vec![
14843 value_expr,
14844 Expression::string(&c_fmt),
14845 ],
14846 ),
14847 )));
14848 }
14849 DialectType::Spark
14850 | DialectType::Databricks => {
14851 // CONVERT(DATETIME, x, style) -> TO_TIMESTAMP(x, fmt)
14852 // CONVERT(DATE, x, style) -> TO_DATE(x, fmt)
14853 let func_name =
14854 if matches!(dt, DataType::Date) {
14855 "TO_DATE"
14856 } else {
14857 "TO_TIMESTAMP"
14858 };
14859 return Ok(Expression::Function(Box::new(
14860 Function::new(
14861 func_name.to_string(),
14862 vec![
14863 value_expr,
14864 Expression::string(java_fmt),
14865 ],
14866 ),
14867 )));
14868 }
14869 DialectType::Hive => {
14870 return Ok(Expression::Function(Box::new(
14871 Function::new(
14872 "TO_TIMESTAMP".to_string(),
14873 vec![
14874 value_expr,
14875 Expression::string(java_fmt),
14876 ],
14877 ),
14878 )));
14879 }
14880 _ => {
14881 return Ok(Expression::Cast(Box::new(
14882 crate::expressions::Cast {
14883 this: value_expr,
14884 to: dt,
14885 trailing_comments: Vec::new(),
14886 double_colon_syntax: false,
14887 format: None,
14888 default: None,
14889 inferred_type: None,
14890 },
14891 )));
14892 }
14893 }
14894 }
14895
14896 // For string target types: style is the OUTPUT format for dates -> strings
14897 match target {
14898 DialectType::DuckDB => Ok(Expression::Function(
14899 Box::new(Function::new(
14900 "STRPTIME".to_string(),
14901 vec![
14902 value_expr,
14903 Expression::string(&c_fmt),
14904 ],
14905 )),
14906 )),
14907 DialectType::Spark | DialectType::Databricks => {
14908 // For string target types with style: CAST(DATE_FORMAT(x, fmt) AS type)
14909 // Determine the target string type
14910 let string_dt = match &dt {
14911 DataType::VarChar {
14912 length: Some(l),
14913 ..
14914 } => DataType::VarChar {
14915 length: Some(*l),
14916 parenthesized_length: false,
14917 },
14918 DataType::Text => DataType::Custom {
14919 name: "STRING".to_string(),
14920 },
14921 _ => DataType::Custom {
14922 name: "STRING".to_string(),
14923 },
14924 };
14925 let date_format_expr = Expression::Function(
14926 Box::new(Function::new(
14927 "DATE_FORMAT".to_string(),
14928 vec![
14929 value_expr,
14930 Expression::string(java_fmt),
14931 ],
14932 )),
14933 );
14934 let cast_expr = if is_try {
14935 Expression::TryCast(Box::new(
14936 crate::expressions::Cast {
14937 this: date_format_expr,
14938 to: string_dt,
14939 trailing_comments: Vec::new(),
14940 double_colon_syntax: false,
14941 format: None,
14942 default: None,
14943 inferred_type: None,
14944 },
14945 ))
14946 } else {
14947 Expression::Cast(Box::new(
14948 crate::expressions::Cast {
14949 this: date_format_expr,
14950 to: string_dt,
14951 trailing_comments: Vec::new(),
14952 double_colon_syntax: false,
14953 format: None,
14954 default: None,
14955 inferred_type: None,
14956 },
14957 ))
14958 };
14959 Ok(cast_expr)
14960 }
14961 DialectType::MySQL | DialectType::SingleStore => {
14962 // For MySQL: CAST(DATE_FORMAT(x, mysql_fmt) AS CHAR(n))
14963 let mysql_fmt = java_fmt
14964 .replace("yyyy", "%Y")
14965 .replace("MM", "%m")
14966 .replace("dd", "%d")
14967 .replace("HH:mm:ss.SSSSSS", "%T")
14968 .replace("HH:mm:ss", "%T")
14969 .replace("HH", "%H")
14970 .replace("mm", "%i")
14971 .replace("ss", "%S");
14972 let date_format_expr = Expression::Function(
14973 Box::new(Function::new(
14974 "DATE_FORMAT".to_string(),
14975 vec![
14976 value_expr,
14977 Expression::string(&mysql_fmt),
14978 ],
14979 )),
14980 );
14981 // MySQL uses CHAR for string casts
14982 let mysql_dt = match &dt {
14983 DataType::VarChar { length, .. } => {
14984 DataType::Char { length: *length }
14985 }
14986 _ => dt,
14987 };
14988 Ok(Expression::Cast(Box::new(
14989 crate::expressions::Cast {
14990 this: date_format_expr,
14991 to: mysql_dt,
14992 trailing_comments: Vec::new(),
14993 double_colon_syntax: false,
14994 format: None,
14995 default: None,
14996 inferred_type: None,
14997 },
14998 )))
14999 }
15000 DialectType::Hive => {
15001 let func_name = "TO_TIMESTAMP";
15002 Ok(Expression::Function(Box::new(
15003 Function::new(
15004 func_name.to_string(),
15005 vec![
15006 value_expr,
15007 Expression::string(java_fmt),
15008 ],
15009 ),
15010 )))
15011 }
15012 _ => Ok(Expression::Cast(Box::new(
15013 crate::expressions::Cast {
15014 this: value_expr,
15015 to: dt,
15016 trailing_comments: Vec::new(),
15017 double_colon_syntax: false,
15018 format: None,
15019 default: None,
15020 inferred_type: None,
15021 },
15022 ))),
15023 }
15024 } else {
15025 // Unknown style, just CAST
15026 let cast_expr = if is_try {
15027 Expression::TryCast(Box::new(
15028 crate::expressions::Cast {
15029 this: value_expr,
15030 to: dt,
15031 trailing_comments: Vec::new(),
15032 double_colon_syntax: false,
15033 format: None,
15034 default: None,
15035 inferred_type: None,
15036 },
15037 ))
15038 } else {
15039 Expression::Cast(Box::new(
15040 crate::expressions::Cast {
15041 this: value_expr,
15042 to: dt,
15043 trailing_comments: Vec::new(),
15044 double_colon_syntax: false,
15045 format: None,
15046 default: None,
15047 inferred_type: None,
15048 },
15049 ))
15050 };
15051 Ok(cast_expr)
15052 }
15053 } else {
15054 // No style - simple CAST
15055 let final_dt = if matches!(
15056 target,
15057 DialectType::MySQL | DialectType::SingleStore
15058 ) {
15059 match &dt {
15060 DataType::Int { .. }
15061 | DataType::BigInt { .. }
15062 | DataType::SmallInt { .. }
15063 | DataType::TinyInt { .. } => DataType::Custom {
15064 name: "SIGNED".to_string(),
15065 },
15066 DataType::VarChar { length, .. } => {
15067 DataType::Char { length: *length }
15068 }
15069 _ => dt,
15070 }
15071 } else {
15072 dt
15073 };
15074 let cast_expr = if is_try {
15075 Expression::TryCast(Box::new(
15076 crate::expressions::Cast {
15077 this: value_expr,
15078 to: final_dt,
15079 trailing_comments: Vec::new(),
15080 double_colon_syntax: false,
15081 format: None,
15082 default: None,
15083 inferred_type: None,
15084 },
15085 ))
15086 } else {
15087 Expression::Cast(Box::new(crate::expressions::Cast {
15088 this: value_expr,
15089 to: final_dt,
15090 trailing_comments: Vec::new(),
15091 double_colon_syntax: false,
15092 format: None,
15093 default: None,
15094 inferred_type: None,
15095 }))
15096 };
15097 Ok(cast_expr)
15098 }
15099 } else {
15100 // Can't convert type expression - keep as CONVERT/TRY_CONVERT function
15101 Ok(Expression::Function(f))
15102 }
15103 }
15104 // STRFTIME(val, fmt) from DuckDB / STRFTIME(fmt, val) from SQLite -> target-specific
15105 "STRFTIME" if f.args.len() == 2 => {
15106 // SQLite uses STRFTIME(fmt, val); DuckDB uses STRFTIME(val, fmt)
15107 let (val, fmt_expr) = if matches!(source, DialectType::SQLite) {
15108 // SQLite: args[0] = format, args[1] = value
15109 (f.args[1].clone(), &f.args[0])
15110 } else {
15111 // DuckDB and others: args[0] = value, args[1] = format
15112 (f.args[0].clone(), &f.args[1])
15113 };
15114
15115 // Helper to convert C-style format to Java-style
// Translate a C/strftime-style format string into the Java
// SimpleDateFormat dialect used by Spark/Databricks/Hive and TSQL FORMAT.
// Tokens are rewritten sequentially in table order; ordering is load-bearing
// ("%-I" must be handled before "%I" so the dash-prefixed form is not
// clobbered by the plain one).
fn c_to_java_format(fmt: &str) -> String {
    const TOKEN_MAP: [(&str, &str); 19] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%j", "DDD"),
        ("%a", "EEE"),
        ("%b", "MMM"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (c_tok, java_tok)| {
            acc.replace(c_tok, java_tok)
        })
}
15137
15138 // Helper: recursively convert format strings within expressions (handles CONCAT)
15139 fn convert_fmt_expr(
15140 expr: &Expression,
15141 converter: &dyn Fn(&str) -> String,
15142 ) -> Expression {
15143 match expr {
15144 Expression::Literal(lit)
15145 if matches!(
15146 lit.as_ref(),
15147 crate::expressions::Literal::String(_)
15148 ) =>
15149 {
15150 let crate::expressions::Literal::String(s) =
15151 lit.as_ref()
15152 else {
15153 unreachable!()
15154 };
15155 Expression::string(&converter(s))
15156 }
15157 Expression::Function(func)
15158 if func.name.eq_ignore_ascii_case("CONCAT") =>
15159 {
15160 let new_args: Vec<Expression> = func
15161 .args
15162 .iter()
15163 .map(|a| convert_fmt_expr(a, converter))
15164 .collect();
15165 Expression::Function(Box::new(Function::new(
15166 "CONCAT".to_string(),
15167 new_args,
15168 )))
15169 }
15170 other => other.clone(),
15171 }
15172 }
15173
15174 match target {
15175 DialectType::DuckDB => {
15176 if matches!(source, DialectType::SQLite) {
15177 // SQLite STRFTIME(fmt, val) -> DuckDB STRFTIME(CAST(val AS TIMESTAMP), fmt)
15178 let cast_val = Expression::Cast(Box::new(Cast {
15179 this: val,
15180 to: crate::expressions::DataType::Timestamp {
15181 precision: None,
15182 timezone: false,
15183 },
15184 trailing_comments: Vec::new(),
15185 double_colon_syntax: false,
15186 format: None,
15187 default: None,
15188 inferred_type: None,
15189 }));
15190 Ok(Expression::Function(Box::new(Function::new(
15191 "STRFTIME".to_string(),
15192 vec![cast_val, fmt_expr.clone()],
15193 ))))
15194 } else {
15195 Ok(Expression::Function(f))
15196 }
15197 }
15198 DialectType::Spark
15199 | DialectType::Databricks
15200 | DialectType::Hive => {
15201 // STRFTIME(val, fmt) -> DATE_FORMAT(val, java_fmt)
15202 let converted_fmt =
15203 convert_fmt_expr(fmt_expr, &c_to_java_format);
15204 Ok(Expression::Function(Box::new(Function::new(
15205 "DATE_FORMAT".to_string(),
15206 vec![val, converted_fmt],
15207 ))))
15208 }
15209 DialectType::TSQL | DialectType::Fabric => {
15210 // STRFTIME(val, fmt) -> FORMAT(val, java_fmt)
15211 let converted_fmt =
15212 convert_fmt_expr(fmt_expr, &c_to_java_format);
15213 Ok(Expression::Function(Box::new(Function::new(
15214 "FORMAT".to_string(),
15215 vec![val, converted_fmt],
15216 ))))
15217 }
15218 DialectType::Presto
15219 | DialectType::Trino
15220 | DialectType::Athena => {
15221 // STRFTIME(val, fmt) -> DATE_FORMAT(val, presto_fmt) (convert DuckDB format to Presto)
15222 if let Expression::Literal(lit) = fmt_expr {
15223 if let crate::expressions::Literal::String(s) =
15224 lit.as_ref()
15225 {
15226 let presto_fmt = duckdb_to_presto_format(s);
15227 Ok(Expression::Function(Box::new(Function::new(
15228 "DATE_FORMAT".to_string(),
15229 vec![val, Expression::string(&presto_fmt)],
15230 ))))
15231 } else {
15232 Ok(Expression::Function(Box::new(Function::new(
15233 "DATE_FORMAT".to_string(),
15234 vec![val, fmt_expr.clone()],
15235 ))))
15236 }
15237 } else {
15238 Ok(Expression::Function(Box::new(Function::new(
15239 "DATE_FORMAT".to_string(),
15240 vec![val, fmt_expr.clone()],
15241 ))))
15242 }
15243 }
15244 DialectType::BigQuery => {
15245 // STRFTIME(val, fmt) -> FORMAT_DATE(bq_fmt, val) - note reversed arg order
15246 if let Expression::Literal(lit) = fmt_expr {
15247 if let crate::expressions::Literal::String(s) =
15248 lit.as_ref()
15249 {
15250 let bq_fmt = duckdb_to_bigquery_format(s);
15251 Ok(Expression::Function(Box::new(Function::new(
15252 "FORMAT_DATE".to_string(),
15253 vec![Expression::string(&bq_fmt), val],
15254 ))))
15255 } else {
15256 Ok(Expression::Function(Box::new(Function::new(
15257 "FORMAT_DATE".to_string(),
15258 vec![fmt_expr.clone(), val],
15259 ))))
15260 }
15261 } else {
15262 Ok(Expression::Function(Box::new(Function::new(
15263 "FORMAT_DATE".to_string(),
15264 vec![fmt_expr.clone(), val],
15265 ))))
15266 }
15267 }
15268 DialectType::PostgreSQL | DialectType::Redshift => {
15269 // STRFTIME(val, fmt) -> TO_CHAR(val, pg_fmt)
15270 if let Expression::Literal(lit) = fmt_expr {
15271 if let crate::expressions::Literal::String(s) =
15272 lit.as_ref()
15273 {
15274 let pg_fmt = s
15275 .replace("%Y", "YYYY")
15276 .replace("%m", "MM")
15277 .replace("%d", "DD")
15278 .replace("%H", "HH24")
15279 .replace("%M", "MI")
15280 .replace("%S", "SS")
15281 .replace("%y", "YY")
15282 .replace("%-m", "FMMM")
15283 .replace("%-d", "FMDD")
15284 .replace("%-H", "FMHH24")
15285 .replace("%-I", "FMHH12")
15286 .replace("%p", "AM")
15287 .replace("%F", "YYYY-MM-DD")
15288 .replace("%T", "HH24:MI:SS");
15289 Ok(Expression::Function(Box::new(Function::new(
15290 "TO_CHAR".to_string(),
15291 vec![val, Expression::string(&pg_fmt)],
15292 ))))
15293 } else {
15294 Ok(Expression::Function(Box::new(Function::new(
15295 "TO_CHAR".to_string(),
15296 vec![val, fmt_expr.clone()],
15297 ))))
15298 }
15299 } else {
15300 Ok(Expression::Function(Box::new(Function::new(
15301 "TO_CHAR".to_string(),
15302 vec![val, fmt_expr.clone()],
15303 ))))
15304 }
15305 }
15306 _ => Ok(Expression::Function(f)),
15307 }
15308 }
15309 // STRPTIME(val, fmt) from DuckDB -> target-specific date parse function
15310 "STRPTIME" if f.args.len() == 2 => {
15311 let val = f.args[0].clone();
15312 let fmt_expr = &f.args[1];
15313
// Translate a C/strftime-style *parse* format into Java SimpleDateFormat
// tokens for date-parsing functions (TO_TIMESTAMP, UNIX_TIMESTAMP, ...).
// Deliberately narrower than the formatting variant: %j/%a/%b are left
// untouched here. Rewrites are applied sequentially in table order, with
// "%-I" ahead of "%I" so the dash-prefixed token survives.
fn c_to_java_format_parse(fmt: &str) -> String {
    const TOKEN_MAP: [(&str, &str); 16] = [
        ("%Y", "yyyy"),
        ("%m", "MM"),
        ("%d", "dd"),
        ("%H", "HH"),
        ("%M", "mm"),
        ("%S", "ss"),
        ("%f", "SSSSSS"),
        ("%y", "yy"),
        ("%-m", "M"),
        ("%-d", "d"),
        ("%-H", "H"),
        ("%-I", "h"),
        ("%I", "hh"),
        ("%p", "a"),
        ("%F", "yyyy-MM-dd"),
        ("%T", "HH:mm:ss"),
    ];
    TOKEN_MAP
        .iter()
        .fold(fmt.to_string(), |acc, (c_tok, java_tok)| {
            acc.replace(c_tok, java_tok)
        })
}
15332
15333 match target {
15334 DialectType::DuckDB => Ok(Expression::Function(f)),
15335 DialectType::Spark | DialectType::Databricks => {
15336 // STRPTIME(val, fmt) -> TO_TIMESTAMP(val, java_fmt)
15337 if let Expression::Literal(lit) = fmt_expr {
15338 if let crate::expressions::Literal::String(s) =
15339 lit.as_ref()
15340 {
15341 let java_fmt = c_to_java_format_parse(s);
15342 Ok(Expression::Function(Box::new(Function::new(
15343 "TO_TIMESTAMP".to_string(),
15344 vec![val, Expression::string(&java_fmt)],
15345 ))))
15346 } else {
15347 Ok(Expression::Function(Box::new(Function::new(
15348 "TO_TIMESTAMP".to_string(),
15349 vec![val, fmt_expr.clone()],
15350 ))))
15351 }
15352 } else {
15353 Ok(Expression::Function(Box::new(Function::new(
15354 "TO_TIMESTAMP".to_string(),
15355 vec![val, fmt_expr.clone()],
15356 ))))
15357 }
15358 }
15359 DialectType::Hive => {
15360 // STRPTIME(val, fmt) -> CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(val, java_fmt)) AS TIMESTAMP)
15361 if let Expression::Literal(lit) = fmt_expr {
15362 if let crate::expressions::Literal::String(s) =
15363 lit.as_ref()
15364 {
15365 let java_fmt = c_to_java_format_parse(s);
15366 let unix_ts =
15367 Expression::Function(Box::new(Function::new(
15368 "UNIX_TIMESTAMP".to_string(),
15369 vec![val, Expression::string(&java_fmt)],
15370 )));
15371 let from_unix =
15372 Expression::Function(Box::new(Function::new(
15373 "FROM_UNIXTIME".to_string(),
15374 vec![unix_ts],
15375 )));
15376 Ok(Expression::Cast(Box::new(
15377 crate::expressions::Cast {
15378 this: from_unix,
15379 to: DataType::Timestamp {
15380 timezone: false,
15381 precision: None,
15382 },
15383 trailing_comments: Vec::new(),
15384 double_colon_syntax: false,
15385 format: None,
15386 default: None,
15387 inferred_type: None,
15388 },
15389 )))
15390 } else {
15391 Ok(Expression::Function(f))
15392 }
15393 } else {
15394 Ok(Expression::Function(f))
15395 }
15396 }
15397 DialectType::Presto
15398 | DialectType::Trino
15399 | DialectType::Athena => {
15400 // STRPTIME(val, fmt) -> DATE_PARSE(val, presto_fmt) (convert DuckDB format to Presto)
15401 if let Expression::Literal(lit) = fmt_expr {
15402 if let crate::expressions::Literal::String(s) =
15403 lit.as_ref()
15404 {
15405 let presto_fmt = duckdb_to_presto_format(s);
15406 Ok(Expression::Function(Box::new(Function::new(
15407 "DATE_PARSE".to_string(),
15408 vec![val, Expression::string(&presto_fmt)],
15409 ))))
15410 } else {
15411 Ok(Expression::Function(Box::new(Function::new(
15412 "DATE_PARSE".to_string(),
15413 vec![val, fmt_expr.clone()],
15414 ))))
15415 }
15416 } else {
15417 Ok(Expression::Function(Box::new(Function::new(
15418 "DATE_PARSE".to_string(),
15419 vec![val, fmt_expr.clone()],
15420 ))))
15421 }
15422 }
15423 DialectType::BigQuery => {
15424 // STRPTIME(val, fmt) -> PARSE_TIMESTAMP(bq_fmt, val) - note reversed arg order
15425 if let Expression::Literal(lit) = fmt_expr {
15426 if let crate::expressions::Literal::String(s) =
15427 lit.as_ref()
15428 {
15429 let bq_fmt = duckdb_to_bigquery_format(s);
15430 Ok(Expression::Function(Box::new(Function::new(
15431 "PARSE_TIMESTAMP".to_string(),
15432 vec![Expression::string(&bq_fmt), val],
15433 ))))
15434 } else {
15435 Ok(Expression::Function(Box::new(Function::new(
15436 "PARSE_TIMESTAMP".to_string(),
15437 vec![fmt_expr.clone(), val],
15438 ))))
15439 }
15440 } else {
15441 Ok(Expression::Function(Box::new(Function::new(
15442 "PARSE_TIMESTAMP".to_string(),
15443 vec![fmt_expr.clone(), val],
15444 ))))
15445 }
15446 }
15447 _ => Ok(Expression::Function(f)),
15448 }
15449 }
15450 // DATE_FORMAT(val, fmt) from Presto source (C-style format) -> target-specific
15451 "DATE_FORMAT"
15452 if f.args.len() >= 2
15453 && matches!(
15454 source,
15455 DialectType::Presto
15456 | DialectType::Trino
15457 | DialectType::Athena
15458 ) =>
15459 {
15460 let val = f.args[0].clone();
15461 let fmt_expr = &f.args[1];
15462
15463 match target {
15464 DialectType::Presto
15465 | DialectType::Trino
15466 | DialectType::Athena => {
15467 // Presto -> Presto: normalize format (e.g., %H:%i:%S -> %T)
15468 if let Expression::Literal(lit) = fmt_expr {
15469 if let crate::expressions::Literal::String(s) =
15470 lit.as_ref()
15471 {
15472 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15473 Ok(Expression::Function(Box::new(Function::new(
15474 "DATE_FORMAT".to_string(),
15475 vec![val, Expression::string(&normalized)],
15476 ))))
15477 } else {
15478 Ok(Expression::Function(f))
15479 }
15480 } else {
15481 Ok(Expression::Function(f))
15482 }
15483 }
15484 DialectType::Hive
15485 | DialectType::Spark
15486 | DialectType::Databricks => {
15487 // Convert Presto C-style to Java-style format
15488 if let Expression::Literal(lit) = fmt_expr {
15489 if let crate::expressions::Literal::String(s) =
15490 lit.as_ref()
15491 {
15492 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15493 Ok(Expression::Function(Box::new(Function::new(
15494 "DATE_FORMAT".to_string(),
15495 vec![val, Expression::string(&java_fmt)],
15496 ))))
15497 } else {
15498 Ok(Expression::Function(f))
15499 }
15500 } else {
15501 Ok(Expression::Function(f))
15502 }
15503 }
15504 DialectType::DuckDB => {
15505 // Convert to STRFTIME(val, duckdb_fmt)
15506 if let Expression::Literal(lit) = fmt_expr {
15507 if let crate::expressions::Literal::String(s) =
15508 lit.as_ref()
15509 {
15510 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15511 Ok(Expression::Function(Box::new(Function::new(
15512 "STRFTIME".to_string(),
15513 vec![val, Expression::string(&duckdb_fmt)],
15514 ))))
15515 } else {
15516 Ok(Expression::Function(Box::new(Function::new(
15517 "STRFTIME".to_string(),
15518 vec![val, fmt_expr.clone()],
15519 ))))
15520 }
15521 } else {
15522 Ok(Expression::Function(Box::new(Function::new(
15523 "STRFTIME".to_string(),
15524 vec![val, fmt_expr.clone()],
15525 ))))
15526 }
15527 }
15528 DialectType::BigQuery => {
15529 // Convert to FORMAT_DATE(bq_fmt, val) - reversed args
15530 if let Expression::Literal(lit) = fmt_expr {
15531 if let crate::expressions::Literal::String(s) =
15532 lit.as_ref()
15533 {
15534 let bq_fmt = crate::dialects::presto::PrestoDialect::presto_to_bigquery_format(s);
15535 Ok(Expression::Function(Box::new(Function::new(
15536 "FORMAT_DATE".to_string(),
15537 vec![Expression::string(&bq_fmt), val],
15538 ))))
15539 } else {
15540 Ok(Expression::Function(Box::new(Function::new(
15541 "FORMAT_DATE".to_string(),
15542 vec![fmt_expr.clone(), val],
15543 ))))
15544 }
15545 } else {
15546 Ok(Expression::Function(Box::new(Function::new(
15547 "FORMAT_DATE".to_string(),
15548 vec![fmt_expr.clone(), val],
15549 ))))
15550 }
15551 }
15552 _ => Ok(Expression::Function(f)),
15553 }
15554 }
15555 // DATE_PARSE(val, fmt) from Presto source -> target-specific parse function
15556 "DATE_PARSE"
15557 if f.args.len() >= 2
15558 && matches!(
15559 source,
15560 DialectType::Presto
15561 | DialectType::Trino
15562 | DialectType::Athena
15563 ) =>
15564 {
15565 let val = f.args[0].clone();
15566 let fmt_expr = &f.args[1];
15567
15568 match target {
15569 DialectType::Presto
15570 | DialectType::Trino
15571 | DialectType::Athena => {
15572 // Presto -> Presto: normalize format
15573 if let Expression::Literal(lit) = fmt_expr {
15574 if let crate::expressions::Literal::String(s) =
15575 lit.as_ref()
15576 {
15577 let normalized = crate::dialects::presto::PrestoDialect::normalize_presto_format(s);
15578 Ok(Expression::Function(Box::new(Function::new(
15579 "DATE_PARSE".to_string(),
15580 vec![val, Expression::string(&normalized)],
15581 ))))
15582 } else {
15583 Ok(Expression::Function(f))
15584 }
15585 } else {
15586 Ok(Expression::Function(f))
15587 }
15588 }
15589 DialectType::Hive => {
15590 // Presto -> Hive: if default format, just CAST(x AS TIMESTAMP)
15591 if let Expression::Literal(lit) = fmt_expr {
15592 if let crate::expressions::Literal::String(s) =
15593 lit.as_ref()
15594 {
15595 if crate::dialects::presto::PrestoDialect::is_default_timestamp_format(s)
15596 || crate::dialects::presto::PrestoDialect::is_default_date_format(s) {
15597 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15598 this: val,
15599 to: DataType::Timestamp { timezone: false, precision: None },
15600 trailing_comments: Vec::new(),
15601 double_colon_syntax: false,
15602 format: None,
15603 default: None,
15604 inferred_type: None,
15605 })))
15606 } else {
15607 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15608 Ok(Expression::Function(Box::new(Function::new(
15609 "TO_TIMESTAMP".to_string(),
15610 vec![val, Expression::string(&java_fmt)],
15611 ))))
15612 }
15613 } else {
15614 Ok(Expression::Function(f))
15615 }
15616 } else {
15617 Ok(Expression::Function(f))
15618 }
15619 }
15620 DialectType::Spark | DialectType::Databricks => {
15621 // Presto -> Spark: TO_TIMESTAMP(val, java_fmt)
15622 if let Expression::Literal(lit) = fmt_expr {
15623 if let crate::expressions::Literal::String(s) =
15624 lit.as_ref()
15625 {
15626 let java_fmt = crate::dialects::presto::PrestoDialect::presto_to_java_format(s);
15627 Ok(Expression::Function(Box::new(Function::new(
15628 "TO_TIMESTAMP".to_string(),
15629 vec![val, Expression::string(&java_fmt)],
15630 ))))
15631 } else {
15632 Ok(Expression::Function(f))
15633 }
15634 } else {
15635 Ok(Expression::Function(f))
15636 }
15637 }
15638 DialectType::DuckDB => {
15639 // Presto -> DuckDB: STRPTIME(val, duckdb_fmt)
15640 if let Expression::Literal(lit) = fmt_expr {
15641 if let crate::expressions::Literal::String(s) =
15642 lit.as_ref()
15643 {
15644 let duckdb_fmt = crate::dialects::presto::PrestoDialect::presto_to_duckdb_format(s);
15645 Ok(Expression::Function(Box::new(Function::new(
15646 "STRPTIME".to_string(),
15647 vec![val, Expression::string(&duckdb_fmt)],
15648 ))))
15649 } else {
15650 Ok(Expression::Function(Box::new(Function::new(
15651 "STRPTIME".to_string(),
15652 vec![val, fmt_expr.clone()],
15653 ))))
15654 }
15655 } else {
15656 Ok(Expression::Function(Box::new(Function::new(
15657 "STRPTIME".to_string(),
15658 vec![val, fmt_expr.clone()],
15659 ))))
15660 }
15661 }
15662 _ => Ok(Expression::Function(f)),
15663 }
15664 }
15665 // FROM_BASE64(x) / TO_BASE64(x) from Presto -> Hive-specific renames
15666 "FROM_BASE64"
15667 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15668 {
15669 Ok(Expression::Function(Box::new(Function::new(
15670 "UNBASE64".to_string(),
15671 f.args,
15672 ))))
15673 }
15674 "TO_BASE64"
15675 if f.args.len() == 1 && matches!(target, DialectType::Hive) =>
15676 {
15677 Ok(Expression::Function(Box::new(Function::new(
15678 "BASE64".to_string(),
15679 f.args,
15680 ))))
15681 }
15682 // FROM_UNIXTIME(x) -> CAST(FROM_UNIXTIME(x) AS TIMESTAMP) for Spark
15683 "FROM_UNIXTIME"
15684 if f.args.len() == 1
15685 && matches!(
15686 source,
15687 DialectType::Presto
15688 | DialectType::Trino
15689 | DialectType::Athena
15690 )
15691 && matches!(
15692 target,
15693 DialectType::Spark | DialectType::Databricks
15694 ) =>
15695 {
15696 // Wrap FROM_UNIXTIME(x) in CAST(... AS TIMESTAMP)
15697 let from_unix = Expression::Function(Box::new(Function::new(
15698 "FROM_UNIXTIME".to_string(),
15699 f.args,
15700 )));
15701 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
15702 this: from_unix,
15703 to: DataType::Timestamp {
15704 timezone: false,
15705 precision: None,
15706 },
15707 trailing_comments: Vec::new(),
15708 double_colon_syntax: false,
15709 format: None,
15710 default: None,
15711 inferred_type: None,
15712 })))
15713 }
15714 // DATE_FORMAT(val, fmt) from Hive/Spark/MySQL -> target-specific format function
15715 "DATE_FORMAT"
15716 if f.args.len() >= 2
15717 && !matches!(
15718 target,
15719 DialectType::Hive
15720 | DialectType::Spark
15721 | DialectType::Databricks
15722 | DialectType::MySQL
15723 | DialectType::SingleStore
15724 ) =>
15725 {
15726 let val = f.args[0].clone();
15727 let fmt_expr = &f.args[1];
15728 let is_hive_source = matches!(
15729 source,
15730 DialectType::Hive
15731 | DialectType::Spark
15732 | DialectType::Databricks
15733 );
15734
15735 fn java_to_c_format(fmt: &str) -> String {
15736 // Replace Java patterns with C strftime patterns.
15737 // Uses multi-pass to handle patterns that conflict.
15738 // First pass: replace multi-char patterns (longer first)
15739 let result = fmt
15740 .replace("yyyy", "%Y")
15741 .replace("SSSSSS", "%f")
15742 .replace("EEEE", "%W")
15743 .replace("MM", "%m")
15744 .replace("dd", "%d")
15745 .replace("HH", "%H")
15746 .replace("mm", "%M")
15747 .replace("ss", "%S")
15748 .replace("yy", "%y");
15749 // Second pass: handle single-char timezone patterns
15750 // z -> %Z (timezone name), Z -> %z (timezone offset)
15751 // Must be careful not to replace 'z'/'Z' inside already-replaced %Y, %M etc.
15752 let mut out = String::new();
15753 let chars: Vec<char> = result.chars().collect();
15754 let mut i = 0;
15755 while i < chars.len() {
15756 if chars[i] == '%' && i + 1 < chars.len() {
15757 // Already a format specifier, skip both chars
15758 out.push(chars[i]);
15759 out.push(chars[i + 1]);
15760 i += 2;
15761 } else if chars[i] == 'z' {
15762 out.push_str("%Z");
15763 i += 1;
15764 } else if chars[i] == 'Z' {
15765 out.push_str("%z");
15766 i += 1;
15767 } else {
15768 out.push(chars[i]);
15769 i += 1;
15770 }
15771 }
15772 out
15773 }
15774
15775 fn java_to_presto_format(fmt: &str) -> String {
15776 // Presto uses %T for HH:MM:SS
15777 let c_fmt = java_to_c_format(fmt);
15778 c_fmt.replace("%H:%M:%S", "%T")
15779 }
15780
15781 fn java_to_bq_format(fmt: &str) -> String {
15782 // BigQuery uses %F for yyyy-MM-dd and %T for HH:mm:ss
15783 let c_fmt = java_to_c_format(fmt);
15784 c_fmt.replace("%Y-%m-%d", "%F").replace("%H:%M:%S", "%T")
15785 }
15786
15787 // For Hive source, CAST string literals to appropriate type
15788 let cast_val = if is_hive_source {
15789 match &val {
15790 Expression::Literal(lit)
15791 if matches!(
15792 lit.as_ref(),
15793 crate::expressions::Literal::String(_)
15794 ) =>
15795 {
15796 match target {
15797 DialectType::DuckDB
15798 | DialectType::Presto
15799 | DialectType::Trino
15800 | DialectType::Athena => {
15801 Self::ensure_cast_timestamp(val.clone())
15802 }
15803 DialectType::BigQuery => {
15804 // BigQuery: CAST(val AS DATETIME)
15805 Expression::Cast(Box::new(
15806 crate::expressions::Cast {
15807 this: val.clone(),
15808 to: DataType::Custom {
15809 name: "DATETIME".to_string(),
15810 },
15811 trailing_comments: vec![],
15812 double_colon_syntax: false,
15813 format: None,
15814 default: None,
15815 inferred_type: None,
15816 },
15817 ))
15818 }
15819 _ => val.clone(),
15820 }
15821 }
15822 // For CAST(x AS DATE) or DATE literal, Presto needs CAST(CAST(x AS DATE) AS TIMESTAMP)
15823 Expression::Cast(c)
15824 if matches!(c.to, DataType::Date)
15825 && matches!(
15826 target,
15827 DialectType::Presto
15828 | DialectType::Trino
15829 | DialectType::Athena
15830 ) =>
15831 {
15832 Expression::Cast(Box::new(crate::expressions::Cast {
15833 this: val.clone(),
15834 to: DataType::Timestamp {
15835 timezone: false,
15836 precision: None,
15837 },
15838 trailing_comments: vec![],
15839 double_colon_syntax: false,
15840 format: None,
15841 default: None,
15842 inferred_type: None,
15843 }))
15844 }
15845 Expression::Literal(lit)
15846 if matches!(
15847 lit.as_ref(),
15848 crate::expressions::Literal::Date(_)
15849 ) && matches!(
15850 target,
15851 DialectType::Presto
15852 | DialectType::Trino
15853 | DialectType::Athena
15854 ) =>
15855 {
15856 // DATE 'x' -> CAST(CAST('x' AS DATE) AS TIMESTAMP)
15857 let cast_date = Self::date_literal_to_cast(val.clone());
15858 Expression::Cast(Box::new(crate::expressions::Cast {
15859 this: cast_date,
15860 to: DataType::Timestamp {
15861 timezone: false,
15862 precision: None,
15863 },
15864 trailing_comments: vec![],
15865 double_colon_syntax: false,
15866 format: None,
15867 default: None,
15868 inferred_type: None,
15869 }))
15870 }
15871 _ => val.clone(),
15872 }
15873 } else {
15874 val.clone()
15875 };
15876
15877 match target {
15878 DialectType::DuckDB => {
15879 if let Expression::Literal(lit) = fmt_expr {
15880 if let crate::expressions::Literal::String(s) =
15881 lit.as_ref()
15882 {
15883 let c_fmt = if is_hive_source {
15884 java_to_c_format(s)
15885 } else {
15886 s.clone()
15887 };
15888 Ok(Expression::Function(Box::new(Function::new(
15889 "STRFTIME".to_string(),
15890 vec![cast_val, Expression::string(&c_fmt)],
15891 ))))
15892 } else {
15893 Ok(Expression::Function(Box::new(Function::new(
15894 "STRFTIME".to_string(),
15895 vec![cast_val, fmt_expr.clone()],
15896 ))))
15897 }
15898 } else {
15899 Ok(Expression::Function(Box::new(Function::new(
15900 "STRFTIME".to_string(),
15901 vec![cast_val, fmt_expr.clone()],
15902 ))))
15903 }
15904 }
15905 DialectType::Presto
15906 | DialectType::Trino
15907 | DialectType::Athena => {
15908 if is_hive_source {
15909 if let Expression::Literal(lit) = fmt_expr {
15910 if let crate::expressions::Literal::String(s) =
15911 lit.as_ref()
15912 {
15913 let p_fmt = java_to_presto_format(s);
15914 Ok(Expression::Function(Box::new(
15915 Function::new(
15916 "DATE_FORMAT".to_string(),
15917 vec![
15918 cast_val,
15919 Expression::string(&p_fmt),
15920 ],
15921 ),
15922 )))
15923 } else {
15924 Ok(Expression::Function(Box::new(
15925 Function::new(
15926 "DATE_FORMAT".to_string(),
15927 vec![cast_val, fmt_expr.clone()],
15928 ),
15929 )))
15930 }
15931 } else {
15932 Ok(Expression::Function(Box::new(Function::new(
15933 "DATE_FORMAT".to_string(),
15934 vec![cast_val, fmt_expr.clone()],
15935 ))))
15936 }
15937 } else {
15938 Ok(Expression::Function(Box::new(Function::new(
15939 "DATE_FORMAT".to_string(),
15940 f.args,
15941 ))))
15942 }
15943 }
15944 DialectType::BigQuery => {
15945 // DATE_FORMAT(val, fmt) -> FORMAT_DATE(fmt, val)
15946 if let Expression::Literal(lit) = fmt_expr {
15947 if let crate::expressions::Literal::String(s) =
15948 lit.as_ref()
15949 {
15950 let bq_fmt = if is_hive_source {
15951 java_to_bq_format(s)
15952 } else {
15953 java_to_c_format(s)
15954 };
15955 Ok(Expression::Function(Box::new(Function::new(
15956 "FORMAT_DATE".to_string(),
15957 vec![Expression::string(&bq_fmt), cast_val],
15958 ))))
15959 } else {
15960 Ok(Expression::Function(Box::new(Function::new(
15961 "FORMAT_DATE".to_string(),
15962 vec![fmt_expr.clone(), cast_val],
15963 ))))
15964 }
15965 } else {
15966 Ok(Expression::Function(Box::new(Function::new(
15967 "FORMAT_DATE".to_string(),
15968 vec![fmt_expr.clone(), cast_val],
15969 ))))
15970 }
15971 }
15972 DialectType::PostgreSQL | DialectType::Redshift => {
15973 if let Expression::Literal(lit) = fmt_expr {
15974 if let crate::expressions::Literal::String(s) =
15975 lit.as_ref()
15976 {
15977 let pg_fmt = s
15978 .replace("yyyy", "YYYY")
15979 .replace("MM", "MM")
15980 .replace("dd", "DD")
15981 .replace("HH", "HH24")
15982 .replace("mm", "MI")
15983 .replace("ss", "SS")
15984 .replace("yy", "YY");
15985 Ok(Expression::Function(Box::new(Function::new(
15986 "TO_CHAR".to_string(),
15987 vec![val, Expression::string(&pg_fmt)],
15988 ))))
15989 } else {
15990 Ok(Expression::Function(Box::new(Function::new(
15991 "TO_CHAR".to_string(),
15992 vec![val, fmt_expr.clone()],
15993 ))))
15994 }
15995 } else {
15996 Ok(Expression::Function(Box::new(Function::new(
15997 "TO_CHAR".to_string(),
15998 vec![val, fmt_expr.clone()],
15999 ))))
16000 }
16001 }
16002 _ => Ok(Expression::Function(f)),
16003 }
16004 }
16005 // DATEDIFF(unit, start, end) - 3-arg form
16006 // SQLite uses DATEDIFF(date1, date2, unit_string) instead
16007 "DATEDIFF" if f.args.len() == 3 => {
16008 let mut args = f.args;
16009 // SQLite source: args = (date1, date2, unit_string)
16010 // Standard source: args = (unit, start, end)
16011 let (_arg0, arg1, arg2, unit_str) =
16012 if matches!(source, DialectType::SQLite) {
16013 let date1 = args.remove(0);
16014 let date2 = args.remove(0);
16015 let unit_expr = args.remove(0);
16016 let unit_s = Self::get_unit_str_static(&unit_expr);
16017
16018 // For SQLite target, generate JULIANDAY arithmetic directly
16019 if matches!(target, DialectType::SQLite) {
16020 let jd_first = Expression::Function(Box::new(
16021 Function::new("JULIANDAY".to_string(), vec![date1]),
16022 ));
16023 let jd_second = Expression::Function(Box::new(
16024 Function::new("JULIANDAY".to_string(), vec![date2]),
16025 ));
16026 let diff = Expression::Sub(Box::new(
16027 crate::expressions::BinaryOp::new(
16028 jd_first, jd_second,
16029 ),
16030 ));
16031 let paren_diff = Expression::Paren(Box::new(
16032 crate::expressions::Paren {
16033 this: diff,
16034 trailing_comments: Vec::new(),
16035 },
16036 ));
16037 let adjusted = match unit_s.as_str() {
16038 "HOUR" => Expression::Mul(Box::new(
16039 crate::expressions::BinaryOp::new(
16040 paren_diff,
16041 Expression::Literal(Box::new(
16042 Literal::Number("24.0".to_string()),
16043 )),
16044 ),
16045 )),
16046 "MINUTE" => Expression::Mul(Box::new(
16047 crate::expressions::BinaryOp::new(
16048 paren_diff,
16049 Expression::Literal(Box::new(
16050 Literal::Number("1440.0".to_string()),
16051 )),
16052 ),
16053 )),
16054 "SECOND" => Expression::Mul(Box::new(
16055 crate::expressions::BinaryOp::new(
16056 paren_diff,
16057 Expression::Literal(Box::new(
16058 Literal::Number("86400.0".to_string()),
16059 )),
16060 ),
16061 )),
16062 "MONTH" => Expression::Div(Box::new(
16063 crate::expressions::BinaryOp::new(
16064 paren_diff,
16065 Expression::Literal(Box::new(
16066 Literal::Number("30.0".to_string()),
16067 )),
16068 ),
16069 )),
16070 "YEAR" => Expression::Div(Box::new(
16071 crate::expressions::BinaryOp::new(
16072 paren_diff,
16073 Expression::Literal(Box::new(
16074 Literal::Number("365.0".to_string()),
16075 )),
16076 ),
16077 )),
16078 _ => paren_diff,
16079 };
16080 return Ok(Expression::Cast(Box::new(Cast {
16081 this: adjusted,
16082 to: DataType::Int {
16083 length: None,
16084 integer_spelling: true,
16085 },
16086 trailing_comments: vec![],
16087 double_colon_syntax: false,
16088 format: None,
16089 default: None,
16090 inferred_type: None,
16091 })));
16092 }
16093
16094 // For other targets, remap to standard (unit, start, end) form
16095 let unit_ident =
16096 Expression::Identifier(Identifier::new(&unit_s));
16097 (unit_ident, date1, date2, unit_s)
16098 } else {
16099 let arg0 = args.remove(0);
16100 let arg1 = args.remove(0);
16101 let arg2 = args.remove(0);
16102 let unit_s = Self::get_unit_str_static(&arg0);
16103 (arg0, arg1, arg2, unit_s)
16104 };
16105
16106 // For Hive/Spark source, string literal dates need to be cast
16107 // Note: Databricks is excluded - it handles string args like standard SQL
16108 let is_hive_spark =
16109 matches!(source, DialectType::Hive | DialectType::Spark);
16110
16111 match target {
16112 DialectType::Snowflake => {
16113 let unit =
16114 Expression::Identifier(Identifier::new(&unit_str));
16115 // Use ensure_to_date_preserved to add TO_DATE with a marker
16116 // that prevents the Snowflake TO_DATE handler from converting it to CAST
16117 let d1 = if is_hive_spark {
16118 Self::ensure_to_date_preserved(arg1)
16119 } else {
16120 arg1
16121 };
16122 let d2 = if is_hive_spark {
16123 Self::ensure_to_date_preserved(arg2)
16124 } else {
16125 arg2
16126 };
16127 Ok(Expression::Function(Box::new(Function::new(
16128 "DATEDIFF".to_string(),
16129 vec![unit, d1, d2],
16130 ))))
16131 }
16132 DialectType::Redshift => {
16133 let unit =
16134 Expression::Identifier(Identifier::new(&unit_str));
16135 let d1 = if is_hive_spark {
16136 Self::ensure_cast_date(arg1)
16137 } else {
16138 arg1
16139 };
16140 let d2 = if is_hive_spark {
16141 Self::ensure_cast_date(arg2)
16142 } else {
16143 arg2
16144 };
16145 Ok(Expression::Function(Box::new(Function::new(
16146 "DATEDIFF".to_string(),
16147 vec![unit, d1, d2],
16148 ))))
16149 }
16150 DialectType::TSQL => {
16151 let unit =
16152 Expression::Identifier(Identifier::new(&unit_str));
16153 Ok(Expression::Function(Box::new(Function::new(
16154 "DATEDIFF".to_string(),
16155 vec![unit, arg1, arg2],
16156 ))))
16157 }
16158 DialectType::DuckDB => {
16159 let is_redshift_tsql = matches!(
16160 source,
16161 DialectType::Redshift | DialectType::TSQL
16162 );
16163 if is_hive_spark {
16164 // For Hive/Spark source, CAST string args to DATE and emit DATE_DIFF directly
16165 let d1 = Self::ensure_cast_date(arg1);
16166 let d2 = Self::ensure_cast_date(arg2);
16167 Ok(Expression::Function(Box::new(Function::new(
16168 "DATE_DIFF".to_string(),
16169 vec![Expression::string(&unit_str), d1, d2],
16170 ))))
16171 } else if matches!(source, DialectType::Snowflake) {
16172 // For Snowflake source: special handling per unit
16173 match unit_str.as_str() {
16174 "NANOSECOND" => {
16175 // DATEDIFF(NANOSECOND, start, end) -> EPOCH_NS(CAST(end AS TIMESTAMP_NS)) - EPOCH_NS(CAST(start AS TIMESTAMP_NS))
16176 fn cast_to_timestamp_ns(
16177 expr: Expression,
16178 ) -> Expression
16179 {
16180 Expression::Cast(Box::new(Cast {
16181 this: expr,
16182 to: DataType::Custom {
16183 name: "TIMESTAMP_NS".to_string(),
16184 },
16185 trailing_comments: vec![],
16186 double_colon_syntax: false,
16187 format: None,
16188 default: None,
16189 inferred_type: None,
16190 }))
16191 }
16192 let epoch_end = Expression::Function(Box::new(
16193 Function::new(
16194 "EPOCH_NS".to_string(),
16195 vec![cast_to_timestamp_ns(arg2)],
16196 ),
16197 ));
16198 let epoch_start = Expression::Function(
16199 Box::new(Function::new(
16200 "EPOCH_NS".to_string(),
16201 vec![cast_to_timestamp_ns(arg1)],
16202 )),
16203 );
16204 Ok(Expression::Sub(Box::new(BinaryOp::new(
16205 epoch_end,
16206 epoch_start,
16207 ))))
16208 }
16209 "WEEK" => {
16210 // DATE_DIFF('WEEK', DATE_TRUNC('WEEK', CAST(x AS DATE)), DATE_TRUNC('WEEK', CAST(y AS DATE)))
16211 let d1 = Self::force_cast_date(arg1);
16212 let d2 = Self::force_cast_date(arg2);
16213 let dt1 = Expression::Function(Box::new(
16214 Function::new(
16215 "DATE_TRUNC".to_string(),
16216 vec![Expression::string("WEEK"), d1],
16217 ),
16218 ));
16219 let dt2 = Expression::Function(Box::new(
16220 Function::new(
16221 "DATE_TRUNC".to_string(),
16222 vec![Expression::string("WEEK"), d2],
16223 ),
16224 ));
16225 Ok(Expression::Function(Box::new(
16226 Function::new(
16227 "DATE_DIFF".to_string(),
16228 vec![
16229 Expression::string(&unit_str),
16230 dt1,
16231 dt2,
16232 ],
16233 ),
16234 )))
16235 }
16236 _ => {
16237 // YEAR, MONTH, QUARTER, DAY, etc.: CAST to DATE
16238 let d1 = Self::force_cast_date(arg1);
16239 let d2 = Self::force_cast_date(arg2);
16240 Ok(Expression::Function(Box::new(
16241 Function::new(
16242 "DATE_DIFF".to_string(),
16243 vec![
16244 Expression::string(&unit_str),
16245 d1,
16246 d2,
16247 ],
16248 ),
16249 )))
16250 }
16251 }
16252 } else if is_redshift_tsql {
16253 // For Redshift/TSQL source, CAST args to TIMESTAMP (always)
16254 let d1 = Self::force_cast_timestamp(arg1);
16255 let d2 = Self::force_cast_timestamp(arg2);
16256 Ok(Expression::Function(Box::new(Function::new(
16257 "DATE_DIFF".to_string(),
16258 vec![Expression::string(&unit_str), d1, d2],
16259 ))))
16260 } else {
16261 // Keep as DATEDIFF so DuckDB's transform_datediff handles
16262 // DATE_TRUNC for WEEK, CAST for string literals, etc.
16263 let unit =
16264 Expression::Identifier(Identifier::new(&unit_str));
16265 Ok(Expression::Function(Box::new(Function::new(
16266 "DATEDIFF".to_string(),
16267 vec![unit, arg1, arg2],
16268 ))))
16269 }
16270 }
16271 DialectType::BigQuery => {
16272 let is_redshift_tsql = matches!(
16273 source,
16274 DialectType::Redshift
16275 | DialectType::TSQL
16276 | DialectType::Snowflake
16277 );
16278 let cast_d1 = if is_hive_spark {
16279 Self::ensure_cast_date(arg1)
16280 } else if is_redshift_tsql {
16281 Self::force_cast_datetime(arg1)
16282 } else {
16283 Self::ensure_cast_datetime(arg1)
16284 };
16285 let cast_d2 = if is_hive_spark {
16286 Self::ensure_cast_date(arg2)
16287 } else if is_redshift_tsql {
16288 Self::force_cast_datetime(arg2)
16289 } else {
16290 Self::ensure_cast_datetime(arg2)
16291 };
16292 let unit =
16293 Expression::Identifier(Identifier::new(&unit_str));
16294 Ok(Expression::Function(Box::new(Function::new(
16295 "DATE_DIFF".to_string(),
16296 vec![cast_d2, cast_d1, unit],
16297 ))))
16298 }
16299 DialectType::Presto
16300 | DialectType::Trino
16301 | DialectType::Athena => {
16302 // For Hive/Spark source, string literals need double-cast: CAST(CAST(x AS TIMESTAMP) AS DATE)
16303 // For Redshift/TSQL source, args need CAST to TIMESTAMP (always)
16304 let is_redshift_tsql = matches!(
16305 source,
16306 DialectType::Redshift
16307 | DialectType::TSQL
16308 | DialectType::Snowflake
16309 );
16310 let d1 = if is_hive_spark {
16311 Self::double_cast_timestamp_date(arg1)
16312 } else if is_redshift_tsql {
16313 Self::force_cast_timestamp(arg1)
16314 } else {
16315 arg1
16316 };
16317 let d2 = if is_hive_spark {
16318 Self::double_cast_timestamp_date(arg2)
16319 } else if is_redshift_tsql {
16320 Self::force_cast_timestamp(arg2)
16321 } else {
16322 arg2
16323 };
16324 Ok(Expression::Function(Box::new(Function::new(
16325 "DATE_DIFF".to_string(),
16326 vec![Expression::string(&unit_str), d1, d2],
16327 ))))
16328 }
16329 DialectType::Hive => match unit_str.as_str() {
16330 "MONTH" => Ok(Expression::Cast(Box::new(Cast {
16331 this: Expression::Function(Box::new(Function::new(
16332 "MONTHS_BETWEEN".to_string(),
16333 vec![arg2, arg1],
16334 ))),
16335 to: DataType::Int {
16336 length: None,
16337 integer_spelling: false,
16338 },
16339 trailing_comments: vec![],
16340 double_colon_syntax: false,
16341 format: None,
16342 default: None,
16343 inferred_type: None,
16344 }))),
16345 "WEEK" => Ok(Expression::Cast(Box::new(Cast {
16346 this: Expression::Div(Box::new(
16347 crate::expressions::BinaryOp::new(
16348 Expression::Function(Box::new(Function::new(
16349 "DATEDIFF".to_string(),
16350 vec![arg2, arg1],
16351 ))),
16352 Expression::number(7),
16353 ),
16354 )),
16355 to: DataType::Int {
16356 length: None,
16357 integer_spelling: false,
16358 },
16359 trailing_comments: vec![],
16360 double_colon_syntax: false,
16361 format: None,
16362 default: None,
16363 inferred_type: None,
16364 }))),
16365 _ => Ok(Expression::Function(Box::new(Function::new(
16366 "DATEDIFF".to_string(),
16367 vec![arg2, arg1],
16368 )))),
16369 },
16370 DialectType::Spark | DialectType::Databricks => {
16371 let unit =
16372 Expression::Identifier(Identifier::new(&unit_str));
16373 Ok(Expression::Function(Box::new(Function::new(
16374 "DATEDIFF".to_string(),
16375 vec![unit, arg1, arg2],
16376 ))))
16377 }
16378 _ => {
16379 // For Hive/Spark source targeting PostgreSQL etc., cast string literals to DATE
16380 let d1 = if is_hive_spark {
16381 Self::ensure_cast_date(arg1)
16382 } else {
16383 arg1
16384 };
16385 let d2 = if is_hive_spark {
16386 Self::ensure_cast_date(arg2)
16387 } else {
16388 arg2
16389 };
16390 let unit =
16391 Expression::Identifier(Identifier::new(&unit_str));
16392 Ok(Expression::Function(Box::new(Function::new(
16393 "DATEDIFF".to_string(),
16394 vec![unit, d1, d2],
16395 ))))
16396 }
16397 }
16398 }
16399 // DATEDIFF(end, start) - 2-arg form from Hive/MySQL
16400 "DATEDIFF" if f.args.len() == 2 => {
16401 let mut args = f.args;
16402 let arg0 = args.remove(0);
16403 let arg1 = args.remove(0);
16404
16405 // Helper: unwrap TO_DATE(x) -> x (extracts inner arg)
16406 // Also recognizes TryCast/Cast to DATE that may have been produced by
16407 // cross-dialect TO_DATE -> TRY_CAST conversion
16408 let unwrap_to_date = |e: Expression| -> (Expression, bool) {
16409 if let Expression::Function(ref f) = e {
16410 if f.name.eq_ignore_ascii_case("TO_DATE")
16411 && f.args.len() == 1
16412 {
16413 return (f.args[0].clone(), true);
16414 }
16415 }
16416 // Also recognize TryCast(x, Date) as an already-converted TO_DATE
16417 if let Expression::TryCast(ref c) = e {
16418 if matches!(c.to, DataType::Date) {
16419 return (e, true); // Already properly cast, return as-is
16420 }
16421 }
16422 (e, false)
16423 };
16424
16425 match target {
16426 DialectType::DuckDB => {
16427 // For Hive source, always CAST to DATE
16428 // If arg is TO_DATE(x) or TRY_CAST(x AS DATE), use it directly
16429 let cast_d0 = if matches!(
16430 source,
16431 DialectType::Hive
16432 | DialectType::Spark
16433 | DialectType::Databricks
16434 ) {
16435 let (inner, was_to_date) = unwrap_to_date(arg1);
16436 if was_to_date {
16437 // Already a date expression, use directly
16438 if matches!(&inner, Expression::TryCast(_)) {
16439 inner // Already TRY_CAST(x AS DATE)
16440 } else {
16441 Self::try_cast_date(inner)
16442 }
16443 } else {
16444 Self::force_cast_date(inner)
16445 }
16446 } else {
16447 Self::ensure_cast_date(arg1)
16448 };
16449 let cast_d1 = if matches!(
16450 source,
16451 DialectType::Hive
16452 | DialectType::Spark
16453 | DialectType::Databricks
16454 ) {
16455 let (inner, was_to_date) = unwrap_to_date(arg0);
16456 if was_to_date {
16457 if matches!(&inner, Expression::TryCast(_)) {
16458 inner
16459 } else {
16460 Self::try_cast_date(inner)
16461 }
16462 } else {
16463 Self::force_cast_date(inner)
16464 }
16465 } else {
16466 Self::ensure_cast_date(arg0)
16467 };
16468 Ok(Expression::Function(Box::new(Function::new(
16469 "DATE_DIFF".to_string(),
16470 vec![Expression::string("DAY"), cast_d0, cast_d1],
16471 ))))
16472 }
16473 DialectType::Presto
16474 | DialectType::Trino
16475 | DialectType::Athena => {
16476 // For Hive/Spark source, apply double_cast_timestamp_date
16477 // For other sources (MySQL etc.), just swap args without casting
16478 if matches!(
16479 source,
16480 DialectType::Hive
16481 | DialectType::Spark
16482 | DialectType::Databricks
16483 ) {
16484 let cast_fn = |e: Expression| -> Expression {
16485 let (inner, was_to_date) = unwrap_to_date(e);
16486 if was_to_date {
16487 let first_cast =
16488 Self::double_cast_timestamp_date(inner);
16489 Self::double_cast_timestamp_date(first_cast)
16490 } else {
16491 Self::double_cast_timestamp_date(inner)
16492 }
16493 };
16494 Ok(Expression::Function(Box::new(Function::new(
16495 "DATE_DIFF".to_string(),
16496 vec![
16497 Expression::string("DAY"),
16498 cast_fn(arg1),
16499 cast_fn(arg0),
16500 ],
16501 ))))
16502 } else {
16503 Ok(Expression::Function(Box::new(Function::new(
16504 "DATE_DIFF".to_string(),
16505 vec![Expression::string("DAY"), arg1, arg0],
16506 ))))
16507 }
16508 }
16509 DialectType::Redshift => {
16510 let unit = Expression::Identifier(Identifier::new("DAY"));
16511 Ok(Expression::Function(Box::new(Function::new(
16512 "DATEDIFF".to_string(),
16513 vec![unit, arg1, arg0],
16514 ))))
16515 }
16516 _ => Ok(Expression::Function(Box::new(Function::new(
16517 "DATEDIFF".to_string(),
16518 vec![arg0, arg1],
16519 )))),
16520 }
16521 }
            // DATE_DIFF(unit, start, end) - 3-arg with string unit (ClickHouse/DuckDB style).
            // The unit is normalized via get_unit_str_static; start/end operand order is
            // preserved for every target (no swap in this 3-arg form).
            "DATE_DIFF" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0); // unit (identifier or string literal)
                let arg1 = args.remove(0); // start expression
                let arg2 = args.remove(0); // end expression
                let unit_str = Self::get_unit_str_static(&arg0);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_DIFF('UNIT', start, end) - unit rendered as a string literal
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family also takes the unit as a string literal
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::ClickHouse => {
                        // ClickHouse: DATE_DIFF(UNIT, start, end) - identifier unit
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Snowflake | DialectType::Redshift => {
                        // Snowflake/Redshift spell it DATEDIFF (no underscore), identifier unit
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    _ => {
                        // Fallback for all other targets: DATEDIFF(UNIT, start, end)
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEDIFF".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATEADD(unit, val, date) - 3-arg form (TSQL/Snowflake style).
            // Normalizes TSQL unit abbreviations first, then rewrites per target dialect.
            "DATEADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0); // unit
                let arg1 = args.remove(0); // amount to add
                let arg2 = args.remove(0); // date/timestamp expression
                let unit_str = Self::get_unit_str_static(&arg0);

                // Normalize TSQL unit abbreviations to standard names
                // (e.g. YY -> YEAR, MI -> MINUTE); unknown units pass through.
                let unit_str = match unit_str.as_str() {
                    "YY" | "YYYY" => "YEAR".to_string(),
                    "QQ" | "Q" => "QUARTER".to_string(),
                    "MM" | "M" => "MONTH".to_string(),
                    "WK" | "WW" => "WEEK".to_string(),
                    "DD" | "D" | "DY" => "DAY".to_string(),
                    "HH" => "HOUR".to_string(),
                    "MI" | "N" => "MINUTE".to_string(),
                    "SS" | "S" => "SECOND".to_string(),
                    "MS" => "MILLISECOND".to_string(),
                    "MCS" | "US" => "MICROSECOND".to_string(),
                    _ => unit_str,
                };
                match target {
                    DialectType::Snowflake => {
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        // Cast string literal to TIMESTAMP, but not for Snowflake source
                        // (Snowflake natively accepts string literals in DATEADD)
                        let arg2 = if matches!(
                            &arg2,
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                        ) && !matches!(source, DialectType::Snowflake)
                        {
                            Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            arg2
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::TSQL => {
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        // Cast string literal to DATETIME2, but not when source is Spark/Databricks family
                        let arg2 = if matches!(
                            &arg2,
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                        ) && !matches!(
                            source,
                            DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive
                        ) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Custom {
                                    name: "DATETIME2".to_string(),
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            arg2
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift keeps the 3-arg DATEADD shape unchanged
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Databricks => {
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        // Sources with native DATEADD (TSQL, Databricks, Snowflake) -> DATEADD
                        // Other sources (Redshift TsOrDsAdd, etc.) -> DATE_ADD
                        let func_name = if matches!(
                            source,
                            DialectType::TSQL
                                | DialectType::Fabric
                                | DialectType::Databricks
                                | DialectType::Snowflake
                        ) {
                            "DATEADD"
                        } else {
                            "DATE_ADD"
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // Special handling for NANOSECOND from Snowflake
                        if unit_str == "NANOSECOND"
                            && matches!(source, DialectType::Snowflake)
                        {
                            // DATEADD(NANOSECOND, offset, ts) -> MAKE_TIMESTAMP_NS(EPOCH_NS(CAST(ts AS TIMESTAMP_NS)) + offset)
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Custom {
                                    name: "TIMESTAMP_NS".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            let epoch_ns =
                                Expression::Function(Box::new(Function::new(
                                    "EPOCH_NS".to_string(),
                                    vec![cast_ts],
                                )));
                            let sum = Expression::Add(Box::new(BinaryOp::new(
                                epoch_ns, arg1,
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "MAKE_TIMESTAMP_NS".to_string(),
                                vec![sum],
                            ))))
                        } else {
                            // DuckDB: convert to date + INTERVAL syntax with CAST
                            let iu = Self::parse_interval_unit_static(&unit_str);
                            let interval = Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple { unit: iu, use_plural: false }),
                            }));
                            // Cast string literal to TIMESTAMP so the + INTERVAL is well-typed
                            let arg2 = if matches!(
                                &arg2,
                                Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                            ) {
                                Expression::Cast(Box::new(Cast {
                                    this: arg2,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }))
                            } else {
                                arg2
                            };
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(arg2, interval),
                            )))
                        }
                    }
                    DialectType::Spark => {
                        // For TSQL source: convert to ADD_MONTHS/DATE_ADD(date, val)
                        // For other sources: keep 3-arg DATE_ADD(UNIT, val, date) form
                        if matches!(source, DialectType::TSQL | DialectType::Fabric)
                        {
                            // Multiply an amount expression by a constant factor,
                            // constant-folding when the amount is a numeric literal.
                            fn multiply_expr_spark(
                                expr: Expression,
                                factor: i64,
                            ) -> Expression
                            {
                                if let Expression::Literal(lit) = &expr {
                                    if let crate::expressions::Literal::Number(n) =
                                        lit.as_ref()
                                    {
                                        if let Ok(val) = n.parse::<i64>() {
                                            return Expression::Literal(Box::new(
                                                crate::expressions::Literal::Number(
                                                    (val * factor).to_string(),
                                                ),
                                            ));
                                        }
                                    }
                                }
                                // Non-literal amount: emit expr * factor
                                Expression::Mul(Box::new(
                                    crate::expressions::BinaryOp::new(
                                        expr,
                                        Expression::Literal(Box::new(
                                            crate::expressions::Literal::Number(
                                                factor.to_string(),
                                            ),
                                        )),
                                    ),
                                ))
                            }
                            // NOTE(review): unit_str was already normalized above, so the
                            // abbreviation patterns here appear redundant - confirm before removing.
                            let normalized_unit = match unit_str.as_str() {
                                "YEAR" | "YY" | "YYYY" => "YEAR",
                                "QUARTER" | "QQ" | "Q" => "QUARTER",
                                "MONTH" | "MM" | "M" => "MONTH",
                                "WEEK" | "WK" | "WW" => "WEEK",
                                "DAY" | "DD" | "D" | "DY" => "DAY",
                                _ => &unit_str,
                            };
                            match normalized_unit {
                                "YEAR" => {
                                    // 1 year = 12 months for ADD_MONTHS
                                    let months = multiply_expr_spark(arg1, 12);
                                    Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "ADD_MONTHS".to_string(),
                                            vec![arg2, months],
                                        ),
                                    )))
                                }
                                "QUARTER" => {
                                    // 1 quarter = 3 months
                                    let months = multiply_expr_spark(arg1, 3);
                                    Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "ADD_MONTHS".to_string(),
                                            vec![arg2, months],
                                        ),
                                    )))
                                }
                                "MONTH" => Ok(Expression::Function(Box::new(
                                    Function::new(
                                        "ADD_MONTHS".to_string(),
                                        vec![arg2, arg1],
                                    ),
                                ))),
                                "WEEK" => {
                                    // 1 week = 7 days for DATE_ADD
                                    let days = multiply_expr_spark(arg1, 7);
                                    Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "DATE_ADD".to_string(),
                                            vec![arg2, days],
                                        ),
                                    )))
                                }
                                "DAY" => Ok(Expression::Function(Box::new(
                                    Function::new(
                                        "DATE_ADD".to_string(),
                                        vec![arg2, arg1],
                                    ),
                                ))),
                                _ => {
                                    // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date)
                                    let unit = Expression::Identifier(
                                        Identifier::new(&unit_str),
                                    );
                                    Ok(Expression::Function(Box::new(
                                        Function::new(
                                            "DATE_ADD".to_string(),
                                            vec![unit, arg1, arg2],
                                        ),
                                    )))
                                }
                            }
                        } else {
                            // Non-TSQL source: keep 3-arg DATE_ADD(UNIT, val, date)
                            let unit =
                                Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::Hive => match unit_str.as_str() {
                        "MONTH" => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "ADD_MONTHS".to_string(),
                                vec![arg2, arg1],
                            ))))
                        }
                        // NOTE(review): non-MONTH units collapse to 2-arg DATE_ADD(date, val),
                        // which drops the unit - verify this is the intended Hive behavior.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        )))),
                    },
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Cast string literal date to TIMESTAMP
                        let arg2 = if matches!(
                            &arg2,
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                        ) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            arg2
                        };
                        // Presto family: DATE_ADD('unit', val, date) with string unit
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: dedicated DateAdd node renders DATE_ADD(date, INTERVAL val UNIT)
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: arg2,
                                interval: arg1,
                                unit: iu,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // Cast string literal date to TIMESTAMP
                        let arg2 = if matches!(
                            &arg2,
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                        ) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Timestamp {
                                    precision: None,
                                    timezone: false,
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            arg2
                        };
                        // PostgreSQL: date + INTERVAL 'val UNIT' (amount folded into the string)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} {}",
                                    Self::expr_to_string_static(&arg1),
                                    unit_str
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::BigQuery => {
                        // BigQuery: DATE_ADD(date, INTERVAL val UNIT)
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        // Non-TSQL sources: CAST string literal to DATETIME
                        let arg2 = if !matches!(
                            source,
                            DialectType::TSQL | DialectType::Fabric
                        ) && matches!(
                            &arg2,
                            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_))
                        ) {
                            Expression::Cast(Box::new(Cast {
                                this: arg2,
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: Vec::new(),
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            arg2
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, interval],
                        ))))
                    }
                    _ => {
                        // Default: keep TSQL-style DATEADD(UNIT, val, date)
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATE_ADD - 3-arg: either (unit, val, date) from Presto/ClickHouse
            // or (date, val, 'UNIT') from Generic canonical form.
            // The two shapes are disambiguated by checking whether the third
            // argument is a string literal naming a known unit.
            "DATE_ADD" if f.args.len() == 3 => {
                let mut args = f.args;
                let arg0 = args.remove(0);
                let arg1 = args.remove(0);
                let arg2 = args.remove(0);
                // Detect Generic canonical form: DATE_ADD(date, amount, 'UNIT')
                // where arg2 is a string literal matching a unit name
                let arg2_unit = match &arg2 {
                    Expression::Literal(lit)
                        if matches!(lit.as_ref(), Literal::String(_)) =>
                    {
                        let Literal::String(s) = lit.as_ref() else {
                            unreachable!()
                        };
                        let u = s.to_ascii_uppercase();
                        if matches!(
                            u.as_str(),
                            "DAY"
                                | "MONTH"
                                | "YEAR"
                                | "HOUR"
                                | "MINUTE"
                                | "SECOND"
                                | "WEEK"
                                | "QUARTER"
                                | "MILLISECOND"
                                | "MICROSECOND"
                        ) {
                            Some(u)
                        } else {
                            None
                        }
                    }
                    _ => None,
                };
                // Reorder: if arg2 is the unit, swap to (unit, val, date) form
                let (unit_str, val, date) = if let Some(u) = arg2_unit {
                    (u, arg1, arg0)
                } else {
                    (Self::get_unit_str_static(&arg0), arg1, arg2)
                };
                // Alias for backward compat with the rest of the match
                let arg1 = val;
                let arg2 = date;

                match target {
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // Presto family: DATE_ADD('unit', val, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string(&unit_str), arg1, arg2],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: date + INTERVAL val UNIT
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::PostgreSQL
                    | DialectType::Materialize
                    | DialectType::RisingWave => {
                        // PostgreSQL family: x + INTERVAL '1 DAY'
                        let amount_str = Self::expr_to_string_static(&arg1);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} {}",
                                    amount_str, unit_str
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(arg2, interval),
                        )))
                    }
                    DialectType::Snowflake
                    | DialectType::TSQL
                    | DialectType::Redshift => {
                        // DATEADD(UNIT, val, date) with identifier unit
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::BigQuery
                    | DialectType::MySQL
                    | DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL amount UNIT)
                        let iu = Self::parse_interval_unit_static(&unit_str);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(arg1),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: iu,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, interval],
                        ))))
                    }
                    DialectType::SQLite => {
                        // SQLite: DATE(x, '1 DAY')
                        // Build the string '1 DAY' from amount and unit;
                        // non-numeric amounts fall back to "1".
                        let amount_str = match &arg1 {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Number(_)) =>
                            {
                                let Literal::Number(n) = lit.as_ref() else {
                                    unreachable!()
                                };
                                n.clone()
                            }
                            _ => "1".to_string(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE".to_string(),
                            vec![
                                arg2,
                                Expression::string(format!(
                                    "{} {}",
                                    amount_str, unit_str
                                )),
                            ],
                        ))))
                    }
                    DialectType::Dremio => {
                        // Dremio: DATE_ADD(date, amount) - drops unit
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    DialectType::Spark => {
                        // Spark: DATE_ADD(date, val) for DAY, or DATE_ADD(UNIT, val, date)
                        if unit_str == "DAY" {
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![arg2, arg1],
                            ))))
                        } else {
                            let unit =
                                Expression::Identifier(Identifier::new(&unit_str));
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_ADD".to_string(),
                                vec![unit, arg1, arg2],
                            ))))
                        }
                    }
                    DialectType::Databricks => {
                        // Databricks accepts 3-arg DATE_ADD(UNIT, val, date)
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                    DialectType::Hive => {
                        // Hive: DATE_ADD(date, val) for DAY
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![arg2, arg1],
                        ))))
                    }
                    _ => {
                        // Default: keep 3-arg DATE_ADD(UNIT, val, date)
                        let unit =
                            Expression::Identifier(Identifier::new(&unit_str));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![unit, arg1, arg2],
                        ))))
                    }
                }
            }
            // DATE_ADD(date, days) - 2-arg Hive/Spark/Generic form (add days).
            // Only matched for sources where the 2-arg form means "add N days".
            "DATE_ADD"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                            | DialectType::Generic
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                match target {
                    DialectType::Hive | DialectType::Spark => {
                        // Keep as DATE_ADD(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::Databricks => {
                        // Databricks: DATEADD(DAY, days, date)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL days DAY
                        let cast_date = Self::ensure_cast_date(date);
                        // Wrap complex expressions (like Mul from DATE_SUB negation) in Paren
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        // Redshift: DATEADD(DAY, days, date) without extra casting
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        // For Hive source with string literal date, use CAST(CAST(date AS DATETIME2) AS DATE)
                        // But Databricks DATE_ADD doesn't need this wrapping for TSQL
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive | DialectType::Spark
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                            ) {
                                Self::double_cast_datetime2_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                days,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        // For Hive source with string literal date, use CAST(CAST(date AS TIMESTAMP) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            if matches!(
                                date,
                                Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_))
                            ) {
                                Self::double_cast_timestamp_date(date)
                            } else {
                                date
                            }
                        } else {
                            date
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), days, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        // For Hive/Spark source, wrap date in CAST(CAST(date AS DATETIME) AS DATE)
                        let cast_date = if matches!(
                            source,
                            DialectType::Hive
                                | DialectType::Spark
                                | DialectType::Databricks
                        ) {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        // Wrap complex expressions in Paren for interval
                        let interval_val = if matches!(
                            days,
                            Expression::Mul(_)
                                | Expression::Sub(_)
                                | Expression::Add(_)
                        ) {
                            Expression::Paren(Box::new(crate::expressions::Paren {
                                this: days,
                                trailing_comments: vec![],
                            }))
                        } else {
                            days
                        };
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(interval_val),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    DialectType::MySQL => {
                        // MySQL: DATE_ADD(date, INTERVAL days DAY) via DateAdd node
                        let iu = crate::expressions::IntervalUnit::Day;
                        Ok(Expression::DateAdd(Box::new(
                            crate::expressions::DateAddFunc {
                                this: date,
                                interval: days,
                                unit: iu,
                            },
                        )))
                    }
                    DialectType::PostgreSQL => {
                        // PostgreSQL: date + INTERVAL 'N DAY'
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::string(&format!(
                                    "{} DAY",
                                    Self::expr_to_string_static(&days)
                                ))),
                                unit: None,
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(date, interval),
                        )))
                    }
                    DialectType::Doris
                    | DialectType::StarRocks
                    | DialectType::Drill => {
                        // DATE_ADD(date, INTERVAL days DAY)
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(days),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, days],
                    )))),
                }
            }
            // DATE_ADD(date, INTERVAL val UNIT) - MySQL 2-arg form with INTERVAL as 2nd arg.
            // Only matched for MySQL-family sources where this shape is native.
            "DATE_ADD"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::MySQL | DialectType::SingleStore
                    )
                    && matches!(&f.args[1], Expression::Interval(_)) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let interval_expr = args.remove(0);
                // Pull the amount and unit out of the INTERVAL expression
                let (val, unit) = Self::extract_interval_parts(&interval_expr);
                let unit_str = Self::interval_unit_to_string(&unit);
                // Literal amounts can be folded into a single INTERVAL string;
                // column references cannot and need the multiply form below.
                let is_literal = matches!(&val,
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_) | Literal::String(_))
                );

                match target {
                    DialectType::MySQL | DialectType::SingleStore => {
                        // Keep as DATE_ADD(date, INTERVAL val UNIT)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval_expr],
                        ))))
                    }
                    DialectType::PostgreSQL => {
                        if is_literal {
                            // Literal: date + INTERVAL 'val UNIT'
                            let interval = Expression::Interval(Box::new(
                                crate::expressions::Interval {
                                    this: Some(Expression::Literal(Box::new(
                                        Literal::String(format!(
                                            "{} {}",
                                            Self::expr_to_string(&val),
                                            unit_str
                                        )),
                                    ))),
                                    unit: None,
                                },
                            ));
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(date, interval),
                            )))
                        } else {
                            // Non-literal (column ref): date + INTERVAL '1 UNIT' * val
                            let interval_one = Expression::Interval(Box::new(
                                crate::expressions::Interval {
                                    this: Some(Expression::Literal(Box::new(
                                        Literal::String(format!("1 {}", unit_str)),
                                    ))),
                                    unit: None,
                                },
                            ));
                            let mul = Expression::Mul(Box::new(
                                crate::expressions::BinaryOp::new(
                                    interval_one,
                                    val,
                                ),
                            ));
                            Ok(Expression::Add(Box::new(
                                crate::expressions::BinaryOp::new(date, mul),
                            )))
                        }
                    }
                    _ => {
                        // Default: keep as DATE_ADD(date, interval)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![date, interval_expr],
                        ))))
                    }
                }
            }
            // DATE_SUB(date, days) - 2-arg Hive/Spark form (subtract days).
            // Targets without a native DATE_SUB are rewritten as an add of (days * -1).
            "DATE_SUB"
                if f.args.len() == 2
                    && matches!(
                        source,
                        DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks
                    ) =>
            {
                let mut args = f.args;
                let date = args.remove(0);
                let days = args.remove(0);
                // Helper to create days * -1 (negation via multiplication so the
                // original expression shape is preserved in the output)
                let make_neg_days = |d: Expression| -> Expression {
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        d,
                        Expression::Literal(Box::new(Literal::Number(
                            "-1".to_string(),
                        ))),
                    )))
                };
                // String-literal dates get a double cast on several targets below
                let is_string_literal = matches!(date, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)));
                match target {
                    DialectType::Hive
                    | DialectType::Spark
                    | DialectType::Databricks => {
                        // Keep as DATE_SUB(date, days) for Hive/Spark
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_SUB".to_string(),
                            vec![date, days],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // DuckDB: CAST(date AS DATE) + INTERVAL (days * -1) DAY
                        let cast_date = Self::ensure_cast_date(date);
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Add(Box::new(
                            crate::expressions::BinaryOp::new(cast_date, interval),
                        )))
                    }
                    DialectType::Snowflake => {
                        // Snowflake: DATEADD(DAY, days * -1, date) with double cast
                        // for string-literal dates
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Redshift => {
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                date,
                            ],
                        ))))
                    }
                    DialectType::TSQL | DialectType::Fabric => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime2_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATEADD".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("DAY")),
                                neg,
                                cast_date,
                            ],
                        ))))
                    }
                    DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Athena => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_timestamp_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![Expression::string("DAY"), neg, cast_date],
                        ))))
                    }
                    DialectType::BigQuery => {
                        let cast_date = if is_string_literal {
                            Self::double_cast_datetime_date(date)
                        } else {
                            date
                        };
                        let neg = make_neg_days(days);
                        let interval = Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Paren(Box::new(
                                    crate::expressions::Paren {
                                        this: neg,
                                        trailing_comments: vec![],
                                    },
                                ))),
                                unit: Some(
                                    crate::expressions::IntervalUnitSpec::Simple {
                                        unit: crate::expressions::IntervalUnit::Day,
                                        use_plural: false,
                                    },
                                ),
                            },
                        ));
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_ADD".to_string(),
                            vec![cast_date, interval],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "DATE_SUB".to_string(),
                        vec![date, days],
                    )))),
                }
            }
17653 // ADD_MONTHS(date, val) -> target-specific
17654 "ADD_MONTHS" if f.args.len() == 2 => {
17655 let mut args = f.args;
17656 let date = args.remove(0);
17657 let val = args.remove(0);
17658 match target {
17659 DialectType::TSQL => {
17660 let cast_date = Self::ensure_cast_datetime2(date);
17661 Ok(Expression::Function(Box::new(Function::new(
17662 "DATEADD".to_string(),
17663 vec![
17664 Expression::Identifier(Identifier::new("MONTH")),
17665 val,
17666 cast_date,
17667 ],
17668 ))))
17669 }
17670 DialectType::DuckDB => {
17671 let interval = Expression::Interval(Box::new(
17672 crate::expressions::Interval {
17673 this: Some(val),
17674 unit: Some(
17675 crate::expressions::IntervalUnitSpec::Simple {
17676 unit:
17677 crate::expressions::IntervalUnit::Month,
17678 use_plural: false,
17679 },
17680 ),
17681 },
17682 ));
17683 Ok(Expression::Add(Box::new(
17684 crate::expressions::BinaryOp::new(date, interval),
17685 )))
17686 }
17687 DialectType::Snowflake => {
17688 // Keep ADD_MONTHS when source is Snowflake
17689 if matches!(source, DialectType::Snowflake) {
17690 Ok(Expression::Function(Box::new(Function::new(
17691 "ADD_MONTHS".to_string(),
17692 vec![date, val],
17693 ))))
17694 } else {
17695 Ok(Expression::Function(Box::new(Function::new(
17696 "DATEADD".to_string(),
17697 vec![
17698 Expression::Identifier(Identifier::new(
17699 "MONTH",
17700 )),
17701 val,
17702 date,
17703 ],
17704 ))))
17705 }
17706 }
17707 DialectType::Redshift => {
17708 Ok(Expression::Function(Box::new(Function::new(
17709 "DATEADD".to_string(),
17710 vec![
17711 Expression::Identifier(Identifier::new("MONTH")),
17712 val,
17713 date,
17714 ],
17715 ))))
17716 }
17717 DialectType::Presto
17718 | DialectType::Trino
17719 | DialectType::Athena => {
17720 Ok(Expression::Function(Box::new(Function::new(
17721 "DATE_ADD".to_string(),
17722 vec![Expression::string("MONTH"), val, date],
17723 ))))
17724 }
17725 DialectType::BigQuery => {
17726 let interval = Expression::Interval(Box::new(
17727 crate::expressions::Interval {
17728 this: Some(val),
17729 unit: Some(
17730 crate::expressions::IntervalUnitSpec::Simple {
17731 unit:
17732 crate::expressions::IntervalUnit::Month,
17733 use_plural: false,
17734 },
17735 ),
17736 },
17737 ));
17738 Ok(Expression::Function(Box::new(Function::new(
17739 "DATE_ADD".to_string(),
17740 vec![date, interval],
17741 ))))
17742 }
17743 _ => Ok(Expression::Function(Box::new(Function::new(
17744 "ADD_MONTHS".to_string(),
17745 vec![date, val],
17746 )))),
17747 }
17748 }
17749 // DATETRUNC(unit, date) - TSQL form -> DATE_TRUNC for other targets
17750 "DATETRUNC" if f.args.len() == 2 => {
17751 let mut args = f.args;
17752 let arg0 = args.remove(0);
17753 let arg1 = args.remove(0);
17754 let unit_str = Self::get_unit_str_static(&arg0);
17755 match target {
17756 DialectType::TSQL | DialectType::Fabric => {
17757 // Keep as DATETRUNC for TSQL - the target handler will uppercase the unit
17758 Ok(Expression::Function(Box::new(Function::new(
17759 "DATETRUNC".to_string(),
17760 vec![
17761 Expression::Identifier(Identifier::new(&unit_str)),
17762 arg1,
17763 ],
17764 ))))
17765 }
17766 DialectType::DuckDB => {
17767 // DuckDB: DATE_TRUNC('UNIT', expr) with CAST for string literals
17768 let date = Self::ensure_cast_timestamp(arg1);
17769 Ok(Expression::Function(Box::new(Function::new(
17770 "DATE_TRUNC".to_string(),
17771 vec![Expression::string(&unit_str), date],
17772 ))))
17773 }
17774 DialectType::ClickHouse => {
17775 // ClickHouse: dateTrunc('UNIT', expr)
17776 Ok(Expression::Function(Box::new(Function::new(
17777 "dateTrunc".to_string(),
17778 vec![Expression::string(&unit_str), arg1],
17779 ))))
17780 }
17781 _ => {
17782 // Standard: DATE_TRUNC('UNIT', expr)
17783 let unit = Expression::string(&unit_str);
17784 Ok(Expression::Function(Box::new(Function::new(
17785 "DATE_TRUNC".to_string(),
17786 vec![unit, arg1],
17787 ))))
17788 }
17789 }
17790 }
17791 // GETDATE() -> CURRENT_TIMESTAMP for non-TSQL targets
17792 "GETDATE" if f.args.is_empty() => match target {
17793 DialectType::TSQL => Ok(Expression::Function(f)),
17794 DialectType::Redshift => Ok(Expression::Function(Box::new(
17795 Function::new("GETDATE".to_string(), vec![]),
17796 ))),
17797 _ => Ok(Expression::CurrentTimestamp(
17798 crate::expressions::CurrentTimestamp {
17799 precision: None,
17800 sysdate: false,
17801 },
17802 )),
17803 },
17804 // TO_HEX(x) / HEX(x) -> target-specific hex function
17805 "TO_HEX" | "HEX" if f.args.len() == 1 => {
17806 let name = match target {
17807 DialectType::Presto | DialectType::Trino => "TO_HEX",
17808 DialectType::Spark
17809 | DialectType::Databricks
17810 | DialectType::Hive => "HEX",
17811 DialectType::DuckDB
17812 | DialectType::PostgreSQL
17813 | DialectType::Redshift => "TO_HEX",
17814 _ => &f.name,
17815 };
17816 Ok(Expression::Function(Box::new(Function::new(
17817 name.to_string(),
17818 f.args,
17819 ))))
17820 }
17821 // FROM_HEX(x) / UNHEX(x) -> target-specific hex decode function
17822 "FROM_HEX" | "UNHEX" if f.args.len() == 1 => {
17823 match target {
17824 DialectType::BigQuery => {
17825 // BigQuery: UNHEX(x) -> FROM_HEX(x)
17826 // Special case: UNHEX(MD5(x)) -> FROM_HEX(TO_HEX(MD5(x)))
17827 // because BigQuery MD5 returns BYTES, not hex string
17828 let arg = &f.args[0];
17829 let wrapped_arg = match arg {
17830 Expression::Function(inner_f)
17831 if inner_f.name.eq_ignore_ascii_case("MD5")
17832 || inner_f
17833 .name
17834 .eq_ignore_ascii_case("SHA1")
17835 || inner_f
17836 .name
17837 .eq_ignore_ascii_case("SHA256")
17838 || inner_f
17839 .name
17840 .eq_ignore_ascii_case("SHA512") =>
17841 {
17842 // Wrap hash function in TO_HEX for BigQuery
17843 Expression::Function(Box::new(Function::new(
17844 "TO_HEX".to_string(),
17845 vec![arg.clone()],
17846 )))
17847 }
17848 _ => f.args.into_iter().next().unwrap(),
17849 };
17850 Ok(Expression::Function(Box::new(Function::new(
17851 "FROM_HEX".to_string(),
17852 vec![wrapped_arg],
17853 ))))
17854 }
17855 _ => {
17856 let name = match target {
17857 DialectType::Presto | DialectType::Trino => "FROM_HEX",
17858 DialectType::Spark
17859 | DialectType::Databricks
17860 | DialectType::Hive => "UNHEX",
17861 _ => &f.name,
17862 };
17863 Ok(Expression::Function(Box::new(Function::new(
17864 name.to_string(),
17865 f.args,
17866 ))))
17867 }
17868 }
17869 }
17870 // TO_UTF8(x) -> ENCODE(x, 'utf-8') for Spark
17871 "TO_UTF8" if f.args.len() == 1 => match target {
17872 DialectType::Spark | DialectType::Databricks => {
17873 let mut args = f.args;
17874 args.push(Expression::string("utf-8"));
17875 Ok(Expression::Function(Box::new(Function::new(
17876 "ENCODE".to_string(),
17877 args,
17878 ))))
17879 }
17880 _ => Ok(Expression::Function(f)),
17881 },
17882 // FROM_UTF8(x) -> DECODE(x, 'utf-8') for Spark
17883 "FROM_UTF8" if f.args.len() == 1 => match target {
17884 DialectType::Spark | DialectType::Databricks => {
17885 let mut args = f.args;
17886 args.push(Expression::string("utf-8"));
17887 Ok(Expression::Function(Box::new(Function::new(
17888 "DECODE".to_string(),
17889 args,
17890 ))))
17891 }
17892 _ => Ok(Expression::Function(f)),
17893 },
17894 // STARTS_WITH(x, y) / STARTSWITH(x, y) -> target-specific
17895 "STARTS_WITH" | "STARTSWITH" if f.args.len() == 2 => {
17896 let name = match target {
17897 DialectType::Spark | DialectType::Databricks => "STARTSWITH",
17898 DialectType::Presto | DialectType::Trino => "STARTS_WITH",
17899 DialectType::PostgreSQL | DialectType::Redshift => {
17900 "STARTS_WITH"
17901 }
17902 _ => &f.name,
17903 };
17904 Ok(Expression::Function(Box::new(Function::new(
17905 name.to_string(),
17906 f.args,
17907 ))))
17908 }
17909 // APPROX_COUNT_DISTINCT(x) <-> APPROX_DISTINCT(x)
17910 "APPROX_COUNT_DISTINCT" if f.args.len() >= 1 => {
17911 let name = match target {
17912 DialectType::Presto
17913 | DialectType::Trino
17914 | DialectType::Athena => "APPROX_DISTINCT",
17915 _ => "APPROX_COUNT_DISTINCT",
17916 };
17917 Ok(Expression::Function(Box::new(Function::new(
17918 name.to_string(),
17919 f.args,
17920 ))))
17921 }
17922 // JSON_EXTRACT -> GET_JSON_OBJECT for Spark/Hive
17923 "JSON_EXTRACT"
17924 if f.args.len() == 2
17925 && !matches!(source, DialectType::BigQuery)
17926 && matches!(
17927 target,
17928 DialectType::Spark
17929 | DialectType::Databricks
17930 | DialectType::Hive
17931 ) =>
17932 {
17933 Ok(Expression::Function(Box::new(Function::new(
17934 "GET_JSON_OBJECT".to_string(),
17935 f.args,
17936 ))))
17937 }
17938 // JSON_EXTRACT(x, path) -> x -> path for SQLite (arrow syntax)
17939 "JSON_EXTRACT"
17940 if f.args.len() == 2 && matches!(target, DialectType::SQLite) =>
17941 {
17942 let mut args = f.args;
17943 let path = args.remove(1);
17944 let this = args.remove(0);
17945 Ok(Expression::JsonExtract(Box::new(
17946 crate::expressions::JsonExtractFunc {
17947 this,
17948 path,
17949 returning: None,
17950 arrow_syntax: true,
17951 hash_arrow_syntax: false,
17952 wrapper_option: None,
17953 quotes_option: None,
17954 on_scalar_string: false,
17955 on_error: None,
17956 },
17957 )))
17958 }
17959 // JSON_FORMAT(x) -> TO_JSON(x) for Spark, TO_JSON_STRING for BigQuery, CAST(TO_JSON(x) AS TEXT) for DuckDB
17960 "JSON_FORMAT" if f.args.len() == 1 => {
17961 match target {
17962 DialectType::Spark | DialectType::Databricks => {
17963 // Presto JSON_FORMAT(JSON '...') needs Spark's string-unquoting flow:
17964 // REGEXP_EXTRACT(TO_JSON(FROM_JSON('[...]', SCHEMA_OF_JSON('[...]'))), '^.(.*).$', 1)
17965 if matches!(
17966 source,
17967 DialectType::Presto
17968 | DialectType::Trino
17969 | DialectType::Athena
17970 ) {
17971 if let Some(Expression::ParseJson(pj)) = f.args.first()
17972 {
17973 if let Expression::Literal(lit) = &pj.this {
17974 if let Literal::String(s) = lit.as_ref() {
17975 let wrapped =
17976 Expression::Literal(Box::new(
17977 Literal::String(format!("[{}]", s)),
17978 ));
17979 let schema_of_json = Expression::Function(
17980 Box::new(Function::new(
17981 "SCHEMA_OF_JSON".to_string(),
17982 vec![wrapped.clone()],
17983 )),
17984 );
17985 let from_json = Expression::Function(
17986 Box::new(Function::new(
17987 "FROM_JSON".to_string(),
17988 vec![wrapped, schema_of_json],
17989 )),
17990 );
17991 let to_json = Expression::Function(
17992 Box::new(Function::new(
17993 "TO_JSON".to_string(),
17994 vec![from_json],
17995 )),
17996 );
17997 return Ok(Expression::Function(Box::new(
17998 Function::new(
17999 "REGEXP_EXTRACT".to_string(),
18000 vec![
18001 to_json,
18002 Expression::Literal(Box::new(
18003 Literal::String(
18004 "^.(.*).$".to_string(),
18005 ),
18006 )),
18007 Expression::Literal(Box::new(
18008 Literal::Number(
18009 "1".to_string(),
18010 ),
18011 )),
18012 ],
18013 ),
18014 )));
18015 }
18016 }
18017 }
18018 }
18019
18020 // Strip inner CAST(... AS JSON) or TO_JSON() if present
18021 // The CastToJsonForSpark may have already converted CAST(x AS JSON) to TO_JSON(x)
18022 let mut args = f.args;
18023 if let Some(Expression::Cast(ref c)) = args.first() {
18024 if matches!(&c.to, DataType::Json | DataType::JsonB) {
18025 args = vec![c.this.clone()];
18026 }
18027 } else if let Some(Expression::Function(ref inner_f)) =
18028 args.first()
18029 {
18030 if inner_f.name.eq_ignore_ascii_case("TO_JSON")
18031 && inner_f.args.len() == 1
18032 {
18033 // Already TO_JSON(x) from CastToJsonForSpark, just use the inner arg
18034 args = inner_f.args.clone();
18035 }
18036 }
18037 Ok(Expression::Function(Box::new(Function::new(
18038 "TO_JSON".to_string(),
18039 args,
18040 ))))
18041 }
18042 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18043 Function::new("TO_JSON_STRING".to_string(), f.args),
18044 ))),
18045 DialectType::DuckDB => {
18046 // CAST(TO_JSON(x) AS TEXT)
18047 let to_json = Expression::Function(Box::new(
18048 Function::new("TO_JSON".to_string(), f.args),
18049 ));
18050 Ok(Expression::Cast(Box::new(Cast {
18051 this: to_json,
18052 to: DataType::Text,
18053 trailing_comments: Vec::new(),
18054 double_colon_syntax: false,
18055 format: None,
18056 default: None,
18057 inferred_type: None,
18058 })))
18059 }
18060 _ => Ok(Expression::Function(f)),
18061 }
18062 }
18063 // SYSDATE -> CURRENT_TIMESTAMP for non-Oracle/Redshift/Snowflake targets
18064 "SYSDATE" if f.args.is_empty() => {
18065 match target {
18066 DialectType::Oracle | DialectType::Redshift => {
18067 Ok(Expression::Function(f))
18068 }
18069 DialectType::Snowflake => {
18070 // Snowflake uses SYSDATE() with parens
18071 let mut f = *f;
18072 f.no_parens = false;
18073 Ok(Expression::Function(Box::new(f)))
18074 }
18075 DialectType::DuckDB => {
18076 // DuckDB: SYSDATE() -> CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
18077 Ok(Expression::AtTimeZone(Box::new(
18078 crate::expressions::AtTimeZone {
18079 this: Expression::CurrentTimestamp(
18080 crate::expressions::CurrentTimestamp {
18081 precision: None,
18082 sysdate: false,
18083 },
18084 ),
18085 zone: Expression::Literal(Box::new(
18086 Literal::String("UTC".to_string()),
18087 )),
18088 },
18089 )))
18090 }
18091 _ => Ok(Expression::CurrentTimestamp(
18092 crate::expressions::CurrentTimestamp {
18093 precision: None,
18094 sysdate: true,
18095 },
18096 )),
18097 }
18098 }
18099 // LOGICAL_OR(x) -> BOOL_OR(x)
18100 "LOGICAL_OR" if f.args.len() == 1 => {
18101 let name = match target {
18102 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
18103 _ => &f.name,
18104 };
18105 Ok(Expression::Function(Box::new(Function::new(
18106 name.to_string(),
18107 f.args,
18108 ))))
18109 }
18110 // LOGICAL_AND(x) -> BOOL_AND(x)
18111 "LOGICAL_AND" if f.args.len() == 1 => {
18112 let name = match target {
18113 DialectType::Spark | DialectType::Databricks => "BOOL_AND",
18114 _ => &f.name,
18115 };
18116 Ok(Expression::Function(Box::new(Function::new(
18117 name.to_string(),
18118 f.args,
18119 ))))
18120 }
18121 // MONTHS_ADD(d, n) -> ADD_MONTHS(d, n) for Oracle
18122 "MONTHS_ADD" if f.args.len() == 2 => match target {
18123 DialectType::Oracle => Ok(Expression::Function(Box::new(
18124 Function::new("ADD_MONTHS".to_string(), f.args),
18125 ))),
18126 _ => Ok(Expression::Function(f)),
18127 },
18128 // ARRAY_JOIN(arr, sep[, null_replacement]) -> target-specific
18129 "ARRAY_JOIN" if f.args.len() >= 2 => {
18130 match target {
18131 DialectType::Spark | DialectType::Databricks => {
18132 // Keep as ARRAY_JOIN for Spark (it supports null_replacement)
18133 Ok(Expression::Function(f))
18134 }
18135 DialectType::Hive => {
18136 // ARRAY_JOIN(arr, sep[, null_rep]) -> CONCAT_WS(sep, arr) (drop null_replacement)
18137 let mut args = f.args;
18138 let arr = args.remove(0);
18139 let sep = args.remove(0);
18140 // Drop any remaining args (null_replacement)
18141 Ok(Expression::Function(Box::new(Function::new(
18142 "CONCAT_WS".to_string(),
18143 vec![sep, arr],
18144 ))))
18145 }
18146 DialectType::Presto | DialectType::Trino => {
18147 Ok(Expression::Function(f))
18148 }
18149 _ => Ok(Expression::Function(f)),
18150 }
18151 }
18152 // LOCATE(substr, str, pos) 3-arg -> target-specific
18153 // For Presto/DuckDB: STRPOS doesn't support 3-arg, need complex expansion
18154 "LOCATE"
18155 if f.args.len() == 3
18156 && matches!(
18157 target,
18158 DialectType::Presto
18159 | DialectType::Trino
18160 | DialectType::Athena
18161 | DialectType::DuckDB
18162 ) =>
18163 {
18164 let mut args = f.args;
18165 let substr = args.remove(0);
18166 let string = args.remove(0);
18167 let pos = args.remove(0);
18168 // STRPOS(SUBSTRING(string, pos), substr)
18169 let substring_call = Expression::Function(Box::new(Function::new(
18170 "SUBSTRING".to_string(),
18171 vec![string.clone(), pos.clone()],
18172 )));
18173 let strpos_call = Expression::Function(Box::new(Function::new(
18174 "STRPOS".to_string(),
18175 vec![substring_call, substr.clone()],
18176 )));
18177 // STRPOS(...) + pos - 1
18178 let pos_adjusted =
18179 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
18180 Expression::Add(Box::new(
18181 crate::expressions::BinaryOp::new(
18182 strpos_call.clone(),
18183 pos.clone(),
18184 ),
18185 )),
18186 Expression::number(1),
18187 )));
18188 // STRPOS(...) = 0
18189 let is_zero =
18190 Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
18191 strpos_call.clone(),
18192 Expression::number(0),
18193 )));
18194
18195 match target {
18196 DialectType::Presto
18197 | DialectType::Trino
18198 | DialectType::Athena => {
18199 // IF(STRPOS(...) = 0, 0, STRPOS(...) + pos - 1)
18200 Ok(Expression::Function(Box::new(Function::new(
18201 "IF".to_string(),
18202 vec![is_zero, Expression::number(0), pos_adjusted],
18203 ))))
18204 }
18205 DialectType::DuckDB => {
18206 // CASE WHEN STRPOS(...) = 0 THEN 0 ELSE STRPOS(...) + pos - 1 END
18207 Ok(Expression::Case(Box::new(crate::expressions::Case {
18208 operand: None,
18209 whens: vec![(is_zero, Expression::number(0))],
18210 else_: Some(pos_adjusted),
18211 comments: Vec::new(),
18212 inferred_type: None,
18213 })))
18214 }
18215 _ => Ok(Expression::Function(Box::new(Function::new(
18216 "LOCATE".to_string(),
18217 vec![substr, string, pos],
18218 )))),
18219 }
18220 }
18221 // STRPOS(haystack, needle, occurrence) 3-arg -> INSTR(haystack, needle, 1, occurrence)
18222 "STRPOS"
18223 if f.args.len() == 3
18224 && matches!(
18225 target,
18226 DialectType::BigQuery
18227 | DialectType::Oracle
18228 | DialectType::Teradata
18229 ) =>
18230 {
18231 let mut args = f.args;
18232 let haystack = args.remove(0);
18233 let needle = args.remove(0);
18234 let occurrence = args.remove(0);
18235 Ok(Expression::Function(Box::new(Function::new(
18236 "INSTR".to_string(),
18237 vec![haystack, needle, Expression::number(1), occurrence],
18238 ))))
18239 }
18240 // SCHEMA_NAME(id) -> target-specific
18241 "SCHEMA_NAME" if f.args.len() <= 1 => match target {
18242 DialectType::MySQL | DialectType::SingleStore => {
18243 Ok(Expression::Function(Box::new(Function::new(
18244 "SCHEMA".to_string(),
18245 vec![],
18246 ))))
18247 }
18248 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
18249 crate::expressions::CurrentSchema { this: None },
18250 ))),
18251 DialectType::SQLite => Ok(Expression::string("main")),
18252 _ => Ok(Expression::Function(f)),
18253 },
18254 // STRTOL(str, base) -> FROM_BASE(str, base) for Trino/Presto
18255 "STRTOL" if f.args.len() == 2 => match target {
18256 DialectType::Presto | DialectType::Trino => {
18257 Ok(Expression::Function(Box::new(Function::new(
18258 "FROM_BASE".to_string(),
18259 f.args,
18260 ))))
18261 }
18262 _ => Ok(Expression::Function(f)),
18263 },
18264 // EDITDIST3(a, b) -> LEVENSHTEIN(a, b) for Spark
18265 "EDITDIST3" if f.args.len() == 2 => match target {
18266 DialectType::Spark | DialectType::Databricks => {
18267 Ok(Expression::Function(Box::new(Function::new(
18268 "LEVENSHTEIN".to_string(),
18269 f.args,
18270 ))))
18271 }
18272 _ => Ok(Expression::Function(f)),
18273 },
18274 // FORMAT(num, decimals) from MySQL -> DuckDB FORMAT('{:,.Xf}', num)
18275 "FORMAT"
18276 if f.args.len() == 2
18277 && matches!(
18278 source,
18279 DialectType::MySQL | DialectType::SingleStore
18280 )
18281 && matches!(target, DialectType::DuckDB) =>
18282 {
18283 let mut args = f.args;
18284 let num_expr = args.remove(0);
18285 let decimals_expr = args.remove(0);
18286 // Extract decimal count
18287 let dec_count = match &decimals_expr {
18288 Expression::Literal(lit)
18289 if matches!(lit.as_ref(), Literal::Number(_)) =>
18290 {
18291 let Literal::Number(n) = lit.as_ref() else {
18292 unreachable!()
18293 };
18294 n.clone()
18295 }
18296 _ => "0".to_string(),
18297 };
18298 let fmt_str = format!("{{:,.{}f}}", dec_count);
18299 Ok(Expression::Function(Box::new(Function::new(
18300 "FORMAT".to_string(),
18301 vec![Expression::string(&fmt_str), num_expr],
18302 ))))
18303 }
18304 // FORMAT(x, fmt) from TSQL -> DATE_FORMAT for Spark, or expand short codes
18305 "FORMAT"
18306 if f.args.len() == 2
18307 && matches!(
18308 source,
18309 DialectType::TSQL | DialectType::Fabric
18310 ) =>
18311 {
18312 let val_expr = f.args[0].clone();
18313 let fmt_expr = f.args[1].clone();
18314 // Expand unambiguous .NET single-char date format shortcodes to full patterns.
18315 // Only expand shortcodes that are NOT also valid numeric format specifiers.
18316 // Ambiguous: d, D, f, F, g, G (used for both dates and numbers)
18317 // Unambiguous date: m/M (Month day), t/T (Time), y/Y (Year month)
18318 let (expanded_fmt, is_shortcode) = match &fmt_expr {
18319 Expression::Literal(lit)
18320 if matches!(
18321 lit.as_ref(),
18322 crate::expressions::Literal::String(_)
18323 ) =>
18324 {
18325 let crate::expressions::Literal::String(s) = lit.as_ref()
18326 else {
18327 unreachable!()
18328 };
18329 match s.as_str() {
18330 "m" | "M" => (Expression::string("MMMM d"), true),
18331 "t" => (Expression::string("h:mm tt"), true),
18332 "T" => (Expression::string("h:mm:ss tt"), true),
18333 "y" | "Y" => (Expression::string("MMMM yyyy"), true),
18334 _ => (fmt_expr.clone(), false),
18335 }
18336 }
18337 _ => (fmt_expr.clone(), false),
18338 };
18339 // Check if the format looks like a date format
18340 let is_date_format = is_shortcode
18341 || match &expanded_fmt {
18342 Expression::Literal(lit)
18343 if matches!(
18344 lit.as_ref(),
18345 crate::expressions::Literal::String(_)
18346 ) =>
18347 {
18348 let crate::expressions::Literal::String(s) =
18349 lit.as_ref()
18350 else {
18351 unreachable!()
18352 };
18353 // Date formats typically contain yyyy, MM, dd, MMMM, HH, etc.
18354 s.contains("yyyy")
18355 || s.contains("YYYY")
18356 || s.contains("MM")
18357 || s.contains("dd")
18358 || s.contains("MMMM")
18359 || s.contains("HH")
18360 || s.contains("hh")
18361 || s.contains("ss")
18362 }
18363 _ => false,
18364 };
18365 match target {
18366 DialectType::Spark | DialectType::Databricks => {
18367 let func_name = if is_date_format {
18368 "DATE_FORMAT"
18369 } else {
18370 "FORMAT_NUMBER"
18371 };
18372 Ok(Expression::Function(Box::new(Function::new(
18373 func_name.to_string(),
18374 vec![val_expr, expanded_fmt],
18375 ))))
18376 }
18377 _ => {
18378 // For TSQL and other targets, expand shortcodes but keep FORMAT
18379 if is_shortcode {
18380 Ok(Expression::Function(Box::new(Function::new(
18381 "FORMAT".to_string(),
18382 vec![val_expr, expanded_fmt],
18383 ))))
18384 } else {
18385 Ok(Expression::Function(f))
18386 }
18387 }
18388 }
18389 }
18390 // FORMAT('%s', x) from Trino/Presto -> target-specific
18391 "FORMAT"
18392 if f.args.len() >= 2
18393 && matches!(
18394 source,
18395 DialectType::Trino
18396 | DialectType::Presto
18397 | DialectType::Athena
18398 ) =>
18399 {
18400 let fmt_expr = f.args[0].clone();
18401 let value_args: Vec<Expression> = f.args[1..].to_vec();
18402 match target {
18403 // DuckDB: replace %s with {} in format string
18404 DialectType::DuckDB => {
18405 let new_fmt = match &fmt_expr {
18406 Expression::Literal(lit)
18407 if matches!(lit.as_ref(), Literal::String(_)) =>
18408 {
18409 let Literal::String(s) = lit.as_ref() else {
18410 unreachable!()
18411 };
18412 Expression::Literal(Box::new(Literal::String(
18413 s.replace("%s", "{}"),
18414 )))
18415 }
18416 _ => fmt_expr,
18417 };
18418 let mut args = vec![new_fmt];
18419 args.extend(value_args);
18420 Ok(Expression::Function(Box::new(Function::new(
18421 "FORMAT".to_string(),
18422 args,
18423 ))))
18424 }
18425 // Snowflake: FORMAT('%s', x) -> TO_CHAR(x) when just %s
18426 DialectType::Snowflake => match &fmt_expr {
18427 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "%s" && value_args.len() == 1) =>
18428 {
18429 let Literal::String(_) = lit.as_ref() else {
18430 unreachable!()
18431 };
18432 Ok(Expression::Function(Box::new(Function::new(
18433 "TO_CHAR".to_string(),
18434 value_args,
18435 ))))
18436 }
18437 _ => Ok(Expression::Function(f)),
18438 },
18439 // Default: keep FORMAT as-is
18440 _ => Ok(Expression::Function(f)),
18441 }
18442 }
18443 // LIST_CONTAINS / LIST_HAS / ARRAY_CONTAINS -> target-specific
18444 "LIST_CONTAINS" | "LIST_HAS" | "ARRAY_CONTAINS"
18445 if f.args.len() == 2 =>
18446 {
18447 // When coming from Snowflake source: ARRAY_CONTAINS(value, array)
18448 // args[0]=value, args[1]=array. For DuckDB target, swap and add NULL-aware CASE.
18449 if matches!(target, DialectType::DuckDB)
18450 && matches!(source, DialectType::Snowflake)
18451 && f.name.eq_ignore_ascii_case("ARRAY_CONTAINS")
18452 {
18453 let value = f.args[0].clone();
18454 let array = f.args[1].clone();
18455
18456 // value IS NULL
18457 let value_is_null =
18458 Expression::IsNull(Box::new(crate::expressions::IsNull {
18459 this: value.clone(),
18460 not: false,
18461 postfix_form: false,
18462 }));
18463
18464 // ARRAY_LENGTH(array)
18465 let array_length =
18466 Expression::Function(Box::new(Function::new(
18467 "ARRAY_LENGTH".to_string(),
18468 vec![array.clone()],
18469 )));
18470 // LIST_COUNT(array)
18471 let list_count = Expression::Function(Box::new(Function::new(
18472 "LIST_COUNT".to_string(),
18473 vec![array.clone()],
18474 )));
18475 // ARRAY_LENGTH(array) <> LIST_COUNT(array)
18476 let neq =
18477 Expression::Neq(Box::new(crate::expressions::BinaryOp {
18478 left: array_length,
18479 right: list_count,
18480 left_comments: vec![],
18481 operator_comments: vec![],
18482 trailing_comments: vec![],
18483 inferred_type: None,
18484 }));
18485 // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
18486 let nullif =
18487 Expression::Nullif(Box::new(crate::expressions::Nullif {
18488 this: Box::new(neq),
18489 expression: Box::new(Expression::Boolean(
18490 crate::expressions::BooleanLiteral { value: false },
18491 )),
18492 }));
18493
18494 // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
18495 let array_contains =
18496 Expression::Function(Box::new(Function::new(
18497 "ARRAY_CONTAINS".to_string(),
18498 vec![array, value],
18499 )));
18500
18501 // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
18502 return Ok(Expression::Case(Box::new(Case {
18503 operand: None,
18504 whens: vec![(value_is_null, nullif)],
18505 else_: Some(array_contains),
18506 comments: Vec::new(),
18507 inferred_type: None,
18508 })));
18509 }
18510 match target {
18511 DialectType::PostgreSQL | DialectType::Redshift => {
18512 // CASE WHEN needle IS NULL THEN NULL ELSE COALESCE(needle = ANY(arr), FALSE) END
18513 let arr = f.args[0].clone();
18514 let needle = f.args[1].clone();
18515 // Convert [] to ARRAY[] for PostgreSQL
18516 let pg_arr = match arr {
18517 Expression::Array(a) => Expression::ArrayFunc(
18518 Box::new(crate::expressions::ArrayConstructor {
18519 expressions: a.expressions,
18520 bracket_notation: false,
18521 use_list_keyword: false,
18522 }),
18523 ),
18524 _ => arr,
18525 };
18526 // needle = ANY(arr) using the Any quantified expression
18527 let any_expr = Expression::Any(Box::new(
18528 crate::expressions::QuantifiedExpr {
18529 this: needle.clone(),
18530 subquery: pg_arr,
18531 op: Some(crate::expressions::QuantifiedOp::Eq),
18532 },
18533 ));
18534 let coalesce = Expression::Coalesce(Box::new(
18535 crate::expressions::VarArgFunc {
18536 expressions: vec![
18537 any_expr,
18538 Expression::Boolean(
18539 crate::expressions::BooleanLiteral {
18540 value: false,
18541 },
18542 ),
18543 ],
18544 original_name: None,
18545 inferred_type: None,
18546 },
18547 ));
18548 let is_null_check = Expression::IsNull(Box::new(
18549 crate::expressions::IsNull {
18550 this: needle,
18551 not: false,
18552 postfix_form: false,
18553 },
18554 ));
18555 Ok(Expression::Case(Box::new(Case {
18556 operand: None,
18557 whens: vec![(
18558 is_null_check,
18559 Expression::Null(crate::expressions::Null),
18560 )],
18561 else_: Some(coalesce),
18562 comments: Vec::new(),
18563 inferred_type: None,
18564 })))
18565 }
18566 _ => Ok(Expression::Function(Box::new(Function::new(
18567 "ARRAY_CONTAINS".to_string(),
18568 f.args,
18569 )))),
18570 }
18571 }
18572 // LIST_HAS_ANY / ARRAY_HAS_ANY -> target-specific overlap operator
18573 "LIST_HAS_ANY" | "ARRAY_HAS_ANY" if f.args.len() == 2 => {
18574 match target {
18575 DialectType::PostgreSQL | DialectType::Redshift => {
18576 // arr1 && arr2 with ARRAY[] syntax
18577 let mut args = f.args;
18578 let arr1 = args.remove(0);
18579 let arr2 = args.remove(0);
18580 let pg_arr1 = match arr1 {
18581 Expression::Array(a) => Expression::ArrayFunc(
18582 Box::new(crate::expressions::ArrayConstructor {
18583 expressions: a.expressions,
18584 bracket_notation: false,
18585 use_list_keyword: false,
18586 }),
18587 ),
18588 _ => arr1,
18589 };
18590 let pg_arr2 = match arr2 {
18591 Expression::Array(a) => Expression::ArrayFunc(
18592 Box::new(crate::expressions::ArrayConstructor {
18593 expressions: a.expressions,
18594 bracket_notation: false,
18595 use_list_keyword: false,
18596 }),
18597 ),
18598 _ => arr2,
18599 };
18600 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18601 pg_arr1, pg_arr2,
18602 ))))
18603 }
18604 DialectType::DuckDB => {
18605 // DuckDB: arr1 && arr2 (native support)
18606 let mut args = f.args;
18607 let arr1 = args.remove(0);
18608 let arr2 = args.remove(0);
18609 Ok(Expression::ArrayOverlaps(Box::new(BinaryOp::new(
18610 arr1, arr2,
18611 ))))
18612 }
18613 _ => Ok(Expression::Function(Box::new(Function::new(
18614 "LIST_HAS_ANY".to_string(),
18615 f.args,
18616 )))),
18617 }
18618 }
18619 // APPROX_QUANTILE(x, q) -> target-specific
18620 "APPROX_QUANTILE" if f.args.len() == 2 => match target {
18621 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18622 Function::new("APPROX_PERCENTILE".to_string(), f.args),
18623 ))),
18624 DialectType::DuckDB => Ok(Expression::Function(f)),
18625 _ => Ok(Expression::Function(f)),
18626 },
18627 // MAKE_DATE(y, m, d) -> DATE(y, m, d) for BigQuery
18628 "MAKE_DATE" if f.args.len() == 3 => match target {
18629 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18630 Function::new("DATE".to_string(), f.args),
18631 ))),
18632 _ => Ok(Expression::Function(f)),
18633 },
18634 // RANGE(start, end[, step]) -> target-specific
18635 "RANGE"
18636 if f.args.len() >= 2 && !matches!(target, DialectType::DuckDB) =>
18637 {
18638 let start = f.args[0].clone();
18639 let end = f.args[1].clone();
18640 let step = f.args.get(2).cloned();
18641 match target {
18642 // Snowflake ARRAY_GENERATE_RANGE uses exclusive end (same as DuckDB RANGE),
18643 // so just rename without adjusting the end argument.
18644 DialectType::Snowflake => {
18645 let mut args = vec![start, end];
18646 if let Some(s) = step {
18647 args.push(s);
18648 }
18649 Ok(Expression::Function(Box::new(Function::new(
18650 "ARRAY_GENERATE_RANGE".to_string(),
18651 args,
18652 ))))
18653 }
18654 DialectType::Spark | DialectType::Databricks => {
18655 // RANGE(start, end) -> SEQUENCE(start, end-1)
18656 // RANGE(start, end, step) -> SEQUENCE(start, end-step, step) when step constant
18657 // RANGE(start, start) -> ARRAY() (empty)
18658 // RANGE(start, end, 0) -> ARRAY() (empty)
18659 // When end is variable: IF((end - 1) <= start, ARRAY(), SEQUENCE(start, (end - 1)))
18660
18661 // Check for constant args
18662 fn extract_i64(e: &Expression) -> Option<i64> {
18663 match e {
18664 Expression::Literal(lit)
18665 if matches!(
18666 lit.as_ref(),
18667 Literal::Number(_)
18668 ) =>
18669 {
18670 let Literal::Number(n) = lit.as_ref() else {
18671 unreachable!()
18672 };
18673 n.parse::<i64>().ok()
18674 }
18675 Expression::Neg(u) => {
18676 if let Expression::Literal(lit) = &u.this {
18677 if let Literal::Number(n) = lit.as_ref() {
18678 n.parse::<i64>().ok().map(|v| -v)
18679 } else {
18680 None
18681 }
18682 } else {
18683 None
18684 }
18685 }
18686 _ => None,
18687 }
18688 }
18689 let start_val = extract_i64(&start);
18690 let end_val = extract_i64(&end);
18691 let step_val = step.as_ref().and_then(|s| extract_i64(s));
18692
18693 // Check for RANGE(x, x) or RANGE(x, y, 0) -> empty array
18694 if step_val == Some(0) {
18695 return Ok(Expression::Function(Box::new(
18696 Function::new("ARRAY".to_string(), vec![]),
18697 )));
18698 }
18699 if let (Some(s), Some(e_val)) = (start_val, end_val) {
18700 if s == e_val {
18701 return Ok(Expression::Function(Box::new(
18702 Function::new("ARRAY".to_string(), vec![]),
18703 )));
18704 }
18705 }
18706
18707 if let (Some(_s_val), Some(e_val)) = (start_val, end_val) {
18708 // All constants - compute new end = end - step (if step provided) or end - 1
18709 match step_val {
18710 Some(st) if st < 0 => {
18711 // Negative step: SEQUENCE(start, end - step, step)
18712 let new_end = e_val - st; // end - step (= end + |step|)
18713 let mut args =
18714 vec![start, Expression::number(new_end)];
18715 if let Some(s) = step {
18716 args.push(s);
18717 }
18718 Ok(Expression::Function(Box::new(
18719 Function::new("SEQUENCE".to_string(), args),
18720 )))
18721 }
18722 Some(st) => {
18723 let new_end = e_val - st;
18724 let mut args =
18725 vec![start, Expression::number(new_end)];
18726 if let Some(s) = step {
18727 args.push(s);
18728 }
18729 Ok(Expression::Function(Box::new(
18730 Function::new("SEQUENCE".to_string(), args),
18731 )))
18732 }
18733 None => {
18734 // No step: SEQUENCE(start, end - 1)
18735 let new_end = e_val - 1;
18736 Ok(Expression::Function(Box::new(
18737 Function::new(
18738 "SEQUENCE".to_string(),
18739 vec![
18740 start,
18741 Expression::number(new_end),
18742 ],
18743 ),
18744 )))
18745 }
18746 }
18747 } else {
18748 // Variable end: IF((end - 1) < start, ARRAY(), SEQUENCE(start, (end - 1)))
18749 let end_m1 = Expression::Sub(Box::new(BinaryOp::new(
18750 end.clone(),
18751 Expression::number(1),
18752 )));
18753 let cond = Expression::Lt(Box::new(BinaryOp::new(
18754 Expression::Paren(Box::new(Paren {
18755 this: end_m1.clone(),
18756 trailing_comments: Vec::new(),
18757 })),
18758 start.clone(),
18759 )));
18760 let empty = Expression::Function(Box::new(
18761 Function::new("ARRAY".to_string(), vec![]),
18762 ));
18763 let mut seq_args = vec![
18764 start,
18765 Expression::Paren(Box::new(Paren {
18766 this: end_m1,
18767 trailing_comments: Vec::new(),
18768 })),
18769 ];
18770 if let Some(s) = step {
18771 seq_args.push(s);
18772 }
18773 let seq = Expression::Function(Box::new(
18774 Function::new("SEQUENCE".to_string(), seq_args),
18775 ));
18776 Ok(Expression::IfFunc(Box::new(
18777 crate::expressions::IfFunc {
18778 condition: cond,
18779 true_value: empty,
18780 false_value: Some(seq),
18781 original_name: None,
18782 inferred_type: None,
18783 },
18784 )))
18785 }
18786 }
18787 DialectType::SQLite => {
18788 // RANGE(start, end) -> GENERATE_SERIES(start, end)
18789 // The subquery wrapping is handled at the Alias level
18790 let mut args = vec![start, end];
18791 if let Some(s) = step {
18792 args.push(s);
18793 }
18794 Ok(Expression::Function(Box::new(Function::new(
18795 "GENERATE_SERIES".to_string(),
18796 args,
18797 ))))
18798 }
18799 _ => Ok(Expression::Function(f)),
18800 }
18801 }
18802 // ARRAY_REVERSE_SORT -> target-specific
18803 // (handled above as well, but also need DuckDB self-normalization)
18804 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
18805 "MAP_FROM_ARRAYS" if f.args.len() == 2 => match target {
18806 DialectType::Snowflake => Ok(Expression::Function(Box::new(
18807 Function::new("OBJECT_CONSTRUCT".to_string(), f.args),
18808 ))),
18809 DialectType::Spark | DialectType::Databricks => {
18810 Ok(Expression::Function(Box::new(Function::new(
18811 "MAP_FROM_ARRAYS".to_string(),
18812 f.args,
18813 ))))
18814 }
18815 _ => Ok(Expression::Function(Box::new(Function::new(
18816 "MAP".to_string(),
18817 f.args,
18818 )))),
18819 },
18820 // VARIANCE(x) -> varSamp(x) for ClickHouse
18821 "VARIANCE" if f.args.len() == 1 => match target {
18822 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18823 Function::new("varSamp".to_string(), f.args),
18824 ))),
18825 _ => Ok(Expression::Function(f)),
18826 },
18827 // STDDEV(x) -> stddevSamp(x) for ClickHouse
18828 "STDDEV" if f.args.len() == 1 => match target {
18829 DialectType::ClickHouse => Ok(Expression::Function(Box::new(
18830 Function::new("stddevSamp".to_string(), f.args),
18831 ))),
18832 _ => Ok(Expression::Function(f)),
18833 },
18834 // ISINF(x) -> IS_INF(x) for BigQuery
18835 "ISINF" if f.args.len() == 1 => match target {
18836 DialectType::BigQuery => Ok(Expression::Function(Box::new(
18837 Function::new("IS_INF".to_string(), f.args),
18838 ))),
18839 _ => Ok(Expression::Function(f)),
18840 },
18841 // CONTAINS(arr, x) -> ARRAY_CONTAINS(arr, x) for Spark/Hive
18842 "CONTAINS" if f.args.len() == 2 => match target {
18843 DialectType::Spark
18844 | DialectType::Databricks
18845 | DialectType::Hive => Ok(Expression::Function(Box::new(
18846 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18847 ))),
18848 _ => Ok(Expression::Function(f)),
18849 },
18850 // ARRAY_CONTAINS(arr, x) -> CONTAINS(arr, x) for Presto
18851 "ARRAY_CONTAINS" if f.args.len() == 2 => match target {
18852 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
18853 Ok(Expression::Function(Box::new(Function::new(
18854 "CONTAINS".to_string(),
18855 f.args,
18856 ))))
18857 }
18858 DialectType::DuckDB => Ok(Expression::Function(Box::new(
18859 Function::new("ARRAY_CONTAINS".to_string(), f.args),
18860 ))),
18861 _ => Ok(Expression::Function(f)),
18862 },
18863 // TO_UNIXTIME(x) -> UNIX_TIMESTAMP(x) for Hive/Spark
18864 "TO_UNIXTIME" if f.args.len() == 1 => match target {
18865 DialectType::Hive
18866 | DialectType::Spark
18867 | DialectType::Databricks => Ok(Expression::Function(Box::new(
18868 Function::new("UNIX_TIMESTAMP".to_string(), f.args),
18869 ))),
18870 _ => Ok(Expression::Function(f)),
18871 },
18872 // FROM_UNIXTIME(x) -> target-specific
18873 "FROM_UNIXTIME" if f.args.len() == 1 => {
18874 match target {
18875 DialectType::Hive
18876 | DialectType::Spark
18877 | DialectType::Databricks
18878 | DialectType::Presto
18879 | DialectType::Trino => Ok(Expression::Function(f)),
18880 DialectType::DuckDB => {
18881 // DuckDB: TO_TIMESTAMP(x)
18882 let arg = f.args.into_iter().next().unwrap();
18883 Ok(Expression::Function(Box::new(Function::new(
18884 "TO_TIMESTAMP".to_string(),
18885 vec![arg],
18886 ))))
18887 }
18888 DialectType::PostgreSQL => {
18889 // PG: TO_TIMESTAMP(col)
18890 let arg = f.args.into_iter().next().unwrap();
18891 Ok(Expression::Function(Box::new(Function::new(
18892 "TO_TIMESTAMP".to_string(),
18893 vec![arg],
18894 ))))
18895 }
18896 DialectType::Redshift => {
18897 // Redshift: (TIMESTAMP 'epoch' + col * INTERVAL '1 SECOND')
18898 let arg = f.args.into_iter().next().unwrap();
18899 let epoch_ts = Expression::Literal(Box::new(
18900 Literal::Timestamp("epoch".to_string()),
18901 ));
18902 let interval = Expression::Interval(Box::new(
18903 crate::expressions::Interval {
18904 this: Some(Expression::string("1 SECOND")),
18905 unit: None,
18906 },
18907 ));
18908 let mul =
18909 Expression::Mul(Box::new(BinaryOp::new(arg, interval)));
18910 let add =
18911 Expression::Add(Box::new(BinaryOp::new(epoch_ts, mul)));
18912 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
18913 this: add,
18914 trailing_comments: Vec::new(),
18915 })))
18916 }
18917 _ => Ok(Expression::Function(f)),
18918 }
18919 }
18920 // FROM_UNIXTIME(x, fmt) with 2 args from Hive/Spark -> target-specific
18921 "FROM_UNIXTIME"
18922 if f.args.len() == 2
18923 && matches!(
18924 source,
18925 DialectType::Hive
18926 | DialectType::Spark
18927 | DialectType::Databricks
18928 ) =>
18929 {
18930 let mut args = f.args;
18931 let unix_ts = args.remove(0);
18932 let fmt_expr = args.remove(0);
18933 match target {
18934 DialectType::DuckDB => {
18935 // DuckDB: STRFTIME(TO_TIMESTAMP(x), c_fmt)
18936 let to_ts = Expression::Function(Box::new(Function::new(
18937 "TO_TIMESTAMP".to_string(),
18938 vec![unix_ts],
18939 )));
18940 if let Expression::Literal(lit) = &fmt_expr {
18941 if let crate::expressions::Literal::String(s) =
18942 lit.as_ref()
18943 {
18944 let c_fmt = Self::hive_format_to_c_format(s);
18945 Ok(Expression::Function(Box::new(Function::new(
18946 "STRFTIME".to_string(),
18947 vec![to_ts, Expression::string(&c_fmt)],
18948 ))))
18949 } else {
18950 Ok(Expression::Function(Box::new(Function::new(
18951 "STRFTIME".to_string(),
18952 vec![to_ts, fmt_expr],
18953 ))))
18954 }
18955 } else {
18956 Ok(Expression::Function(Box::new(Function::new(
18957 "STRFTIME".to_string(),
18958 vec![to_ts, fmt_expr],
18959 ))))
18960 }
18961 }
18962 DialectType::Presto
18963 | DialectType::Trino
18964 | DialectType::Athena => {
18965 // Presto: DATE_FORMAT(FROM_UNIXTIME(x), presto_fmt)
18966 let from_unix =
18967 Expression::Function(Box::new(Function::new(
18968 "FROM_UNIXTIME".to_string(),
18969 vec![unix_ts],
18970 )));
18971 if let Expression::Literal(lit) = &fmt_expr {
18972 if let crate::expressions::Literal::String(s) =
18973 lit.as_ref()
18974 {
18975 let p_fmt = Self::hive_format_to_presto_format(s);
18976 Ok(Expression::Function(Box::new(Function::new(
18977 "DATE_FORMAT".to_string(),
18978 vec![from_unix, Expression::string(&p_fmt)],
18979 ))))
18980 } else {
18981 Ok(Expression::Function(Box::new(Function::new(
18982 "DATE_FORMAT".to_string(),
18983 vec![from_unix, fmt_expr],
18984 ))))
18985 }
18986 } else {
18987 Ok(Expression::Function(Box::new(Function::new(
18988 "DATE_FORMAT".to_string(),
18989 vec![from_unix, fmt_expr],
18990 ))))
18991 }
18992 }
18993 _ => {
18994 // Keep as FROM_UNIXTIME(x, fmt) for other targets
18995 Ok(Expression::Function(Box::new(Function::new(
18996 "FROM_UNIXTIME".to_string(),
18997 vec![unix_ts, fmt_expr],
18998 ))))
18999 }
19000 }
19001 }
19002 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr) for Spark
19003 "DATEPART" | "DATE_PART" if f.args.len() == 2 => {
19004 let unit_str = Self::get_unit_str_static(&f.args[0]);
19005 // Get the raw unit text preserving original case
19006 let raw_unit = match &f.args[0] {
19007 Expression::Identifier(id) => id.name.clone(),
19008 Expression::Var(v) => v.this.clone(),
19009 Expression::Literal(lit)
19010 if matches!(
19011 lit.as_ref(),
19012 crate::expressions::Literal::String(_)
19013 ) =>
19014 {
19015 let crate::expressions::Literal::String(s) = lit.as_ref()
19016 else {
19017 unreachable!()
19018 };
19019 s.clone()
19020 }
19021 Expression::Column(col) => col.name.name.clone(),
19022 _ => unit_str.clone(),
19023 };
19024 match target {
19025 DialectType::TSQL | DialectType::Fabric => {
19026 // Preserve original case of unit for TSQL
19027 let unit_name = match unit_str.as_str() {
19028 "YY" | "YYYY" => "YEAR".to_string(),
19029 "QQ" | "Q" => "QUARTER".to_string(),
19030 "MM" | "M" => "MONTH".to_string(),
19031 "WK" | "WW" => "WEEK".to_string(),
19032 "DD" | "D" | "DY" => "DAY".to_string(),
19033 "HH" => "HOUR".to_string(),
19034 "MI" | "N" => "MINUTE".to_string(),
19035 "SS" | "S" => "SECOND".to_string(),
19036 _ => raw_unit.clone(), // preserve original case
19037 };
19038 let mut args = f.args;
19039 args[0] =
19040 Expression::Identifier(Identifier::new(&unit_name));
19041 Ok(Expression::Function(Box::new(Function::new(
19042 "DATEPART".to_string(),
19043 args,
19044 ))))
19045 }
19046 DialectType::Spark | DialectType::Databricks => {
19047 // DATEPART(unit, expr) -> EXTRACT(unit FROM expr)
19048 // Preserve original case for non-abbreviation units
19049 let unit = match unit_str.as_str() {
19050 "YY" | "YYYY" => "YEAR".to_string(),
19051 "QQ" | "Q" => "QUARTER".to_string(),
19052 "MM" | "M" => "MONTH".to_string(),
19053 "WK" | "WW" => "WEEK".to_string(),
19054 "DD" | "D" | "DY" => "DAY".to_string(),
19055 "HH" => "HOUR".to_string(),
19056 "MI" | "N" => "MINUTE".to_string(),
19057 "SS" | "S" => "SECOND".to_string(),
19058 _ => raw_unit, // preserve original case
19059 };
19060 Ok(Expression::Extract(Box::new(
19061 crate::expressions::ExtractFunc {
19062 this: f.args[1].clone(),
19063 field: crate::expressions::DateTimeField::Custom(
19064 unit,
19065 ),
19066 },
19067 )))
19068 }
19069 _ => Ok(Expression::Function(Box::new(Function::new(
19070 "DATE_PART".to_string(),
19071 f.args,
19072 )))),
19073 }
19074 }
19075 // DATENAME(mm, date) -> FORMAT(CAST(date AS DATETIME2), 'MMMM') for TSQL
19076 // DATENAME(dw, date) -> FORMAT(CAST(date AS DATETIME2), 'dddd') for TSQL
19077 // DATENAME(mm, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'MMMM') for Spark
19078 // DATENAME(dw, date) -> DATE_FORMAT(CAST(date AS TIMESTAMP), 'EEEE') for Spark
19079 "DATENAME" if f.args.len() == 2 => {
19080 let unit_str = Self::get_unit_str_static(&f.args[0]);
19081 let date_expr = f.args[1].clone();
19082 match unit_str.as_str() {
19083 "MM" | "M" | "MONTH" => match target {
19084 DialectType::TSQL => {
19085 let cast_date = Expression::Cast(Box::new(
19086 crate::expressions::Cast {
19087 this: date_expr,
19088 to: DataType::Custom {
19089 name: "DATETIME2".to_string(),
19090 },
19091 trailing_comments: Vec::new(),
19092 double_colon_syntax: false,
19093 format: None,
19094 default: None,
19095 inferred_type: None,
19096 },
19097 ));
19098 Ok(Expression::Function(Box::new(Function::new(
19099 "FORMAT".to_string(),
19100 vec![cast_date, Expression::string("MMMM")],
19101 ))))
19102 }
19103 DialectType::Spark | DialectType::Databricks => {
19104 let cast_date = Expression::Cast(Box::new(
19105 crate::expressions::Cast {
19106 this: date_expr,
19107 to: DataType::Timestamp {
19108 timezone: false,
19109 precision: None,
19110 },
19111 trailing_comments: Vec::new(),
19112 double_colon_syntax: false,
19113 format: None,
19114 default: None,
19115 inferred_type: None,
19116 },
19117 ));
19118 Ok(Expression::Function(Box::new(Function::new(
19119 "DATE_FORMAT".to_string(),
19120 vec![cast_date, Expression::string("MMMM")],
19121 ))))
19122 }
19123 _ => Ok(Expression::Function(f)),
19124 },
19125 "DW" | "WEEKDAY" => match target {
19126 DialectType::TSQL => {
19127 let cast_date = Expression::Cast(Box::new(
19128 crate::expressions::Cast {
19129 this: date_expr,
19130 to: DataType::Custom {
19131 name: "DATETIME2".to_string(),
19132 },
19133 trailing_comments: Vec::new(),
19134 double_colon_syntax: false,
19135 format: None,
19136 default: None,
19137 inferred_type: None,
19138 },
19139 ));
19140 Ok(Expression::Function(Box::new(Function::new(
19141 "FORMAT".to_string(),
19142 vec![cast_date, Expression::string("dddd")],
19143 ))))
19144 }
19145 DialectType::Spark | DialectType::Databricks => {
19146 let cast_date = Expression::Cast(Box::new(
19147 crate::expressions::Cast {
19148 this: date_expr,
19149 to: DataType::Timestamp {
19150 timezone: false,
19151 precision: None,
19152 },
19153 trailing_comments: Vec::new(),
19154 double_colon_syntax: false,
19155 format: None,
19156 default: None,
19157 inferred_type: None,
19158 },
19159 ));
19160 Ok(Expression::Function(Box::new(Function::new(
19161 "DATE_FORMAT".to_string(),
19162 vec![cast_date, Expression::string("EEEE")],
19163 ))))
19164 }
19165 _ => Ok(Expression::Function(f)),
19166 },
19167 _ => Ok(Expression::Function(f)),
19168 }
19169 }
19170 // STRING_AGG(x, sep) without WITHIN GROUP -> target-specific
19171 "STRING_AGG" if f.args.len() >= 2 => {
19172 let x = f.args[0].clone();
19173 let sep = f.args[1].clone();
19174 match target {
19175 DialectType::MySQL
19176 | DialectType::SingleStore
19177 | DialectType::Doris
19178 | DialectType::StarRocks => Ok(Expression::GroupConcat(
19179 Box::new(crate::expressions::GroupConcatFunc {
19180 this: x,
19181 separator: Some(sep),
19182 order_by: None,
19183 distinct: false,
19184 filter: None,
19185 limit: None,
19186 inferred_type: None,
19187 }),
19188 )),
19189 DialectType::SQLite => Ok(Expression::GroupConcat(Box::new(
19190 crate::expressions::GroupConcatFunc {
19191 this: x,
19192 separator: Some(sep),
19193 order_by: None,
19194 distinct: false,
19195 filter: None,
19196 limit: None,
19197 inferred_type: None,
19198 },
19199 ))),
19200 DialectType::PostgreSQL | DialectType::Redshift => {
19201 Ok(Expression::StringAgg(Box::new(
19202 crate::expressions::StringAggFunc {
19203 this: x,
19204 separator: Some(sep),
19205 order_by: None,
19206 distinct: false,
19207 filter: None,
19208 limit: None,
19209 inferred_type: None,
19210 },
19211 )))
19212 }
19213 _ => Ok(Expression::Function(f)),
19214 }
19215 }
19216 "TRY_DIVIDE" if f.args.len() == 2 => {
19217 let mut args = f.args;
19218 let x = args.remove(0);
19219 let y = args.remove(0);
19220 match target {
19221 DialectType::Spark | DialectType::Databricks => {
19222 Ok(Expression::Function(Box::new(Function::new(
19223 "TRY_DIVIDE".to_string(),
19224 vec![x, y],
19225 ))))
19226 }
19227 DialectType::Snowflake => {
19228 let y_ref = match &y {
19229 Expression::Column(_)
19230 | Expression::Literal(_)
19231 | Expression::Identifier(_) => y.clone(),
19232 _ => Expression::Paren(Box::new(Paren {
19233 this: y.clone(),
19234 trailing_comments: vec![],
19235 })),
19236 };
19237 let x_ref = match &x {
19238 Expression::Column(_)
19239 | Expression::Literal(_)
19240 | Expression::Identifier(_) => x.clone(),
19241 _ => Expression::Paren(Box::new(Paren {
19242 this: x.clone(),
19243 trailing_comments: vec![],
19244 })),
19245 };
19246 let condition = Expression::Neq(Box::new(
19247 crate::expressions::BinaryOp::new(
19248 y_ref.clone(),
19249 Expression::number(0),
19250 ),
19251 ));
19252 let div_expr = Expression::Div(Box::new(
19253 crate::expressions::BinaryOp::new(x_ref, y_ref),
19254 ));
19255 Ok(Expression::IfFunc(Box::new(
19256 crate::expressions::IfFunc {
19257 condition,
19258 true_value: div_expr,
19259 false_value: Some(Expression::Null(Null)),
19260 original_name: Some("IFF".to_string()),
19261 inferred_type: None,
19262 },
19263 )))
19264 }
19265 DialectType::DuckDB => {
19266 let y_ref = match &y {
19267 Expression::Column(_)
19268 | Expression::Literal(_)
19269 | Expression::Identifier(_) => y.clone(),
19270 _ => Expression::Paren(Box::new(Paren {
19271 this: y.clone(),
19272 trailing_comments: vec![],
19273 })),
19274 };
19275 let x_ref = match &x {
19276 Expression::Column(_)
19277 | Expression::Literal(_)
19278 | Expression::Identifier(_) => x.clone(),
19279 _ => Expression::Paren(Box::new(Paren {
19280 this: x.clone(),
19281 trailing_comments: vec![],
19282 })),
19283 };
19284 let condition = Expression::Neq(Box::new(
19285 crate::expressions::BinaryOp::new(
19286 y_ref.clone(),
19287 Expression::number(0),
19288 ),
19289 ));
19290 let div_expr = Expression::Div(Box::new(
19291 crate::expressions::BinaryOp::new(x_ref, y_ref),
19292 ));
19293 Ok(Expression::Case(Box::new(Case {
19294 operand: None,
19295 whens: vec![(condition, div_expr)],
19296 else_: Some(Expression::Null(Null)),
19297 comments: Vec::new(),
19298 inferred_type: None,
19299 })))
19300 }
19301 _ => Ok(Expression::Function(Box::new(Function::new(
19302 "TRY_DIVIDE".to_string(),
19303 vec![x, y],
19304 )))),
19305 }
19306 }
19307 // JSON_ARRAYAGG -> JSON_AGG for PostgreSQL
19308 "JSON_ARRAYAGG" => match target {
19309 DialectType::PostgreSQL => {
19310 Ok(Expression::Function(Box::new(Function {
19311 name: "JSON_AGG".to_string(),
19312 ..(*f)
19313 })))
19314 }
19315 _ => Ok(Expression::Function(f)),
19316 },
19317 // SCHEMA_NAME(id) -> CURRENT_SCHEMA for PostgreSQL, 'main' for SQLite
19318 "SCHEMA_NAME" => match target {
19319 DialectType::PostgreSQL => Ok(Expression::CurrentSchema(Box::new(
19320 crate::expressions::CurrentSchema { this: None },
19321 ))),
19322 DialectType::SQLite => Ok(Expression::string("main")),
19323 _ => Ok(Expression::Function(f)),
19324 },
// TO_TIMESTAMP(x, fmt) 2-arg from Spark/Hive: convert Java format to target format
"TO_TIMESTAMP"
    if f.args.len() == 2
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        )
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let val = args.remove(0);
    let fmt_expr = args.remove(0);
    // Only translate the format when it is a plain string literal; any
    // non-literal (or non-string) format expression is handed to STRPTIME
    // unchanged.
    if let Expression::Literal(ref lit) = fmt_expr {
        if let Literal::String(ref s) = lit.as_ref() {
            // Convert Java/Spark format to C strptime format
            // NOTE(review): near-duplicate of java_to_c_fmt_todate in the
            // 2-arg TO_DATE arm — candidate for a shared helper.
            fn java_to_c_fmt(fmt: &str) -> String {
                // Longer patterns are replaced first so e.g. "yyyy" wins
                // over "yy" and "SSSSSS" is not split.
                let result = fmt
                    .replace("yyyy", "%Y")
                    .replace("SSSSSS", "%f")
                    .replace("EEEE", "%W")
                    .replace("MM", "%m")
                    .replace("dd", "%d")
                    .replace("HH", "%H")
                    .replace("mm", "%M")
                    .replace("ss", "%S")
                    .replace("yy", "%y");
                // Second pass: map timezone markers z/Z, skipping over the
                // two-character % escapes emitted above so their letters are
                // not rewritten.
                let mut out = String::new();
                let chars: Vec<char> = result.chars().collect();
                let mut i = 0;
                while i < chars.len() {
                    if chars[i] == '%' && i + 1 < chars.len() {
                        out.push(chars[i]);
                        out.push(chars[i + 1]);
                        i += 2;
                    } else if chars[i] == 'z' {
                        out.push_str("%Z");
                        i += 1;
                    } else if chars[i] == 'Z' {
                        out.push_str("%z");
                        i += 1;
                    } else {
                        out.push(chars[i]);
                        i += 1;
                    }
                }
                out
            }
            let c_fmt = java_to_c_fmt(s);
            Ok(Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![val, Expression::string(&c_fmt)],
            ))))
        } else {
            // Literal, but not a string literal: pass through as-is.
            Ok(Expression::Function(Box::new(Function::new(
                "STRPTIME".to_string(),
                vec![val, fmt_expr],
            ))))
        }
    } else {
        // Dynamic format expression: pass through as-is.
        Ok(Expression::Function(Box::new(Function::new(
            "STRPTIME".to_string(),
            vec![val, fmt_expr],
        ))))
    }
}
19392 // TO_DATE(x) 1-arg from Doris: date conversion
19393 "TO_DATE"
19394 if f.args.len() == 1
19395 && matches!(
19396 source,
19397 DialectType::Doris | DialectType::StarRocks
19398 ) =>
19399 {
19400 let arg = f.args.into_iter().next().unwrap();
19401 match target {
19402 DialectType::Oracle
19403 | DialectType::DuckDB
19404 | DialectType::TSQL => {
19405 // CAST(x AS DATE)
19406 Ok(Expression::Cast(Box::new(Cast {
19407 this: arg,
19408 to: DataType::Date,
19409 double_colon_syntax: false,
19410 trailing_comments: vec![],
19411 format: None,
19412 default: None,
19413 inferred_type: None,
19414 })))
19415 }
19416 DialectType::MySQL | DialectType::SingleStore => {
19417 // DATE(x)
19418 Ok(Expression::Function(Box::new(Function::new(
19419 "DATE".to_string(),
19420 vec![arg],
19421 ))))
19422 }
19423 _ => {
19424 // Default: keep as TO_DATE(x) (Spark, PostgreSQL, etc.)
19425 Ok(Expression::Function(Box::new(Function::new(
19426 "TO_DATE".to_string(),
19427 vec![arg],
19428 ))))
19429 }
19430 }
19431 }
// TO_DATE(x) 1-arg from Spark/Hive: safe date conversion
// (Spark's TO_DATE returns NULL on unparseable input rather than erroring,
// so targets are chosen to preserve that NULL-on-failure behavior where
// possible.)
"TO_DATE"
    if f.args.len() == 1
        && matches!(
            source,
            DialectType::Spark
                | DialectType::Databricks
                | DialectType::Hive
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::DuckDB => {
            // Spark TO_DATE is safe -> TRY_CAST(x AS DATE)
            Ok(Expression::TryCast(Box::new(Cast {
                this: arg,
                to: DataType::Date,
                double_colon_syntax: false,
                trailing_comments: vec![],
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CAST(CAST(x AS TIMESTAMP) AS DATE)
            // NOTE(review): unlike TRY_CAST above this can raise on bad
            // input — confirm this is the intended Presto behavior.
            Ok(Self::double_cast_timestamp_date(arg))
        }
        DialectType::Snowflake => {
            // Spark's TO_DATE is safe -> TRY_TO_DATE(x, 'yyyy-mm-DD')
            // The default Spark format 'yyyy-MM-dd' maps to Snowflake 'yyyy-mm-DD'
            Ok(Expression::Function(Box::new(Function::new(
                "TRY_TO_DATE".to_string(),
                vec![arg, Expression::string("yyyy-mm-DD")],
            ))))
        }
        _ => {
            // Default: keep as TO_DATE(x)
            Ok(Expression::Function(Box::new(Function::new(
                "TO_DATE".to_string(),
                vec![arg],
            ))))
        }
    }
}
19479 // TO_DATE(x, fmt) 2-arg from Spark/Hive: format-based date conversion
19480 "TO_DATE"
19481 if f.args.len() == 2
19482 && matches!(
19483 source,
19484 DialectType::Spark
19485 | DialectType::Databricks
19486 | DialectType::Hive
19487 ) =>
19488 {
19489 let mut args = f.args;
19490 let val = args.remove(0);
19491 let fmt_expr = args.remove(0);
19492 let is_default_format = matches!(&fmt_expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s == "yyyy-MM-dd"));
19493
19494 if is_default_format {
19495 // Default format: same as 1-arg form
19496 match target {
19497 DialectType::DuckDB => {
19498 Ok(Expression::TryCast(Box::new(Cast {
19499 this: val,
19500 to: DataType::Date,
19501 double_colon_syntax: false,
19502 trailing_comments: vec![],
19503 format: None,
19504 default: None,
19505 inferred_type: None,
19506 })))
19507 }
19508 DialectType::Presto
19509 | DialectType::Trino
19510 | DialectType::Athena => {
19511 Ok(Self::double_cast_timestamp_date(val))
19512 }
19513 DialectType::Snowflake => {
19514 // TRY_TO_DATE(x, format) with Snowflake format mapping
19515 let sf_fmt = "yyyy-MM-dd"
19516 .replace("yyyy", "yyyy")
19517 .replace("MM", "mm")
19518 .replace("dd", "DD");
19519 Ok(Expression::Function(Box::new(Function::new(
19520 "TRY_TO_DATE".to_string(),
19521 vec![val, Expression::string(&sf_fmt)],
19522 ))))
19523 }
19524 _ => Ok(Expression::Function(Box::new(Function::new(
19525 "TO_DATE".to_string(),
19526 vec![val],
19527 )))),
19528 }
19529 } else {
19530 // Non-default format: use format-based parsing
19531 if let Expression::Literal(ref lit) = fmt_expr {
19532 if let Literal::String(ref s) = lit.as_ref() {
19533 match target {
19534 DialectType::DuckDB => {
19535 // CAST(CAST(TRY_STRPTIME(x, c_fmt) AS TIMESTAMP) AS DATE)
19536 fn java_to_c_fmt_todate(fmt: &str) -> String {
19537 let result = fmt
19538 .replace("yyyy", "%Y")
19539 .replace("SSSSSS", "%f")
19540 .replace("EEEE", "%W")
19541 .replace("MM", "%m")
19542 .replace("dd", "%d")
19543 .replace("HH", "%H")
19544 .replace("mm", "%M")
19545 .replace("ss", "%S")
19546 .replace("yy", "%y");
19547 let mut out = String::new();
19548 let chars: Vec<char> =
19549 result.chars().collect();
19550 let mut i = 0;
19551 while i < chars.len() {
19552 if chars[i] == '%'
19553 && i + 1 < chars.len()
19554 {
19555 out.push(chars[i]);
19556 out.push(chars[i + 1]);
19557 i += 2;
19558 } else if chars[i] == 'z' {
19559 out.push_str("%Z");
19560 i += 1;
19561 } else if chars[i] == 'Z' {
19562 out.push_str("%z");
19563 i += 1;
19564 } else {
19565 out.push(chars[i]);
19566 i += 1;
19567 }
19568 }
19569 out
19570 }
19571 let c_fmt = java_to_c_fmt_todate(s);
19572 // CAST(CAST(TRY_STRPTIME(x, fmt) AS TIMESTAMP) AS DATE)
19573 let try_strptime = Expression::Function(
19574 Box::new(Function::new(
19575 "TRY_STRPTIME".to_string(),
19576 vec![val, Expression::string(&c_fmt)],
19577 )),
19578 );
19579 let cast_ts =
19580 Expression::Cast(Box::new(Cast {
19581 this: try_strptime,
19582 to: DataType::Timestamp {
19583 precision: None,
19584 timezone: false,
19585 },
19586 double_colon_syntax: false,
19587 trailing_comments: vec![],
19588 format: None,
19589 default: None,
19590 inferred_type: None,
19591 }));
19592 Ok(Expression::Cast(Box::new(Cast {
19593 this: cast_ts,
19594 to: DataType::Date,
19595 double_colon_syntax: false,
19596 trailing_comments: vec![],
19597 format: None,
19598 default: None,
19599 inferred_type: None,
19600 })))
19601 }
19602 DialectType::Presto
19603 | DialectType::Trino
19604 | DialectType::Athena => {
19605 // CAST(DATE_PARSE(x, presto_fmt) AS DATE)
19606 let p_fmt = s
19607 .replace("yyyy", "%Y")
19608 .replace("SSSSSS", "%f")
19609 .replace("MM", "%m")
19610 .replace("dd", "%d")
19611 .replace("HH", "%H")
19612 .replace("mm", "%M")
19613 .replace("ss", "%S")
19614 .replace("yy", "%y");
19615 let date_parse = Expression::Function(
19616 Box::new(Function::new(
19617 "DATE_PARSE".to_string(),
19618 vec![val, Expression::string(&p_fmt)],
19619 )),
19620 );
19621 Ok(Expression::Cast(Box::new(Cast {
19622 this: date_parse,
19623 to: DataType::Date,
19624 double_colon_syntax: false,
19625 trailing_comments: vec![],
19626 format: None,
19627 default: None,
19628 inferred_type: None,
19629 })))
19630 }
19631 DialectType::Snowflake => {
19632 // TRY_TO_DATE(x, snowflake_fmt)
19633 Ok(Expression::Function(Box::new(
19634 Function::new(
19635 "TRY_TO_DATE".to_string(),
19636 vec![val, Expression::string(s)],
19637 ),
19638 )))
19639 }
19640 _ => Ok(Expression::Function(Box::new(
19641 Function::new(
19642 "TO_DATE".to_string(),
19643 vec![val, fmt_expr],
19644 ),
19645 ))),
19646 }
19647 } else {
19648 Ok(Expression::Function(Box::new(Function::new(
19649 "TO_DATE".to_string(),
19650 vec![val, fmt_expr],
19651 ))))
19652 }
19653 } else {
19654 Ok(Expression::Function(Box::new(Function::new(
19655 "TO_DATE".to_string(),
19656 vec![val, fmt_expr],
19657 ))))
19658 }
19659 }
19660 }
// TO_TIMESTAMP(x) 1-arg: epoch conversion
"TO_TIMESTAMP"
    if f.args.len() == 1
        && matches!(source, DialectType::DuckDB)
        && matches!(
            target,
            DialectType::BigQuery
                | DialectType::Presto
                | DialectType::Trino
                | DialectType::Hive
                | DialectType::Spark
                | DialectType::Databricks
                | DialectType::Athena
        ) =>
{
    let arg = f.args.into_iter().next().unwrap();
    // Pick the target's seconds-since-epoch constructor.
    let func_name = match target {
        DialectType::BigQuery => "TIMESTAMP_SECONDS",
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena
        | DialectType::Hive
        | DialectType::Spark
        | DialectType::Databricks => "FROM_UNIXTIME",
        // Unreachable given the arm guard above; present only to keep the
        // match exhaustive.
        _ => "TO_TIMESTAMP",
    };
    Ok(Expression::Function(Box::new(Function::new(
        func_name.to_string(),
        vec![arg],
    ))))
}
// CONCAT(x) single-arg: -> CONCAT(COALESCE(x, '')) for Spark
"CONCAT" if f.args.len() == 1 => {
    let arg = f.args.into_iter().next().unwrap();
    match target {
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // CONCAT(a) -> CAST(a AS VARCHAR)
            // NOTE(review): presumably because Presto-family CONCAT needs
            // at least two arguments — confirm.
            Ok(Expression::Cast(Box::new(Cast {
                this: arg,
                to: DataType::VarChar {
                    length: None,
                    parenthesized_length: false,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        DialectType::TSQL => {
            // CONCAT(a) -> a
            Ok(arg)
        }
        DialectType::DuckDB => {
            // Keep CONCAT(a) for DuckDB (native support)
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![arg],
            ))))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: CONCAT(COALESCE(a, '')) so a NULL argument yields ''
            // instead of propagating NULL through the concatenation.
            let coalesced = Expression::Coalesce(Box::new(
                crate::expressions::VarArgFunc {
                    expressions: vec![arg, Expression::string("")],
                    original_name: None,
                    inferred_type: None,
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "CONCAT".to_string(),
                vec![coalesced],
            ))))
        }
        _ => Ok(Expression::Function(Box::new(Function::new(
            "CONCAT".to_string(),
            vec![arg],
        )))),
    }
}
19743 // REGEXP_EXTRACT(a, p) 2-arg: BigQuery default group is 0 (no 3rd arg needed)
19744 "REGEXP_EXTRACT"
19745 if f.args.len() == 3 && matches!(target, DialectType::BigQuery) =>
19746 {
19747 // If group_index is 0, drop it
19748 let drop_group = match &f.args[2] {
19749 Expression::Literal(lit)
19750 if matches!(lit.as_ref(), Literal::Number(_)) =>
19751 {
19752 let Literal::Number(n) = lit.as_ref() else {
19753 unreachable!()
19754 };
19755 n == "0"
19756 }
19757 _ => false,
19758 };
19759 if drop_group {
19760 let mut args = f.args;
19761 args.truncate(2);
19762 Ok(Expression::Function(Box::new(Function::new(
19763 "REGEXP_EXTRACT".to_string(),
19764 args,
19765 ))))
19766 } else {
19767 Ok(Expression::Function(f))
19768 }
19769 }
19770 // REGEXP_EXTRACT(a, pattern, group, flags) 4-arg -> REGEXP_SUBSTR for Snowflake
19771 "REGEXP_EXTRACT"
19772 if f.args.len() == 4
19773 && matches!(target, DialectType::Snowflake) =>
19774 {
19775 // REGEXP_EXTRACT(a, 'pattern', 2, 'i') -> REGEXP_SUBSTR(a, 'pattern', 1, 1, 'i', 2)
19776 let mut args = f.args;
19777 let this = args.remove(0);
19778 let pattern = args.remove(0);
19779 let group = args.remove(0);
19780 let flags = args.remove(0);
19781 Ok(Expression::Function(Box::new(Function::new(
19782 "REGEXP_SUBSTR".to_string(),
19783 vec![
19784 this,
19785 pattern,
19786 Expression::number(1),
19787 Expression::number(1),
19788 flags,
19789 group,
19790 ],
19791 ))))
19792 }
19793 // REGEXP_SUBSTR(a, pattern, position) 3-arg -> REGEXP_EXTRACT(SUBSTRING(a, pos), pattern)
19794 "REGEXP_SUBSTR"
19795 if f.args.len() == 3
19796 && matches!(
19797 target,
19798 DialectType::DuckDB
19799 | DialectType::Presto
19800 | DialectType::Trino
19801 | DialectType::Spark
19802 | DialectType::Databricks
19803 ) =>
19804 {
19805 let mut args = f.args;
19806 let this = args.remove(0);
19807 let pattern = args.remove(0);
19808 let position = args.remove(0);
19809 // Wrap subject in SUBSTRING(this, position) to apply the offset
19810 let substring_expr = Expression::Function(Box::new(Function::new(
19811 "SUBSTRING".to_string(),
19812 vec![this, position],
19813 )));
19814 let target_name = match target {
19815 DialectType::DuckDB => "REGEXP_EXTRACT",
19816 _ => "REGEXP_EXTRACT",
19817 };
19818 Ok(Expression::Function(Box::new(Function::new(
19819 target_name.to_string(),
19820 vec![substring_expr, pattern],
19821 ))))
19822 }
19823 // TO_DAYS(x) -> (DATEDIFF(x, '0000-01-01') + 1) or target-specific
19824 "TO_DAYS" if f.args.len() == 1 => {
19825 let x = f.args.into_iter().next().unwrap();
19826 let epoch = Expression::string("0000-01-01");
19827 // Build the final target-specific expression directly
19828 let datediff_expr = match target {
19829 DialectType::MySQL | DialectType::SingleStore => {
19830 // MySQL: (DATEDIFF(x, '0000-01-01') + 1)
19831 Expression::Function(Box::new(Function::new(
19832 "DATEDIFF".to_string(),
19833 vec![x, epoch],
19834 )))
19835 }
19836 DialectType::DuckDB => {
19837 // DuckDB: (DATE_DIFF('DAY', CAST('0000-01-01' AS DATE), CAST(x AS DATE)) + 1)
19838 let cast_epoch = Expression::Cast(Box::new(Cast {
19839 this: epoch,
19840 to: DataType::Date,
19841 trailing_comments: Vec::new(),
19842 double_colon_syntax: false,
19843 format: None,
19844 default: None,
19845 inferred_type: None,
19846 }));
19847 let cast_x = Expression::Cast(Box::new(Cast {
19848 this: x,
19849 to: DataType::Date,
19850 trailing_comments: Vec::new(),
19851 double_colon_syntax: false,
19852 format: None,
19853 default: None,
19854 inferred_type: None,
19855 }));
19856 Expression::Function(Box::new(Function::new(
19857 "DATE_DIFF".to_string(),
19858 vec![Expression::string("DAY"), cast_epoch, cast_x],
19859 )))
19860 }
19861 DialectType::Presto
19862 | DialectType::Trino
19863 | DialectType::Athena => {
19864 // Presto: (DATE_DIFF('DAY', CAST(CAST('0000-01-01' AS TIMESTAMP) AS DATE), CAST(CAST(x AS TIMESTAMP) AS DATE)) + 1)
19865 let cast_epoch = Self::double_cast_timestamp_date(epoch);
19866 let cast_x = Self::double_cast_timestamp_date(x);
19867 Expression::Function(Box::new(Function::new(
19868 "DATE_DIFF".to_string(),
19869 vec![Expression::string("DAY"), cast_epoch, cast_x],
19870 )))
19871 }
19872 _ => {
19873 // Default: (DATEDIFF(x, '0000-01-01') + 1)
19874 Expression::Function(Box::new(Function::new(
19875 "DATEDIFF".to_string(),
19876 vec![x, epoch],
19877 )))
19878 }
19879 };
19880 let add_one = Expression::Add(Box::new(BinaryOp::new(
19881 datediff_expr,
19882 Expression::number(1),
19883 )));
19884 Ok(Expression::Paren(Box::new(crate::expressions::Paren {
19885 this: add_one,
19886 trailing_comments: Vec::new(),
19887 })))
19888 }
19889 // STR_TO_DATE(x, format) -> DATE_PARSE / STRPTIME / TO_DATE etc.
19890 "STR_TO_DATE"
19891 if f.args.len() == 2
19892 && matches!(
19893 target,
19894 DialectType::Presto | DialectType::Trino
19895 ) =>
19896 {
19897 let mut args = f.args;
19898 let x = args.remove(0);
19899 let format_expr = args.remove(0);
19900 // Check if the format contains time components
19901 let has_time = if let Expression::Literal(ref lit) = format_expr {
19902 if let Literal::String(ref fmt) = lit.as_ref() {
19903 fmt.contains("%H")
19904 || fmt.contains("%T")
19905 || fmt.contains("%M")
19906 || fmt.contains("%S")
19907 || fmt.contains("%I")
19908 || fmt.contains("%p")
19909 } else {
19910 false
19911 }
19912 } else {
19913 false
19914 };
19915 let date_parse = Expression::Function(Box::new(Function::new(
19916 "DATE_PARSE".to_string(),
19917 vec![x, format_expr],
19918 )));
19919 if has_time {
19920 // Has time components: just DATE_PARSE
19921 Ok(date_parse)
19922 } else {
19923 // Date-only: CAST(DATE_PARSE(...) AS DATE)
19924 Ok(Expression::Cast(Box::new(Cast {
19925 this: date_parse,
19926 to: DataType::Date,
19927 trailing_comments: Vec::new(),
19928 double_colon_syntax: false,
19929 format: None,
19930 default: None,
19931 inferred_type: None,
19932 })))
19933 }
19934 }
19935 "STR_TO_DATE"
19936 if f.args.len() == 2
19937 && matches!(
19938 target,
19939 DialectType::PostgreSQL | DialectType::Redshift
19940 ) =>
19941 {
19942 let mut args = f.args;
19943 let x = args.remove(0);
19944 let fmt = args.remove(0);
19945 let pg_fmt = match fmt {
19946 Expression::Literal(lit)
19947 if matches!(lit.as_ref(), Literal::String(_)) =>
19948 {
19949 let Literal::String(s) = lit.as_ref() else {
19950 unreachable!()
19951 };
19952 Expression::string(
19953 &s.replace("%Y", "YYYY")
19954 .replace("%m", "MM")
19955 .replace("%d", "DD")
19956 .replace("%H", "HH24")
19957 .replace("%M", "MI")
19958 .replace("%S", "SS"),
19959 )
19960 }
19961 other => other,
19962 };
19963 let to_date = Expression::Function(Box::new(Function::new(
19964 "TO_DATE".to_string(),
19965 vec![x, pg_fmt],
19966 )));
19967 Ok(Expression::Cast(Box::new(Cast {
19968 this: to_date,
19969 to: DataType::Timestamp {
19970 timezone: false,
19971 precision: None,
19972 },
19973 trailing_comments: Vec::new(),
19974 double_colon_syntax: false,
19975 format: None,
19976 default: None,
19977 inferred_type: None,
19978 })))
19979 }
19980 // RANGE(start, end) -> GENERATE_SERIES for SQLite
19981 "RANGE"
19982 if (f.args.len() == 1 || f.args.len() == 2)
19983 && matches!(target, DialectType::SQLite) =>
19984 {
19985 if f.args.len() == 2 {
19986 // RANGE(start, end) -> (SELECT value AS col_alias FROM GENERATE_SERIES(start, end))
19987 // For SQLite, RANGE is exclusive on end, GENERATE_SERIES is inclusive
19988 let mut args = f.args;
19989 let start = args.remove(0);
19990 let end = args.remove(0);
19991 Ok(Expression::Function(Box::new(Function::new(
19992 "GENERATE_SERIES".to_string(),
19993 vec![start, end],
19994 ))))
19995 } else {
19996 Ok(Expression::Function(f))
19997 }
19998 }
19999 // UNIFORM(low, high[, seed]) -> UNIFORM(low, high, RANDOM([seed])) for Snowflake
20000 // When source is Snowflake, keep as-is (args already in correct form)
20001 "UNIFORM"
20002 if matches!(target, DialectType::Snowflake)
20003 && (f.args.len() == 2 || f.args.len() == 3) =>
20004 {
20005 if matches!(source, DialectType::Snowflake) {
20006 // Snowflake -> Snowflake: keep as-is
20007 Ok(Expression::Function(f))
20008 } else {
20009 let mut args = f.args;
20010 let low = args.remove(0);
20011 let high = args.remove(0);
20012 let random = if !args.is_empty() {
20013 let seed = args.remove(0);
20014 Expression::Function(Box::new(Function::new(
20015 "RANDOM".to_string(),
20016 vec![seed],
20017 )))
20018 } else {
20019 Expression::Function(Box::new(Function::new(
20020 "RANDOM".to_string(),
20021 vec![],
20022 )))
20023 };
20024 Ok(Expression::Function(Box::new(Function::new(
20025 "UNIFORM".to_string(),
20026 vec![low, high, random],
20027 ))))
20028 }
20029 }
20030 // TO_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20031 "TO_UTC_TIMESTAMP" if f.args.len() == 2 => {
20032 let mut args = f.args;
20033 let ts_arg = args.remove(0);
20034 let tz_arg = args.remove(0);
20035 // Cast string literal to TIMESTAMP for all targets
20036 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20037 {
20038 Expression::Cast(Box::new(Cast {
20039 this: ts_arg,
20040 to: DataType::Timestamp {
20041 timezone: false,
20042 precision: None,
20043 },
20044 trailing_comments: vec![],
20045 double_colon_syntax: false,
20046 format: None,
20047 default: None,
20048 inferred_type: None,
20049 }))
20050 } else {
20051 ts_arg
20052 };
20053 match target {
20054 DialectType::Spark | DialectType::Databricks => {
20055 Ok(Expression::Function(Box::new(Function::new(
20056 "TO_UTC_TIMESTAMP".to_string(),
20057 vec![ts_cast, tz_arg],
20058 ))))
20059 }
20060 DialectType::Snowflake => {
20061 // CONVERT_TIMEZONE(tz, 'UTC', CAST(ts AS TIMESTAMP))
20062 Ok(Expression::Function(Box::new(Function::new(
20063 "CONVERT_TIMEZONE".to_string(),
20064 vec![tz_arg, Expression::string("UTC"), ts_cast],
20065 ))))
20066 }
20067 DialectType::Presto
20068 | DialectType::Trino
20069 | DialectType::Athena => {
20070 // WITH_TIMEZONE(CAST(ts AS TIMESTAMP), tz) AT TIME ZONE 'UTC'
20071 let wtz = Expression::Function(Box::new(Function::new(
20072 "WITH_TIMEZONE".to_string(),
20073 vec![ts_cast, tz_arg],
20074 )));
20075 Ok(Expression::AtTimeZone(Box::new(
20076 crate::expressions::AtTimeZone {
20077 this: wtz,
20078 zone: Expression::string("UTC"),
20079 },
20080 )))
20081 }
20082 DialectType::BigQuery => {
20083 // DATETIME(TIMESTAMP(CAST(ts AS DATETIME), tz), 'UTC')
20084 let cast_dt = Expression::Cast(Box::new(Cast {
20085 this: if let Expression::Cast(c) = ts_cast {
20086 c.this
20087 } else {
20088 ts_cast.clone()
20089 },
20090 to: DataType::Custom {
20091 name: "DATETIME".to_string(),
20092 },
20093 trailing_comments: vec![],
20094 double_colon_syntax: false,
20095 format: None,
20096 default: None,
20097 inferred_type: None,
20098 }));
20099 let ts_func =
20100 Expression::Function(Box::new(Function::new(
20101 "TIMESTAMP".to_string(),
20102 vec![cast_dt, tz_arg],
20103 )));
20104 Ok(Expression::Function(Box::new(Function::new(
20105 "DATETIME".to_string(),
20106 vec![ts_func, Expression::string("UTC")],
20107 ))))
20108 }
20109 _ => {
20110 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz AT TIME ZONE 'UTC'
20111 let atz1 = Expression::AtTimeZone(Box::new(
20112 crate::expressions::AtTimeZone {
20113 this: ts_cast,
20114 zone: tz_arg,
20115 },
20116 ));
20117 Ok(Expression::AtTimeZone(Box::new(
20118 crate::expressions::AtTimeZone {
20119 this: atz1,
20120 zone: Expression::string("UTC"),
20121 },
20122 )))
20123 }
20124 }
20125 }
20126 // FROM_UTC_TIMESTAMP(ts, tz) -> target-specific UTC conversion
20127 "FROM_UTC_TIMESTAMP" if f.args.len() == 2 => {
20128 let mut args = f.args;
20129 let ts_arg = args.remove(0);
20130 let tz_arg = args.remove(0);
20131 // Cast string literal to TIMESTAMP
20132 let ts_cast = if matches!(&ts_arg, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
20133 {
20134 Expression::Cast(Box::new(Cast {
20135 this: ts_arg,
20136 to: DataType::Timestamp {
20137 timezone: false,
20138 precision: None,
20139 },
20140 trailing_comments: vec![],
20141 double_colon_syntax: false,
20142 format: None,
20143 default: None,
20144 inferred_type: None,
20145 }))
20146 } else {
20147 ts_arg
20148 };
20149 match target {
20150 DialectType::Spark | DialectType::Databricks => {
20151 Ok(Expression::Function(Box::new(Function::new(
20152 "FROM_UTC_TIMESTAMP".to_string(),
20153 vec![ts_cast, tz_arg],
20154 ))))
20155 }
20156 DialectType::Presto
20157 | DialectType::Trino
20158 | DialectType::Athena => {
20159 // AT_TIMEZONE(CAST(ts AS TIMESTAMP), tz)
20160 Ok(Expression::Function(Box::new(Function::new(
20161 "AT_TIMEZONE".to_string(),
20162 vec![ts_cast, tz_arg],
20163 ))))
20164 }
20165 DialectType::Snowflake => {
20166 // CONVERT_TIMEZONE('UTC', tz, CAST(ts AS TIMESTAMP))
20167 Ok(Expression::Function(Box::new(Function::new(
20168 "CONVERT_TIMEZONE".to_string(),
20169 vec![Expression::string("UTC"), tz_arg, ts_cast],
20170 ))))
20171 }
20172 _ => {
20173 // DuckDB, PostgreSQL, Redshift: CAST(ts AS TIMESTAMP) AT TIME ZONE tz
20174 Ok(Expression::AtTimeZone(Box::new(
20175 crate::expressions::AtTimeZone {
20176 this: ts_cast,
20177 zone: tz_arg,
20178 },
20179 )))
20180 }
20181 }
20182 }
20183 // MAP_FROM_ARRAYS(keys, values) -> target-specific map construction
20184 "MAP_FROM_ARRAYS" if f.args.len() == 2 => {
20185 let name = match target {
20186 DialectType::Snowflake => "OBJECT_CONSTRUCT",
20187 _ => "MAP",
20188 };
20189 Ok(Expression::Function(Box::new(Function::new(
20190 name.to_string(),
20191 f.args,
20192 ))))
20193 }
20194 // STR_TO_MAP(s, pair_delim, kv_delim) -> SPLIT_TO_MAP for Presto
20195 "STR_TO_MAP" if f.args.len() >= 1 => match target {
20196 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20197 Ok(Expression::Function(Box::new(Function::new(
20198 "SPLIT_TO_MAP".to_string(),
20199 f.args,
20200 ))))
20201 }
20202 _ => Ok(Expression::Function(f)),
20203 },
20204 // TIME_TO_STR(x, fmt) -> Expression::TimeToStr for proper generation
20205 "TIME_TO_STR" if f.args.len() == 2 => {
20206 let mut args = f.args;
20207 let this = args.remove(0);
20208 let fmt_expr = args.remove(0);
20209 let format = if let Expression::Literal(lit) = fmt_expr {
20210 if let Literal::String(s) = lit.as_ref() {
20211 s.clone()
20212 } else {
20213 String::new()
20214 }
20215 } else {
20216 "%Y-%m-%d %H:%M:%S".to_string()
20217 };
20218 Ok(Expression::TimeToStr(Box::new(
20219 crate::expressions::TimeToStr {
20220 this: Box::new(this),
20221 format,
20222 culture: None,
20223 zone: None,
20224 },
20225 )))
20226 }
20227 // STR_TO_TIME(x, fmt) -> Expression::StrToTime for proper generation
20228 "STR_TO_TIME" if f.args.len() == 2 => {
20229 let mut args = f.args;
20230 let this = args.remove(0);
20231 let fmt_expr = args.remove(0);
20232 let format = if let Expression::Literal(lit) = fmt_expr {
20233 if let Literal::String(s) = lit.as_ref() {
20234 s.clone()
20235 } else {
20236 String::new()
20237 }
20238 } else {
20239 "%Y-%m-%d %H:%M:%S".to_string()
20240 };
20241 Ok(Expression::StrToTime(Box::new(
20242 crate::expressions::StrToTime {
20243 this: Box::new(this),
20244 format,
20245 zone: None,
20246 safe: None,
20247 target_type: None,
20248 },
20249 )))
20250 }
20251 // STR_TO_UNIX(x, fmt) -> Expression::StrToUnix for proper generation
20252 "STR_TO_UNIX" if f.args.len() >= 1 => {
20253 let mut args = f.args;
20254 let this = args.remove(0);
20255 let format = if !args.is_empty() {
20256 if let Expression::Literal(lit) = args.remove(0) {
20257 if let Literal::String(s) = lit.as_ref() {
20258 Some(s.clone())
20259 } else {
20260 None
20261 }
20262 } else {
20263 None
20264 }
20265 } else {
20266 None
20267 };
20268 Ok(Expression::StrToUnix(Box::new(
20269 crate::expressions::StrToUnix {
20270 this: Some(Box::new(this)),
20271 format,
20272 },
20273 )))
20274 }
20275 // TIME_TO_UNIX(x) -> Expression::TimeToUnix for proper generation
20276 "TIME_TO_UNIX" if f.args.len() == 1 => {
20277 let mut args = f.args;
20278 let this = args.remove(0);
20279 Ok(Expression::TimeToUnix(Box::new(
20280 crate::expressions::UnaryFunc {
20281 this,
20282 original_name: None,
20283 inferred_type: None,
20284 },
20285 )))
20286 }
20287 // UNIX_TO_STR(x, fmt) -> Expression::UnixToStr for proper generation
20288 "UNIX_TO_STR" if f.args.len() >= 1 => {
20289 let mut args = f.args;
20290 let this = args.remove(0);
20291 let format = if !args.is_empty() {
20292 if let Expression::Literal(lit) = args.remove(0) {
20293 if let Literal::String(s) = lit.as_ref() {
20294 Some(s.clone())
20295 } else {
20296 None
20297 }
20298 } else {
20299 None
20300 }
20301 } else {
20302 None
20303 };
20304 Ok(Expression::UnixToStr(Box::new(
20305 crate::expressions::UnixToStr {
20306 this: Box::new(this),
20307 format,
20308 },
20309 )))
20310 }
20311 // UNIX_TO_TIME(x) -> Expression::UnixToTime for proper generation
20312 "UNIX_TO_TIME" if f.args.len() == 1 => {
20313 let mut args = f.args;
20314 let this = args.remove(0);
20315 Ok(Expression::UnixToTime(Box::new(
20316 crate::expressions::UnixToTime {
20317 this: Box::new(this),
20318 scale: None,
20319 zone: None,
20320 hours: None,
20321 minutes: None,
20322 format: None,
20323 target_type: None,
20324 },
20325 )))
20326 }
20327 // TIME_STR_TO_DATE(x) -> Expression::TimeStrToDate for proper generation
20328 "TIME_STR_TO_DATE" if f.args.len() == 1 => {
20329 let mut args = f.args;
20330 let this = args.remove(0);
20331 Ok(Expression::TimeStrToDate(Box::new(
20332 crate::expressions::UnaryFunc {
20333 this,
20334 original_name: None,
20335 inferred_type: None,
20336 },
20337 )))
20338 }
20339 // TIME_STR_TO_TIME(x) -> Expression::TimeStrToTime for proper generation
20340 "TIME_STR_TO_TIME" if f.args.len() == 1 => {
20341 let mut args = f.args;
20342 let this = args.remove(0);
20343 Ok(Expression::TimeStrToTime(Box::new(
20344 crate::expressions::TimeStrToTime {
20345 this: Box::new(this),
20346 zone: None,
20347 },
20348 )))
20349 }
20350 // MONTHS_BETWEEN(end, start) -> DuckDB complex expansion
20351 "MONTHS_BETWEEN" if f.args.len() == 2 => {
20352 match target {
20353 DialectType::DuckDB => {
20354 let mut args = f.args;
20355 let end_date = args.remove(0);
20356 let start_date = args.remove(0);
20357 let cast_end = Self::ensure_cast_date(end_date);
20358 let cast_start = Self::ensure_cast_date(start_date);
20359 // DATE_DIFF('MONTH', start, end) + CASE WHEN DAY(end) = DAY(LAST_DAY(end)) AND DAY(start) = DAY(LAST_DAY(start)) THEN 0 ELSE (DAY(end) - DAY(start)) / 31.0 END
20360 let dd = Expression::Function(Box::new(Function::new(
20361 "DATE_DIFF".to_string(),
20362 vec![
20363 Expression::string("MONTH"),
20364 cast_start.clone(),
20365 cast_end.clone(),
20366 ],
20367 )));
20368 let day_end =
20369 Expression::Function(Box::new(Function::new(
20370 "DAY".to_string(),
20371 vec![cast_end.clone()],
20372 )));
20373 let day_start =
20374 Expression::Function(Box::new(Function::new(
20375 "DAY".to_string(),
20376 vec![cast_start.clone()],
20377 )));
20378 let last_day_end =
20379 Expression::Function(Box::new(Function::new(
20380 "LAST_DAY".to_string(),
20381 vec![cast_end.clone()],
20382 )));
20383 let last_day_start =
20384 Expression::Function(Box::new(Function::new(
20385 "LAST_DAY".to_string(),
20386 vec![cast_start.clone()],
20387 )));
20388 let day_last_end = Expression::Function(Box::new(
20389 Function::new("DAY".to_string(), vec![last_day_end]),
20390 ));
20391 let day_last_start = Expression::Function(Box::new(
20392 Function::new("DAY".to_string(), vec![last_day_start]),
20393 ));
20394 let cond1 = Expression::Eq(Box::new(BinaryOp::new(
20395 day_end.clone(),
20396 day_last_end,
20397 )));
20398 let cond2 = Expression::Eq(Box::new(BinaryOp::new(
20399 day_start.clone(),
20400 day_last_start,
20401 )));
20402 let both_cond =
20403 Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
20404 let day_diff = Expression::Sub(Box::new(BinaryOp::new(
20405 day_end, day_start,
20406 )));
20407 let day_diff_paren = Expression::Paren(Box::new(
20408 crate::expressions::Paren {
20409 this: day_diff,
20410 trailing_comments: Vec::new(),
20411 },
20412 ));
20413 let frac = Expression::Div(Box::new(BinaryOp::new(
20414 day_diff_paren,
20415 Expression::Literal(Box::new(Literal::Number(
20416 "31.0".to_string(),
20417 ))),
20418 )));
20419 let case_expr = Expression::Case(Box::new(Case {
20420 operand: None,
20421 whens: vec![(both_cond, Expression::number(0))],
20422 else_: Some(frac),
20423 comments: Vec::new(),
20424 inferred_type: None,
20425 }));
20426 Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
20427 }
20428 DialectType::Snowflake | DialectType::Redshift => {
20429 let mut args = f.args;
20430 let end_date = args.remove(0);
20431 let start_date = args.remove(0);
20432 let unit = Expression::Identifier(Identifier::new("MONTH"));
20433 Ok(Expression::Function(Box::new(Function::new(
20434 "DATEDIFF".to_string(),
20435 vec![unit, start_date, end_date],
20436 ))))
20437 }
20438 DialectType::Presto
20439 | DialectType::Trino
20440 | DialectType::Athena => {
20441 let mut args = f.args;
20442 let end_date = args.remove(0);
20443 let start_date = args.remove(0);
20444 Ok(Expression::Function(Box::new(Function::new(
20445 "DATE_DIFF".to_string(),
20446 vec![Expression::string("MONTH"), start_date, end_date],
20447 ))))
20448 }
20449 _ => Ok(Expression::Function(f)),
20450 }
20451 }
20452 // MONTHS_BETWEEN(end, start, roundOff) - 3-arg form (Spark-specific)
20453 // Drop the roundOff arg for non-Spark targets, keep it for Spark
20454 "MONTHS_BETWEEN" if f.args.len() == 3 => {
20455 match target {
20456 DialectType::Spark | DialectType::Databricks => {
20457 Ok(Expression::Function(f))
20458 }
20459 _ => {
20460 // Drop the 3rd arg and delegate to the 2-arg logic
20461 let mut args = f.args;
20462 let end_date = args.remove(0);
20463 let start_date = args.remove(0);
20464 // Re-create as 2-arg and process
20465 let f2 = Function::new(
20466 "MONTHS_BETWEEN".to_string(),
20467 vec![end_date, start_date],
20468 );
20469 let e2 = Expression::Function(Box::new(f2));
20470 Self::cross_dialect_normalize(e2, source, target)
20471 }
20472 }
20473 }
20474 // TO_TIMESTAMP(x) with 1 arg -> CAST(x AS TIMESTAMP) for most targets
20475 "TO_TIMESTAMP"
20476 if f.args.len() == 1
20477 && matches!(
20478 source,
20479 DialectType::Spark
20480 | DialectType::Databricks
20481 | DialectType::Hive
20482 ) =>
20483 {
20484 let arg = f.args.into_iter().next().unwrap();
20485 Ok(Expression::Cast(Box::new(Cast {
20486 this: arg,
20487 to: DataType::Timestamp {
20488 timezone: false,
20489 precision: None,
20490 },
20491 trailing_comments: vec![],
20492 double_colon_syntax: false,
20493 format: None,
20494 default: None,
20495 inferred_type: None,
20496 })))
20497 }
20498 // STRING(x) -> CAST(x AS STRING) for Spark target
20499 "STRING"
20500 if f.args.len() == 1
20501 && matches!(
20502 source,
20503 DialectType::Spark | DialectType::Databricks
20504 ) =>
20505 {
20506 let arg = f.args.into_iter().next().unwrap();
20507 let dt = match target {
20508 DialectType::Spark
20509 | DialectType::Databricks
20510 | DialectType::Hive => DataType::Custom {
20511 name: "STRING".to_string(),
20512 },
20513 _ => DataType::Text,
20514 };
20515 Ok(Expression::Cast(Box::new(Cast {
20516 this: arg,
20517 to: dt,
20518 trailing_comments: vec![],
20519 double_colon_syntax: false,
20520 format: None,
20521 default: None,
20522 inferred_type: None,
20523 })))
20524 }
20525 // LOGICAL_OR(x) -> BOOL_OR(x) for Spark target
20526 "LOGICAL_OR" if f.args.len() == 1 => {
20527 let name = match target {
20528 DialectType::Spark | DialectType::Databricks => "BOOL_OR",
20529 _ => "LOGICAL_OR",
20530 };
20531 Ok(Expression::Function(Box::new(Function::new(
20532 name.to_string(),
20533 f.args,
20534 ))))
20535 }
20536 // SPLIT(x, pattern) from Spark -> STR_SPLIT_REGEX for DuckDB, REGEXP_SPLIT for Presto
20537 "SPLIT"
20538 if f.args.len() == 2
20539 && matches!(
20540 source,
20541 DialectType::Spark
20542 | DialectType::Databricks
20543 | DialectType::Hive
20544 ) =>
20545 {
20546 let name = match target {
20547 DialectType::DuckDB => "STR_SPLIT_REGEX",
20548 DialectType::Presto
20549 | DialectType::Trino
20550 | DialectType::Athena => "REGEXP_SPLIT",
20551 DialectType::Spark
20552 | DialectType::Databricks
20553 | DialectType::Hive => "SPLIT",
20554 _ => "SPLIT",
20555 };
20556 Ok(Expression::Function(Box::new(Function::new(
20557 name.to_string(),
20558 f.args,
20559 ))))
20560 }
20561 // TRY_ELEMENT_AT -> ELEMENT_AT for Presto, array[idx] for DuckDB
20562 "TRY_ELEMENT_AT" if f.args.len() == 2 => match target {
20563 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
20564 Ok(Expression::Function(Box::new(Function::new(
20565 "ELEMENT_AT".to_string(),
20566 f.args,
20567 ))))
20568 }
20569 DialectType::DuckDB => {
20570 let mut args = f.args;
20571 let arr = args.remove(0);
20572 let idx = args.remove(0);
20573 Ok(Expression::Subscript(Box::new(
20574 crate::expressions::Subscript {
20575 this: arr,
20576 index: idx,
20577 },
20578 )))
20579 }
20580 _ => Ok(Expression::Function(f)),
20581 },
20582 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, LIST_FILTER for DuckDB
20583 "ARRAY_FILTER" if f.args.len() == 2 => {
20584 let name = match target {
20585 DialectType::DuckDB => "LIST_FILTER",
20586 DialectType::StarRocks => "ARRAY_FILTER",
20587 _ => "FILTER",
20588 };
20589 Ok(Expression::Function(Box::new(Function::new(
20590 name.to_string(),
20591 f.args,
20592 ))))
20593 }
20594 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
20595 "FILTER" if f.args.len() == 2 => {
20596 let name = match target {
20597 DialectType::DuckDB => "LIST_FILTER",
20598 DialectType::StarRocks => "ARRAY_FILTER",
20599 _ => "FILTER",
20600 };
20601 Ok(Expression::Function(Box::new(Function::new(
20602 name.to_string(),
20603 f.args,
20604 ))))
20605 }
20606 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
20607 "REDUCE" if f.args.len() >= 3 => {
20608 let name = match target {
20609 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
20610 _ => "REDUCE",
20611 };
20612 Ok(Expression::Function(Box::new(Function::new(
20613 name.to_string(),
20614 f.args,
20615 ))))
20616 }
20617 // CURRENT_SCHEMA() -> dialect-specific
20618 "CURRENT_SCHEMA" => {
20619 match target {
20620 DialectType::PostgreSQL => {
20621 // PostgreSQL: CURRENT_SCHEMA (no parens)
20622 Ok(Expression::Function(Box::new(Function {
20623 name: "CURRENT_SCHEMA".to_string(),
20624 args: vec![],
20625 distinct: false,
20626 trailing_comments: vec![],
20627 use_bracket_syntax: false,
20628 no_parens: true,
20629 quoted: false,
20630 span: None,
20631 inferred_type: None,
20632 })))
20633 }
20634 DialectType::MySQL
20635 | DialectType::Doris
20636 | DialectType::StarRocks => Ok(Expression::Function(Box::new(
20637 Function::new("SCHEMA".to_string(), vec![]),
20638 ))),
20639 DialectType::TSQL => Ok(Expression::Function(Box::new(
20640 Function::new("SCHEMA_NAME".to_string(), vec![]),
20641 ))),
20642 DialectType::SQLite => Ok(Expression::Literal(Box::new(
20643 Literal::String("main".to_string()),
20644 ))),
20645 _ => Ok(Expression::Function(f)),
20646 }
20647 }
20648 // LTRIM(str, chars) 2-arg -> TRIM(LEADING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20649 "LTRIM" if f.args.len() == 2 => match target {
20650 DialectType::Spark
20651 | DialectType::Hive
20652 | DialectType::Databricks
20653 | DialectType::ClickHouse => {
20654 let mut args = f.args;
20655 let str_expr = args.remove(0);
20656 let chars = args.remove(0);
20657 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20658 this: str_expr,
20659 characters: Some(chars),
20660 position: crate::expressions::TrimPosition::Leading,
20661 sql_standard_syntax: true,
20662 position_explicit: true,
20663 })))
20664 }
20665 _ => Ok(Expression::Function(f)),
20666 },
20667 // RTRIM(str, chars) 2-arg -> TRIM(TRAILING chars FROM str) for Spark/Hive/Databricks/ClickHouse
20668 "RTRIM" if f.args.len() == 2 => match target {
20669 DialectType::Spark
20670 | DialectType::Hive
20671 | DialectType::Databricks
20672 | DialectType::ClickHouse => {
20673 let mut args = f.args;
20674 let str_expr = args.remove(0);
20675 let chars = args.remove(0);
20676 Ok(Expression::Trim(Box::new(crate::expressions::TrimFunc {
20677 this: str_expr,
20678 characters: Some(chars),
20679 position: crate::expressions::TrimPosition::Trailing,
20680 sql_standard_syntax: true,
20681 position_explicit: true,
20682 })))
20683 }
20684 _ => Ok(Expression::Function(f)),
20685 },
20686 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
20687 "ARRAY_REVERSE" if f.args.len() == 1 => match target {
20688 DialectType::ClickHouse => {
20689 let mut new_f = *f;
20690 new_f.name = "arrayReverse".to_string();
20691 Ok(Expression::Function(Box::new(new_f)))
20692 }
20693 _ => Ok(Expression::Function(f)),
20694 },
20695 // UUID() -> NEWID() for TSQL
20696 "UUID" if f.args.is_empty() => match target {
20697 DialectType::TSQL | DialectType::Fabric => {
20698 Ok(Expression::Function(Box::new(Function::new(
20699 "NEWID".to_string(),
20700 vec![],
20701 ))))
20702 }
20703 _ => Ok(Expression::Function(f)),
20704 },
20705 // FARM_FINGERPRINT(x) -> farmFingerprint64(x) for ClickHouse, FARMFINGERPRINT64(x) for Redshift
20706 "FARM_FINGERPRINT" if f.args.len() == 1 => match target {
20707 DialectType::ClickHouse => {
20708 let mut new_f = *f;
20709 new_f.name = "farmFingerprint64".to_string();
20710 Ok(Expression::Function(Box::new(new_f)))
20711 }
20712 DialectType::Redshift => {
20713 let mut new_f = *f;
20714 new_f.name = "FARMFINGERPRINT64".to_string();
20715 Ok(Expression::Function(Box::new(new_f)))
20716 }
20717 _ => Ok(Expression::Function(f)),
20718 },
20719 // JSON_KEYS(x) -> JSON_OBJECT_KEYS(x) for Databricks/Spark, OBJECT_KEYS(x) for Snowflake
20720 "JSON_KEYS" => match target {
20721 DialectType::Databricks | DialectType::Spark => {
20722 let mut new_f = *f;
20723 new_f.name = "JSON_OBJECT_KEYS".to_string();
20724 Ok(Expression::Function(Box::new(new_f)))
20725 }
20726 DialectType::Snowflake => {
20727 let mut new_f = *f;
20728 new_f.name = "OBJECT_KEYS".to_string();
20729 Ok(Expression::Function(Box::new(new_f)))
20730 }
20731 _ => Ok(Expression::Function(f)),
20732 },
20733 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake
20734 "WEEKOFYEAR" => match target {
20735 DialectType::Snowflake => {
20736 let mut new_f = *f;
20737 new_f.name = "WEEKISO".to_string();
20738 Ok(Expression::Function(Box::new(new_f)))
20739 }
20740 _ => Ok(Expression::Function(f)),
20741 },
20742 // FORMAT(fmt, args...) -> FORMAT_STRING(fmt, args...) for Databricks
20743 "FORMAT"
20744 if f.args.len() >= 2 && matches!(source, DialectType::Generic) =>
20745 {
20746 match target {
20747 DialectType::Databricks | DialectType::Spark => {
20748 let mut new_f = *f;
20749 new_f.name = "FORMAT_STRING".to_string();
20750 Ok(Expression::Function(Box::new(new_f)))
20751 }
20752 _ => Ok(Expression::Function(f)),
20753 }
20754 }
// CONCAT_WS('-', args...) -> CONCAT_WS('-', CAST(arg AS VARCHAR), ...) for Presto/Trino
// Guard requires at least the separator plus one value argument.
"CONCAT_WS" if f.args.len() >= 2 => match target {
    // Presto-family engines: wrap every value argument (but not the
    // separator) in an explicit CAST(... AS VARCHAR).
    DialectType::Presto | DialectType::Trino | DialectType::Athena => {
        let mut args = f.args;
        // The first argument is the separator; it is left uncast.
        let sep = args.remove(0);
        let cast_args: Vec<Expression> = args
            .into_iter()
            .map(|a| {
                Expression::Cast(Box::new(Cast {
                    this: a,
                    to: DataType::VarChar {
                        length: None,
                        parenthesized_length: false,
                    },
                    double_colon_syntax: false,
                    trailing_comments: Vec::new(),
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            })
            .collect();
        let mut new_args = vec![sep];
        new_args.extend(cast_args);
        Ok(Expression::Function(Box::new(Function::new(
            "CONCAT_WS".to_string(),
            new_args,
        ))))
    }
    // DuckDB: wrap the call in
    //   CASE WHEN a1 IS NULL OR a2 IS NULL ... THEN NULL ELSE CONCAT_WS(...) END
    // so the whole expression yields NULL when ANY argument (separator
    // included) is NULL. NOTE(review): presumably matching the source
    // dialect's CONCAT_WS NULL semantics — confirm.
    DialectType::DuckDB => {
        let args = f.args;
        // One IS NULL check per argument.
        let mut null_checks = args.iter().cloned().map(|arg| {
            Expression::IsNull(Box::new(crate::expressions::IsNull {
                this: arg,
                not: false,
                postfix_form: false,
            }))
        });
        let first_null_check = null_checks
            .next()
            .expect("CONCAT_WS with >= 2 args must yield a null check");
        // OR-fold the remaining checks into a single condition.
        let null_check =
            null_checks.fold(first_null_check, |left, right| {
                Expression::Or(Box::new(BinaryOp {
                    left,
                    right,
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }))
            });
        Ok(Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(null_check, Expression::Null(Null))],
            else_: Some(Expression::Function(Box::new(Function::new(
                "CONCAT_WS".to_string(),
                args,
            )))),
            comments: vec![],
            inferred_type: None,
        })))
    }
    // All other targets: pass the call through unchanged.
    _ => Ok(Expression::Function(f)),
},
// ARRAY_SLICE(x, start, end) -> SLICE(x, start, end) for Presto/Trino/Databricks, arraySlice for ClickHouse
"ARRAY_SLICE" if f.args.len() >= 2 => match target {
    // Snowflake -> DuckDB needs an index-convention translation, so this
    // guarded arm must precede the generic rename arms below.
    DialectType::DuckDB
        if f.args.len() == 3
            && matches!(source, DialectType::Snowflake) =>
    {
        // Snowflake ARRAY_SLICE (0-indexed, exclusive end)
        // -> DuckDB ARRAY_SLICE (1-indexed, inclusive end)
        let mut args = f.args;
        let arr = args.remove(0);
        let start = args.remove(0);
        let end = args.remove(0);

        // CASE WHEN start >= 0 THEN start + 1 ELSE start END
        // Non-negative starts shift by one for 1-based indexing;
        // negative starts (counting from the end) keep their value.
        let adjusted_start = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Gte(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Add(Box::new(BinaryOp {
                    left: start.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(start),
            comments: vec![],
            inferred_type: None,
        }));

        // CASE WHEN end < 0 THEN end - 1 ELSE end END
        // Converts the exclusive end to DuckDB's inclusive end for
        // negative (from-the-end) positions.
        let adjusted_end = Expression::Case(Box::new(Case {
            operand: None,
            whens: vec![(
                Expression::Lt(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(0),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
                Expression::Sub(Box::new(BinaryOp {
                    left: end.clone(),
                    right: Expression::number(1),
                    left_comments: vec![],
                    operator_comments: vec![],
                    trailing_comments: vec![],
                    inferred_type: None,
                })),
            )],
            else_: Some(end),
            comments: vec![],
            inferred_type: None,
        }));

        Ok(Expression::Function(Box::new(Function::new(
            "ARRAY_SLICE".to_string(),
            vec![arr, adjusted_start, adjusted_end],
        ))))
    }
    // These engines expose the same operation under the name SLICE.
    DialectType::Presto
    | DialectType::Trino
    | DialectType::Athena
    | DialectType::Databricks
    | DialectType::Spark => {
        let mut new_f = *f;
        new_f.name = "SLICE".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    // ClickHouse uses camelCase builtin names.
    DialectType::ClickHouse => {
        let mut new_f = *f;
        new_f.name = "arraySlice".to_string();
        Ok(Expression::Function(Box::new(new_f)))
    }
    _ => Ok(Expression::Function(f)),
},
20906 // ARRAY_PREPEND(arr, x) -> LIST_PREPEND(x, arr) for DuckDB (swap args)
20907 "ARRAY_PREPEND" if f.args.len() == 2 => match target {
20908 DialectType::DuckDB => {
20909 let mut args = f.args;
20910 let arr = args.remove(0);
20911 let val = args.remove(0);
20912 Ok(Expression::Function(Box::new(Function::new(
20913 "LIST_PREPEND".to_string(),
20914 vec![val, arr],
20915 ))))
20916 }
20917 _ => Ok(Expression::Function(f)),
20918 },
// ARRAY_REMOVE(arr, target) -> dialect-specific
// Rewrites removal of an element as a filter keeping everything that is
// not equal (<>) to the target value.
"ARRAY_REMOVE" if f.args.len() == 2 => {
    match target {
        DialectType::DuckDB => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            // `_u` is the lambda parameter naming each array element.
            let u_id = crate::expressions::Identifier::new("_u");
            // LIST_FILTER(arr, _u -> _u <> target)
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "LIST_FILTER".to_string(),
                vec![arr, lambda],
            ))))
        }
        DialectType::ClickHouse => {
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            // arrayFilter(_u -> _u <> target, arr)
            // Note: ClickHouse takes the lambda FIRST, then the array.
            let lambda = Expression::Lambda(Box::new(
                crate::expressions::LambdaExpr {
                    parameters: vec![u_id.clone()],
                    body: Expression::Neq(Box::new(BinaryOp {
                        left: Expression::Identifier(u_id),
                        right: target_val,
                        left_comments: Vec::new(),
                        operator_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    })),
                    colon: false,
                    parameter_types: Vec::new(),
                },
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "arrayFilter".to_string(),
                vec![lambda, arr],
            ))))
        }
        DialectType::BigQuery => {
            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
            // BigQuery has no array filter builtin here, so the removal is
            // expressed as an ARRAY(subquery) over UNNEST.
            let mut args = f.args;
            let arr = args.remove(0);
            let target_val = args.remove(0);
            let u_id = crate::expressions::Identifier::new("_u");
            let u_col = Expression::Column(Box::new(
                crate::expressions::Column {
                    name: u_id.clone(),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                },
            ));
            // UNNEST(the_array) AS _u
            let unnest_expr = Expression::Unnest(Box::new(
                crate::expressions::UnnestFunc {
                    this: arr,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                },
            ));
            let aliased_unnest = Expression::Alias(Box::new(
                crate::expressions::Alias {
                    this: unnest_expr,
                    alias: u_id.clone(),
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                },
            ));
            // _u <> target
            let where_cond = Expression::Neq(Box::new(BinaryOp {
                left: u_col.clone(),
                right: target_val,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            // SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target
            let subquery = Expression::Select(Box::new(
                crate::expressions::Select::new()
                    .column(u_col)
                    .from(aliased_unnest)
                    .where_(where_cond),
            ));
            // ARRAY(subquery) -- use ArrayFunc with subquery as single element
            Ok(Expression::ArrayFunc(Box::new(
                crate::expressions::ArrayConstructor {
                    expressions: vec![subquery],
                    bracket_notation: false,
                    use_list_keyword: false,
                },
            )))
        }
        _ => Ok(Expression::Function(f)),
    }
}
21038 // PARSE_JSON(str) -> remove for SQLite/Doris (just use the string literal)
21039 "PARSE_JSON" if f.args.len() == 1 => {
21040 match target {
21041 DialectType::SQLite
21042 | DialectType::Doris
21043 | DialectType::MySQL
21044 | DialectType::StarRocks => {
21045 // Strip PARSE_JSON, return the inner argument
21046 Ok(f.args.into_iter().next().unwrap())
21047 }
21048 _ => Ok(Expression::Function(f)),
21049 }
21050 }
21051 // JSON_REMOVE(PARSE_JSON(str), path...) -> for SQLite strip PARSE_JSON
21052 // This is handled by PARSE_JSON stripping above; JSON_REMOVE is passed through
21053 "JSON_REMOVE" => Ok(Expression::Function(f)),
21054 // JSON_SET(PARSE_JSON(str), path, PARSE_JSON(val)) -> for SQLite strip PARSE_JSON
21055 // This is handled by PARSE_JSON stripping above; JSON_SET is passed through
21056 "JSON_SET" => Ok(Expression::Function(f)),
// DECODE(x, search1, result1, ..., default) -> CASE WHEN
// Behavior per search value type:
// NULL literal -> CASE WHEN x IS NULL THEN result
// Literal (number, string, bool) -> CASE WHEN x = literal THEN result
// Non-literal (column, expr) -> CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
"DECODE" if f.args.len() >= 3 => {
    // Keep as DECODE for targets that support it natively
    let keep_as_decode = matches!(
        target,
        DialectType::Oracle
            | DialectType::Snowflake
            | DialectType::Redshift
            | DialectType::Teradata
            | DialectType::Spark
            | DialectType::Databricks
    );
    if keep_as_decode {
        return Ok(Expression::Function(f));
    }

    let mut args = f.args;
    let this_expr = args.remove(0);
    // Walk the remaining args two at a time as (search, result) pairs;
    // a trailing odd argument, if any, becomes the ELSE default.
    let mut pairs = Vec::new();
    let mut default = None;
    let mut i = 0;
    while i + 1 < args.len() {
        pairs.push((args[i].clone(), args[i + 1].clone()));
        i += 2;
    }
    if i < args.len() {
        default = Some(args[i].clone());
    }
    // Helper: check if expression is a literal value
    fn is_literal(e: &Expression) -> bool {
        matches!(
            e,
            Expression::Literal(_)
                | Expression::Boolean(_)
                | Expression::Neg(_)
        )
    }
    let whens: Vec<(Expression, Expression)> = pairs
        .into_iter()
        .map(|(search, result)| {
            if matches!(&search, Expression::Null(_)) {
                // NULL search -> IS NULL
                let condition = Expression::Is(Box::new(BinaryOp {
                    left: this_expr.clone(),
                    right: Expression::Null(crate::expressions::Null),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                (condition, result)
            } else if is_literal(&search) {
                // Literal search -> simple equality
                let eq = Expression::Eq(Box::new(BinaryOp {
                    left: this_expr.clone(),
                    right: search,
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                (eq, result)
            } else {
                // Non-literal (column ref, expression) -> null-safe comparison
                // Comparison searches get parenthesized so embedding them
                // in `x = ...` / `... IS NULL` does not change precedence.
                let needs_paren = matches!(
                    &search,
                    Expression::Eq(_)
                        | Expression::Neq(_)
                        | Expression::Gt(_)
                        | Expression::Gte(_)
                        | Expression::Lt(_)
                        | Expression::Lte(_)
                );
                let search_for_eq = if needs_paren {
                    Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: search.clone(),
                            trailing_comments: Vec::new(),
                        },
                    ))
                } else {
                    search.clone()
                };
                let eq = Expression::Eq(Box::new(BinaryOp {
                    left: this_expr.clone(),
                    right: search_for_eq,
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                let search_for_null = if needs_paren {
                    Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: search.clone(),
                            trailing_comments: Vec::new(),
                        },
                    ))
                } else {
                    search.clone()
                };
                // x IS NULL
                let x_is_null = Expression::Is(Box::new(BinaryOp {
                    left: this_expr.clone(),
                    right: Expression::Null(crate::expressions::Null),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                // search IS NULL
                let s_is_null = Expression::Is(Box::new(BinaryOp {
                    left: search_for_null,
                    right: Expression::Null(crate::expressions::Null),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                let both_null = Expression::And(Box::new(BinaryOp {
                    left: x_is_null,
                    right: s_is_null,
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                // x = search OR (x IS NULL AND search IS NULL)
                let condition = Expression::Or(Box::new(BinaryOp {
                    left: eq,
                    right: Expression::Paren(Box::new(
                        crate::expressions::Paren {
                            this: both_null,
                            trailing_comments: Vec::new(),
                        },
                    )),
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
                (condition, result)
            }
        })
        .collect();
    Ok(Expression::Case(Box::new(Case {
        operand: None,
        whens,
        else_: default,
        comments: Vec::new(),
        inferred_type: None,
    })))
}
21211 // LEVENSHTEIN(a, b, ...) -> dialect-specific
21212 "LEVENSHTEIN" => {
21213 match target {
21214 DialectType::BigQuery => {
21215 let mut new_f = *f;
21216 new_f.name = "EDIT_DISTANCE".to_string();
21217 Ok(Expression::Function(Box::new(new_f)))
21218 }
21219 DialectType::Drill => {
21220 let mut new_f = *f;
21221 new_f.name = "LEVENSHTEIN_DISTANCE".to_string();
21222 Ok(Expression::Function(Box::new(new_f)))
21223 }
21224 DialectType::PostgreSQL if f.args.len() == 6 => {
21225 // PostgreSQL: LEVENSHTEIN(src, tgt, ins, del, sub, max_d) -> LEVENSHTEIN_LESS_EQUAL
21226 // 2 args: basic, 5 args: with costs, 6 args: with costs + max_distance
21227 let mut new_f = *f;
21228 new_f.name = "LEVENSHTEIN_LESS_EQUAL".to_string();
21229 Ok(Expression::Function(Box::new(new_f)))
21230 }
21231 _ => Ok(Expression::Function(f)),
21232 }
21233 }
21234 // ARRAY_MAX(x) -> arrayMax(x) for ClickHouse, LIST_MAX(x) for DuckDB
21235 "ARRAY_MAX" => {
21236 let name = match target {
21237 DialectType::ClickHouse => "arrayMax",
21238 DialectType::DuckDB => "LIST_MAX",
21239 _ => "ARRAY_MAX",
21240 };
21241 let mut new_f = *f;
21242 new_f.name = name.to_string();
21243 Ok(Expression::Function(Box::new(new_f)))
21244 }
21245 // ARRAY_MIN(x) -> arrayMin(x) for ClickHouse, LIST_MIN(x) for DuckDB
21246 "ARRAY_MIN" => {
21247 let name = match target {
21248 DialectType::ClickHouse => "arrayMin",
21249 DialectType::DuckDB => "LIST_MIN",
21250 _ => "ARRAY_MIN",
21251 };
21252 let mut new_f = *f;
21253 new_f.name = name.to_string();
21254 Ok(Expression::Function(Box::new(new_f)))
21255 }
// JAROWINKLER_SIMILARITY(a, b) -> jaroWinklerSimilarity(UPPER(a), UPPER(b)) for ClickHouse
// -> JARO_WINKLER_SIMILARITY(UPPER(a), UPPER(b)) for DuckDB
// Both arguments are uppercased before comparison so the score is
// case-insensitive. NOTE(review): presumably matching the source
// dialect's case-insensitive scoring — confirm.
"JAROWINKLER_SIMILARITY" if f.args.len() == 2 => {
    let mut args = f.args;
    let b = args.pop().unwrap();
    let a = args.pop().unwrap();
    match target {
        DialectType::ClickHouse => {
            let upper_a = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(a),
            ));
            let upper_b = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(b),
            ));
            Ok(Expression::Function(Box::new(Function::new(
                "jaroWinklerSimilarity".to_string(),
                vec![upper_a, upper_b],
            ))))
        }
        DialectType::DuckDB => {
            let upper_a = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(a),
            ));
            let upper_b = Expression::Upper(Box::new(
                crate::expressions::UnaryFunc::new(b),
            ));
            let score = Expression::Function(Box::new(Function::new(
                "JARO_WINKLER_SIMILARITY".to_string(),
                vec![upper_a, upper_b],
            )));
            // Scale by 100 and cast to INT. NOTE(review): presumably
            // converting DuckDB's fractional score to the source's
            // 0..100 integer range — confirm.
            let scaled = Expression::Mul(Box::new(BinaryOp {
                left: score,
                right: Expression::number(100),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            Ok(Expression::Cast(Box::new(Cast {
                this: scaled,
                to: DataType::Int {
                    length: None,
                    integer_spelling: false,
                },
                trailing_comments: Vec::new(),
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        // Other targets: rebuild the original call with the same args.
        _ => Ok(Expression::Function(Box::new(Function::new(
            "JAROWINKLER_SIMILARITY".to_string(),
            vec![a, b],
        )))),
    }
}
21313 // CURRENT_SCHEMAS(x) -> CURRENT_SCHEMAS() for Snowflake (drop arg)
21314 "CURRENT_SCHEMAS" => match target {
21315 DialectType::Snowflake => Ok(Expression::Function(Box::new(
21316 Function::new("CURRENT_SCHEMAS".to_string(), vec![]),
21317 ))),
21318 _ => Ok(Expression::Function(f)),
21319 },
// TRUNC/TRUNCATE (numeric) -> dialect-specific
// Only the 0/1/2-argument numeric forms are handled here.
"TRUNC" | "TRUNCATE" if f.args.len() <= 2 => {
    match target {
        DialectType::TSQL | DialectType::Fabric => {
            // ROUND(x, decimals, 1) - the 1 flag means truncation
            let mut args = f.args;
            let this = if args.is_empty() {
                // No arguments at all: nothing to rewrite, emit as TRUNC.
                return Ok(Expression::Function(Box::new(
                    Function::new("TRUNC".to_string(), args),
                )));
            } else {
                args.remove(0)
            };
            // Missing decimals defaults to 0 (truncate to integer).
            let decimals = if args.is_empty() {
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                )))
            } else {
                args.remove(0)
            };
            Ok(Expression::Function(Box::new(Function::new(
                "ROUND".to_string(),
                vec![
                    this,
                    decimals,
                    Expression::Literal(Box::new(Literal::Number(
                        "1".to_string(),
                    ))),
                ],
            ))))
        }
        DialectType::Presto
        | DialectType::Trino
        | DialectType::Athena => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::MySQL
        | DialectType::SingleStore
        | DialectType::TiDB => {
            // TRUNCATE(x, decimals)
            let mut new_f = *f;
            new_f.name = "TRUNCATE".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::DuckDB => {
            // DuckDB supports TRUNC(x, decimals) — preserve both args
            let mut args = f.args;
            // Snowflake fractions_supported: wrap non-INT decimals in CAST(... AS INT)
            if args.len() == 2
                && matches!(source, DialectType::Snowflake)
            {
                let decimals = args.remove(1);
                // Numeric literals and casts to integer types are
                // already integral and stay unwrapped.
                let is_int = matches!(&decimals, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)))
                    || matches!(&decimals, Expression::Cast(c) if matches!(c.to, DataType::Int { .. } | DataType::SmallInt { .. } | DataType::BigInt { .. } | DataType::TinyInt { .. }));
                let wrapped = if !is_int {
                    Expression::Cast(Box::new(
                        crate::expressions::Cast {
                            this: decimals,
                            to: DataType::Int {
                                length: None,
                                integer_spelling: false,
                            },
                            double_colon_syntax: false,
                            trailing_comments: Vec::new(),
                            format: None,
                            default: None,
                            inferred_type: None,
                        },
                    ))
                } else {
                    decimals
                };
                args.push(wrapped);
            }
            Ok(Expression::Function(Box::new(Function::new(
                "TRUNC".to_string(),
                args,
            ))))
        }
        DialectType::ClickHouse => {
            // trunc(x, decimals) - lowercase
            let mut new_f = *f;
            new_f.name = "trunc".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
        DialectType::Spark | DialectType::Databricks => {
            // Spark: TRUNC is date-only; numeric TRUNC → CAST(x AS BIGINT)
            // Any second (decimals) argument is intentionally dropped here.
            let this = f.args.into_iter().next().unwrap_or(
                Expression::Literal(Box::new(Literal::Number(
                    "0".to_string(),
                ))),
            );
            Ok(Expression::Cast(Box::new(crate::expressions::Cast {
                this,
                to: crate::expressions::DataType::BigInt {
                    length: None,
                },
                double_colon_syntax: false,
                trailing_comments: Vec::new(),
                format: None,
                default: None,
                inferred_type: None,
            })))
        }
        _ => {
            // TRUNC(x, decimals) for PostgreSQL, Oracle, Snowflake, etc.
            let mut new_f = *f;
            new_f.name = "TRUNC".to_string();
            Ok(Expression::Function(Box::new(new_f)))
        }
    }
}
21435 // CURRENT_VERSION() -> VERSION() for most dialects
21436 "CURRENT_VERSION" => match target {
21437 DialectType::Snowflake
21438 | DialectType::Databricks
21439 | DialectType::StarRocks => Ok(Expression::Function(f)),
21440 DialectType::SQLite => {
21441 let mut new_f = *f;
21442 new_f.name = "SQLITE_VERSION".to_string();
21443 Ok(Expression::Function(Box::new(new_f)))
21444 }
21445 _ => {
21446 let mut new_f = *f;
21447 new_f.name = "VERSION".to_string();
21448 Ok(Expression::Function(Box::new(new_f)))
21449 }
21450 },
21451 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
21452 "ARRAY_REVERSE" => match target {
21453 DialectType::ClickHouse => {
21454 let mut new_f = *f;
21455 new_f.name = "arrayReverse".to_string();
21456 Ok(Expression::Function(Box::new(new_f)))
21457 }
21458 _ => Ok(Expression::Function(f)),
21459 },
21460 // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
21461 "GENERATE_DATE_ARRAY" => {
21462 let mut args = f.args;
21463 if matches!(target, DialectType::BigQuery) {
21464 // BigQuery keeps GENERATE_DATE_ARRAY; add default interval if not present
21465 if args.len() == 2 {
21466 let default_interval = Expression::Interval(Box::new(
21467 crate::expressions::Interval {
21468 this: Some(Expression::Literal(Box::new(
21469 Literal::String("1".to_string()),
21470 ))),
21471 unit: Some(
21472 crate::expressions::IntervalUnitSpec::Simple {
21473 unit: crate::expressions::IntervalUnit::Day,
21474 use_plural: false,
21475 },
21476 ),
21477 },
21478 ));
21479 args.push(default_interval);
21480 }
21481 Ok(Expression::Function(Box::new(Function::new(
21482 "GENERATE_DATE_ARRAY".to_string(),
21483 args,
21484 ))))
21485 } else if matches!(target, DialectType::DuckDB) {
21486 // DuckDB: CAST(GENERATE_SERIES(start, end, step) AS DATE[])
21487 let start = args.get(0).cloned();
21488 let end = args.get(1).cloned();
21489 let step = args.get(2).cloned().or_else(|| {
21490 Some(Expression::Interval(Box::new(
21491 crate::expressions::Interval {
21492 this: Some(Expression::Literal(Box::new(
21493 Literal::String("1".to_string()),
21494 ))),
21495 unit: Some(
21496 crate::expressions::IntervalUnitSpec::Simple {
21497 unit: crate::expressions::IntervalUnit::Day,
21498 use_plural: false,
21499 },
21500 ),
21501 },
21502 )))
21503 });
21504 let gen_series = Expression::GenerateSeries(Box::new(
21505 crate::expressions::GenerateSeries {
21506 start: start.map(Box::new),
21507 end: end.map(Box::new),
21508 step: step.map(Box::new),
21509 is_end_exclusive: None,
21510 },
21511 ));
21512 Ok(Expression::Cast(Box::new(Cast {
21513 this: gen_series,
21514 to: DataType::Array {
21515 element_type: Box::new(DataType::Date),
21516 dimension: None,
21517 },
21518 trailing_comments: vec![],
21519 double_colon_syntax: false,
21520 format: None,
21521 default: None,
21522 inferred_type: None,
21523 })))
21524 } else if matches!(
21525 target,
21526 DialectType::Presto | DialectType::Trino | DialectType::Athena
21527 ) {
21528 // Presto/Trino: SEQUENCE(start, end, interval) with interval normalization
21529 let start = args.get(0).cloned();
21530 let end = args.get(1).cloned();
21531 let step = args.get(2).cloned().or_else(|| {
21532 Some(Expression::Interval(Box::new(
21533 crate::expressions::Interval {
21534 this: Some(Expression::Literal(Box::new(
21535 Literal::String("1".to_string()),
21536 ))),
21537 unit: Some(
21538 crate::expressions::IntervalUnitSpec::Simple {
21539 unit: crate::expressions::IntervalUnit::Day,
21540 use_plural: false,
21541 },
21542 ),
21543 },
21544 )))
21545 });
21546 let gen_series = Expression::GenerateSeries(Box::new(
21547 crate::expressions::GenerateSeries {
21548 start: start.map(Box::new),
21549 end: end.map(Box::new),
21550 step: step.map(Box::new),
21551 is_end_exclusive: None,
21552 },
21553 ));
21554 Ok(gen_series)
21555 } else if matches!(
21556 target,
21557 DialectType::Spark | DialectType::Databricks
21558 ) {
21559 // Spark/Databricks: SEQUENCE(start, end, step) - keep step as-is
21560 let start = args.get(0).cloned();
21561 let end = args.get(1).cloned();
21562 let step = args.get(2).cloned().or_else(|| {
21563 Some(Expression::Interval(Box::new(
21564 crate::expressions::Interval {
21565 this: Some(Expression::Literal(Box::new(
21566 Literal::String("1".to_string()),
21567 ))),
21568 unit: Some(
21569 crate::expressions::IntervalUnitSpec::Simple {
21570 unit: crate::expressions::IntervalUnit::Day,
21571 use_plural: false,
21572 },
21573 ),
21574 },
21575 )))
21576 });
21577 let gen_series = Expression::GenerateSeries(Box::new(
21578 crate::expressions::GenerateSeries {
21579 start: start.map(Box::new),
21580 end: end.map(Box::new),
21581 step: step.map(Box::new),
21582 is_end_exclusive: None,
21583 },
21584 ));
21585 Ok(gen_series)
21586 } else if matches!(target, DialectType::Snowflake) {
21587 // Snowflake: keep as GENERATE_DATE_ARRAY for later transform
21588 if args.len() == 2 {
21589 let default_interval = Expression::Interval(Box::new(
21590 crate::expressions::Interval {
21591 this: Some(Expression::Literal(Box::new(
21592 Literal::String("1".to_string()),
21593 ))),
21594 unit: Some(
21595 crate::expressions::IntervalUnitSpec::Simple {
21596 unit: crate::expressions::IntervalUnit::Day,
21597 use_plural: false,
21598 },
21599 ),
21600 },
21601 ));
21602 args.push(default_interval);
21603 }
21604 Ok(Expression::Function(Box::new(Function::new(
21605 "GENERATE_DATE_ARRAY".to_string(),
21606 args,
21607 ))))
21608 } else if matches!(
21609 target,
21610 DialectType::MySQL
21611 | DialectType::TSQL
21612 | DialectType::Fabric
21613 | DialectType::Redshift
21614 ) {
21615 // MySQL/TSQL/Redshift: keep as GENERATE_DATE_ARRAY for the preprocess
21616 // step (unnest_generate_date_array_using_recursive_cte) to convert to CTE
21617 Ok(Expression::Function(Box::new(Function::new(
21618 "GENERATE_DATE_ARRAY".to_string(),
21619 args,
21620 ))))
21621 } else {
21622 // PostgreSQL/others: convert to GenerateSeries
21623 let start = args.get(0).cloned();
21624 let end = args.get(1).cloned();
21625 let step = args.get(2).cloned().or_else(|| {
21626 Some(Expression::Interval(Box::new(
21627 crate::expressions::Interval {
21628 this: Some(Expression::Literal(Box::new(
21629 Literal::String("1".to_string()),
21630 ))),
21631 unit: Some(
21632 crate::expressions::IntervalUnitSpec::Simple {
21633 unit: crate::expressions::IntervalUnit::Day,
21634 use_plural: false,
21635 },
21636 ),
21637 },
21638 )))
21639 });
21640 Ok(Expression::GenerateSeries(Box::new(
21641 crate::expressions::GenerateSeries {
21642 start: start.map(Box::new),
21643 end: end.map(Box::new),
21644 step: step.map(Box::new),
21645 is_end_exclusive: None,
21646 },
21647 )))
21648 }
21649 }
// ARRAYS_OVERLAP(arr1, arr2) from Snowflake -> DuckDB:
// (arr1 && arr2) OR (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
// NOTE(review): the second disjunct appears to detect NULL elements by
// comparing total length against LIST_COUNT — presumably because the
// `&&` operator does not treat NULL elements as overlapping while the
// source dialect does; confirm against Snowflake semantics.
"ARRAYS_OVERLAP"
    if f.args.len() == 2
        && matches!(source, DialectType::Snowflake)
        && matches!(target, DialectType::DuckDB) =>
{
    let mut args = f.args;
    let arr1 = args.remove(0);
    let arr2 = args.remove(0);

    // (arr1 && arr2)
    let overlap = Expression::Paren(Box::new(Paren {
        this: Expression::ArrayOverlaps(Box::new(BinaryOp {
            left: arr1.clone(),
            right: arr2.clone(),
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1)
    let arr1_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr1.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr1],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2)
    let arr2_has_null = Expression::Neq(Box::new(BinaryOp {
        left: Expression::Function(Box::new(Function::new(
            "ARRAY_LENGTH".to_string(),
            vec![arr2.clone()],
        ))),
        right: Expression::Function(Box::new(Function::new(
            "LIST_COUNT".to_string(),
            vec![arr2],
        ))),
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    }));

    // (ARRAY_LENGTH(arr1) <> LIST_COUNT(arr1) AND ARRAY_LENGTH(arr2) <> LIST_COUNT(arr2))
    let null_check = Expression::Paren(Box::new(Paren {
        this: Expression::And(Box::new(BinaryOp {
            left: arr1_has_null,
            right: arr2_has_null,
            left_comments: vec![],
            operator_comments: vec![],
            trailing_comments: vec![],
            inferred_type: None,
        })),
        trailing_comments: vec![],
    }));

    // (arr1 && arr2) OR (null_check)
    Ok(Expression::Or(Box::new(BinaryOp {
        left: overlap,
        right: null_check,
        left_comments: vec![],
        operator_comments: vec![],
        trailing_comments: vec![],
        inferred_type: None,
    })))
}
21729 // ARRAY_INTERSECTION([1, 2], [2, 3]) from Snowflake -> DuckDB:
21730 // Bag semantics using LIST_TRANSFORM/LIST_FILTER with GENERATE_SERIES
21731 "ARRAY_INTERSECTION"
21732 if f.args.len() == 2
21733 && matches!(source, DialectType::Snowflake)
21734 && matches!(target, DialectType::DuckDB) =>
21735 {
21736 let mut args = f.args;
21737 let arr1 = args.remove(0);
21738 let arr2 = args.remove(0);
21739
21740 // Build: arr1 IS NULL
21741 let arr1_is_null = Expression::IsNull(Box::new(IsNull {
21742 this: arr1.clone(),
21743 not: false,
21744 postfix_form: false,
21745 }));
21746 let arr2_is_null = Expression::IsNull(Box::new(IsNull {
21747 this: arr2.clone(),
21748 not: false,
21749 postfix_form: false,
21750 }));
21751 let null_check = Expression::Or(Box::new(BinaryOp {
21752 left: arr1_is_null,
21753 right: arr2_is_null,
21754 left_comments: vec![],
21755 operator_comments: vec![],
21756 trailing_comments: vec![],
21757 inferred_type: None,
21758 }));
21759
21760 // GENERATE_SERIES(1, LENGTH(arr1))
21761 let gen_series = Expression::Function(Box::new(Function::new(
21762 "GENERATE_SERIES".to_string(),
21763 vec![
21764 Expression::number(1),
21765 Expression::Function(Box::new(Function::new(
21766 "LENGTH".to_string(),
21767 vec![arr1.clone()],
21768 ))),
21769 ],
21770 )));
21771
21772 // LIST_ZIP(arr1, GENERATE_SERIES(1, LENGTH(arr1)))
21773 let list_zip = Expression::Function(Box::new(Function::new(
21774 "LIST_ZIP".to_string(),
21775 vec![arr1.clone(), gen_series],
21776 )));
21777
21778 // pair[1] and pair[2]
21779 let pair_col = Expression::column("pair");
21780 let pair_1 = Expression::Subscript(Box::new(
21781 crate::expressions::Subscript {
21782 this: pair_col.clone(),
21783 index: Expression::number(1),
21784 },
21785 ));
21786 let pair_2 = Expression::Subscript(Box::new(
21787 crate::expressions::Subscript {
21788 this: pair_col.clone(),
21789 index: Expression::number(2),
21790 },
21791 ));
21792
21793 // arr1[1:pair[2]]
21794 let arr1_slice = Expression::ArraySlice(Box::new(
21795 crate::expressions::ArraySlice {
21796 this: arr1.clone(),
21797 start: Some(Expression::number(1)),
21798 end: Some(pair_2),
21799 },
21800 ));
21801
21802 // e IS NOT DISTINCT FROM pair[1]
21803 let e_col = Expression::column("e");
21804 let is_not_distinct = Expression::NullSafeEq(Box::new(BinaryOp {
21805 left: e_col.clone(),
21806 right: pair_1.clone(),
21807 left_comments: vec![],
21808 operator_comments: vec![],
21809 trailing_comments: vec![],
21810 inferred_type: None,
21811 }));
21812
21813 // e -> e IS NOT DISTINCT FROM pair[1]
21814 let inner_lambda1 =
21815 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21816 parameters: vec![crate::expressions::Identifier::new("e")],
21817 body: is_not_distinct,
21818 colon: false,
21819 parameter_types: vec![],
21820 }));
21821
21822 // LIST_FILTER(arr1[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
21823 let inner_filter1 = Expression::Function(Box::new(Function::new(
21824 "LIST_FILTER".to_string(),
21825 vec![arr1_slice, inner_lambda1],
21826 )));
21827
21828 // LENGTH(LIST_FILTER(arr1[1:pair[2]], ...))
21829 let len1 = Expression::Function(Box::new(Function::new(
21830 "LENGTH".to_string(),
21831 vec![inner_filter1],
21832 )));
21833
21834 // e -> e IS NOT DISTINCT FROM pair[1]
21835 let inner_lambda2 =
21836 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21837 parameters: vec![crate::expressions::Identifier::new("e")],
21838 body: Expression::NullSafeEq(Box::new(BinaryOp {
21839 left: e_col,
21840 right: pair_1.clone(),
21841 left_comments: vec![],
21842 operator_comments: vec![],
21843 trailing_comments: vec![],
21844 inferred_type: None,
21845 })),
21846 colon: false,
21847 parameter_types: vec![],
21848 }));
21849
21850 // LIST_FILTER(arr2, e -> e IS NOT DISTINCT FROM pair[1])
21851 let inner_filter2 = Expression::Function(Box::new(Function::new(
21852 "LIST_FILTER".to_string(),
21853 vec![arr2.clone(), inner_lambda2],
21854 )));
21855
21856 // LENGTH(LIST_FILTER(arr2, ...))
21857 let len2 = Expression::Function(Box::new(Function::new(
21858 "LENGTH".to_string(),
21859 vec![inner_filter2],
21860 )));
21861
21862 // LENGTH(...) <= LENGTH(...)
21863 let cond = Expression::Paren(Box::new(Paren {
21864 this: Expression::Lte(Box::new(BinaryOp {
21865 left: len1,
21866 right: len2,
21867 left_comments: vec![],
21868 operator_comments: vec![],
21869 trailing_comments: vec![],
21870 inferred_type: None,
21871 })),
21872 trailing_comments: vec![],
21873 }));
21874
21875 // pair -> (condition)
21876 let filter_lambda =
21877 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21878 parameters: vec![crate::expressions::Identifier::new(
21879 "pair",
21880 )],
21881 body: cond,
21882 colon: false,
21883 parameter_types: vec![],
21884 }));
21885
21886 // LIST_FILTER(LIST_ZIP(...), pair -> ...)
21887 let outer_filter = Expression::Function(Box::new(Function::new(
21888 "LIST_FILTER".to_string(),
21889 vec![list_zip, filter_lambda],
21890 )));
21891
21892 // pair -> pair[1]
21893 let transform_lambda =
21894 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
21895 parameters: vec![crate::expressions::Identifier::new(
21896 "pair",
21897 )],
21898 body: pair_1,
21899 colon: false,
21900 parameter_types: vec![],
21901 }));
21902
21903 // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
21904 let list_transform = Expression::Function(Box::new(Function::new(
21905 "LIST_TRANSFORM".to_string(),
21906 vec![outer_filter, transform_lambda],
21907 )));
21908
21909 // CASE WHEN arr1 IS NULL OR arr2 IS NULL THEN NULL
21910 // ELSE LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
21911 // END
21912 Ok(Expression::Case(Box::new(Case {
21913 operand: None,
21914 whens: vec![(null_check, Expression::Null(Null))],
21915 else_: Some(list_transform),
21916 comments: vec![],
21917 inferred_type: None,
21918 })))
21919 }
21920 // ARRAY_CONSTRUCT(args) -> Expression::Array for all targets
21921 "ARRAY_CONSTRUCT" => {
21922 if matches!(target, DialectType::Snowflake) {
21923 Ok(Expression::Function(f))
21924 } else {
21925 Ok(Expression::Array(Box::new(crate::expressions::Array {
21926 expressions: f.args,
21927 })))
21928 }
21929 }
21930 // ARRAY(args) function -> Expression::Array for DuckDB/Snowflake/Presto/Trino/Athena
21931 "ARRAY"
21932 if !f.args.iter().any(|a| {
21933 matches!(a, Expression::Select(_) | Expression::Subquery(_))
21934 }) =>
21935 {
21936 match target {
21937 DialectType::DuckDB
21938 | DialectType::Snowflake
21939 | DialectType::Presto
21940 | DialectType::Trino
21941 | DialectType::Athena => {
21942 Ok(Expression::Array(Box::new(crate::expressions::Array {
21943 expressions: f.args,
21944 })))
21945 }
21946 _ => Ok(Expression::Function(f)),
21947 }
21948 }
21949 _ => Ok(Expression::Function(f)),
21950 }
21951 } else if let Expression::AggregateFunction(mut af) = e {
21952 let name = af.name.to_ascii_uppercase();
21953 match name.as_str() {
21954 "ARBITRARY" if af.args.len() == 1 => {
21955 let arg = af.args.into_iter().next().unwrap();
21956 Ok(convert_arbitrary(arg, target))
21957 }
21958 "JSON_ARRAYAGG" => {
21959 match target {
21960 DialectType::PostgreSQL => {
21961 af.name = "JSON_AGG".to_string();
21962 // Add NULLS FIRST to ORDER BY items for PostgreSQL
21963 for ordered in af.order_by.iter_mut() {
21964 if ordered.nulls_first.is_none() {
21965 ordered.nulls_first = Some(true);
21966 }
21967 }
21968 Ok(Expression::AggregateFunction(af))
21969 }
21970 _ => Ok(Expression::AggregateFunction(af)),
21971 }
21972 }
21973 _ => Ok(Expression::AggregateFunction(af)),
21974 }
21975 } else if let Expression::JSONArrayAgg(ja) = e {
21976 // JSONArrayAgg -> JSON_AGG for PostgreSQL, JSON_ARRAYAGG for others
21977 match target {
21978 DialectType::PostgreSQL => {
21979 let mut order_by = Vec::new();
21980 if let Some(order_expr) = ja.order {
21981 if let Expression::OrderBy(ob) = *order_expr {
21982 for mut ordered in ob.expressions {
21983 if ordered.nulls_first.is_none() {
21984 ordered.nulls_first = Some(true);
21985 }
21986 order_by.push(ordered);
21987 }
21988 }
21989 }
21990 Ok(Expression::AggregateFunction(Box::new(
21991 crate::expressions::AggregateFunction {
21992 name: "JSON_AGG".to_string(),
21993 args: vec![*ja.this],
21994 distinct: false,
21995 filter: None,
21996 order_by,
21997 limit: None,
21998 ignore_nulls: None,
21999 inferred_type: None,
22000 },
22001 )))
22002 }
22003 _ => Ok(Expression::JSONArrayAgg(ja)),
22004 }
22005 } else if let Expression::JSONArray(ja) = e {
22006 match target {
22007 DialectType::Snowflake
22008 if ja.null_handling.is_none()
22009 && ja.return_type.is_none()
22010 && ja.strict.is_none() =>
22011 {
22012 let array_construct = Expression::ArrayFunc(Box::new(
22013 crate::expressions::ArrayConstructor {
22014 expressions: ja.expressions,
22015 bracket_notation: false,
22016 use_list_keyword: false,
22017 },
22018 ));
22019 Ok(Expression::Function(Box::new(Function::new(
22020 "TO_VARIANT".to_string(),
22021 vec![array_construct],
22022 ))))
22023 }
22024 _ => Ok(Expression::JSONArray(ja)),
22025 }
22026 } else if let Expression::JsonArray(f) = e {
22027 match target {
22028 DialectType::Snowflake => {
22029 let array_construct = Expression::ArrayFunc(Box::new(
22030 crate::expressions::ArrayConstructor {
22031 expressions: f.expressions,
22032 bracket_notation: false,
22033 use_list_keyword: false,
22034 },
22035 ));
22036 Ok(Expression::Function(Box::new(Function::new(
22037 "TO_VARIANT".to_string(),
22038 vec![array_construct],
22039 ))))
22040 }
22041 _ => Ok(Expression::JsonArray(f)),
22042 }
22043 } else if let Expression::CombinedParameterizedAgg(cpa) = e {
22044 let function_name = match cpa.this.as_ref() {
22045 Expression::Identifier(ident) => Some(ident.name.as_str()),
22046 _ => None,
22047 };
22048 match function_name {
22049 Some(name)
22050 if name.eq_ignore_ascii_case("groupConcat")
22051 && cpa.expressions.len() == 1 =>
22052 {
22053 match target {
22054 DialectType::MySQL | DialectType::SingleStore => {
22055 let this = cpa.expressions[0].clone();
22056 let separator = cpa.params.first().cloned();
22057 Ok(Expression::GroupConcat(Box::new(
22058 crate::expressions::GroupConcatFunc {
22059 this,
22060 separator,
22061 order_by: None,
22062 distinct: false,
22063 filter: None,
22064 limit: None,
22065 inferred_type: None,
22066 },
22067 )))
22068 }
22069 DialectType::DuckDB => Ok(Expression::ListAgg(Box::new({
22070 let this = cpa.expressions[0].clone();
22071 let separator = cpa.params.first().cloned();
22072 crate::expressions::ListAggFunc {
22073 this,
22074 separator,
22075 on_overflow: None,
22076 order_by: None,
22077 distinct: false,
22078 filter: None,
22079 inferred_type: None,
22080 }
22081 }))),
22082 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22083 }
22084 }
22085 _ => Ok(Expression::CombinedParameterizedAgg(cpa)),
22086 }
22087 } else if let Expression::ToNumber(tn) = e {
22088 // TO_NUMBER(x) with no format/precision/scale -> CAST(x AS DOUBLE)
22089 let arg = *tn.this;
22090 Ok(Expression::Cast(Box::new(crate::expressions::Cast {
22091 this: arg,
22092 to: crate::expressions::DataType::Double {
22093 precision: None,
22094 scale: None,
22095 },
22096 double_colon_syntax: false,
22097 trailing_comments: Vec::new(),
22098 format: None,
22099 default: None,
22100 inferred_type: None,
22101 })))
22102 } else {
22103 Ok(e)
22104 }
22105 }
22106
22107 Action::RegexpLikeToDuckDB => {
22108 if let Expression::RegexpLike(f) = e {
22109 let mut args = vec![f.this, f.pattern];
22110 if let Some(flags) = f.flags {
22111 args.push(flags);
22112 }
22113 Ok(Expression::Function(Box::new(Function::new(
22114 "REGEXP_MATCHES".to_string(),
22115 args,
22116 ))))
22117 } else {
22118 Ok(e)
22119 }
22120 }
22121 Action::EpochConvert => {
22122 if let Expression::Epoch(f) = e {
22123 let arg = f.this;
22124 let name = match target {
22125 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
22126 "UNIX_TIMESTAMP"
22127 }
22128 DialectType::Presto | DialectType::Trino => "TO_UNIXTIME",
22129 DialectType::BigQuery => "TIME_TO_UNIX",
22130 _ => "EPOCH",
22131 };
22132 Ok(Expression::Function(Box::new(Function::new(
22133 name.to_string(),
22134 vec![arg],
22135 ))))
22136 } else {
22137 Ok(e)
22138 }
22139 }
22140 Action::EpochMsConvert => {
22141 use crate::expressions::{BinaryOp, Cast};
22142 if let Expression::EpochMs(f) = e {
22143 let arg = f.this;
22144 match target {
22145 DialectType::Spark | DialectType::Databricks => {
22146 Ok(Expression::Function(Box::new(Function::new(
22147 "TIMESTAMP_MILLIS".to_string(),
22148 vec![arg],
22149 ))))
22150 }
22151 DialectType::BigQuery => Ok(Expression::Function(Box::new(
22152 Function::new("TIMESTAMP_MILLIS".to_string(), vec![arg]),
22153 ))),
22154 DialectType::Presto | DialectType::Trino => {
22155 // FROM_UNIXTIME(CAST(x AS DOUBLE) / POW(10, 3))
22156 let cast_arg = Expression::Cast(Box::new(Cast {
22157 this: arg,
22158 to: DataType::Double {
22159 precision: None,
22160 scale: None,
22161 },
22162 trailing_comments: Vec::new(),
22163 double_colon_syntax: false,
22164 format: None,
22165 default: None,
22166 inferred_type: None,
22167 }));
22168 let div = Expression::Div(Box::new(BinaryOp::new(
22169 cast_arg,
22170 Expression::Function(Box::new(Function::new(
22171 "POW".to_string(),
22172 vec![Expression::number(10), Expression::number(3)],
22173 ))),
22174 )));
22175 Ok(Expression::Function(Box::new(Function::new(
22176 "FROM_UNIXTIME".to_string(),
22177 vec![div],
22178 ))))
22179 }
22180 DialectType::MySQL => {
22181 // FROM_UNIXTIME(x / POWER(10, 3))
22182 let div = Expression::Div(Box::new(BinaryOp::new(
22183 arg,
22184 Expression::Function(Box::new(Function::new(
22185 "POWER".to_string(),
22186 vec![Expression::number(10), Expression::number(3)],
22187 ))),
22188 )));
22189 Ok(Expression::Function(Box::new(Function::new(
22190 "FROM_UNIXTIME".to_string(),
22191 vec![div],
22192 ))))
22193 }
22194 DialectType::PostgreSQL | DialectType::Redshift => {
22195 // TO_TIMESTAMP(CAST(x AS DOUBLE PRECISION) / POWER(10, 3))
22196 let cast_arg = Expression::Cast(Box::new(Cast {
22197 this: arg,
22198 to: DataType::Custom {
22199 name: "DOUBLE PRECISION".to_string(),
22200 },
22201 trailing_comments: Vec::new(),
22202 double_colon_syntax: false,
22203 format: None,
22204 default: None,
22205 inferred_type: None,
22206 }));
22207 let div = Expression::Div(Box::new(BinaryOp::new(
22208 cast_arg,
22209 Expression::Function(Box::new(Function::new(
22210 "POWER".to_string(),
22211 vec![Expression::number(10), Expression::number(3)],
22212 ))),
22213 )));
22214 Ok(Expression::Function(Box::new(Function::new(
22215 "TO_TIMESTAMP".to_string(),
22216 vec![div],
22217 ))))
22218 }
22219 DialectType::ClickHouse => {
22220 // fromUnixTimestamp64Milli(CAST(x AS Nullable(Int64)))
22221 let cast_arg = Expression::Cast(Box::new(Cast {
22222 this: arg,
22223 to: DataType::Nullable {
22224 inner: Box::new(DataType::BigInt { length: None }),
22225 },
22226 trailing_comments: Vec::new(),
22227 double_colon_syntax: false,
22228 format: None,
22229 default: None,
22230 inferred_type: None,
22231 }));
22232 Ok(Expression::Function(Box::new(Function::new(
22233 "fromUnixTimestamp64Milli".to_string(),
22234 vec![cast_arg],
22235 ))))
22236 }
22237 _ => Ok(Expression::Function(Box::new(Function::new(
22238 "EPOCH_MS".to_string(),
22239 vec![arg],
22240 )))),
22241 }
22242 } else {
22243 Ok(e)
22244 }
22245 }
22246 Action::TSQLTypeNormalize => {
22247 if let Expression::DataType(dt) = e {
22248 let new_dt = match &dt {
22249 DataType::Custom { name } if name.eq_ignore_ascii_case("MONEY") => {
22250 DataType::Decimal {
22251 precision: Some(15),
22252 scale: Some(4),
22253 }
22254 }
22255 DataType::Custom { name }
22256 if name.eq_ignore_ascii_case("SMALLMONEY") =>
22257 {
22258 DataType::Decimal {
22259 precision: Some(6),
22260 scale: Some(4),
22261 }
22262 }
22263 DataType::Custom { name } if name.eq_ignore_ascii_case("DATETIME2") => {
22264 DataType::Timestamp {
22265 timezone: false,
22266 precision: None,
22267 }
22268 }
22269 DataType::Custom { name } if name.eq_ignore_ascii_case("REAL") => {
22270 DataType::Float {
22271 precision: None,
22272 scale: None,
22273 real_spelling: false,
22274 }
22275 }
22276 DataType::Float {
22277 real_spelling: true,
22278 ..
22279 } => DataType::Float {
22280 precision: None,
22281 scale: None,
22282 real_spelling: false,
22283 },
22284 DataType::Custom { name } if name.eq_ignore_ascii_case("IMAGE") => {
22285 DataType::Custom {
22286 name: "BLOB".to_string(),
22287 }
22288 }
22289 DataType::Custom { name } if name.eq_ignore_ascii_case("BIT") => {
22290 DataType::Boolean
22291 }
22292 DataType::Custom { name }
22293 if name.eq_ignore_ascii_case("ROWVERSION") =>
22294 {
22295 DataType::Custom {
22296 name: "BINARY".to_string(),
22297 }
22298 }
22299 DataType::Custom { name }
22300 if name.eq_ignore_ascii_case("UNIQUEIDENTIFIER") =>
22301 {
22302 match target {
22303 DialectType::Spark
22304 | DialectType::Databricks
22305 | DialectType::Hive => DataType::Custom {
22306 name: "STRING".to_string(),
22307 },
22308 _ => DataType::VarChar {
22309 length: Some(36),
22310 parenthesized_length: true,
22311 },
22312 }
22313 }
22314 DataType::Custom { name }
22315 if name.eq_ignore_ascii_case("DATETIMEOFFSET") =>
22316 {
22317 match target {
22318 DialectType::Spark
22319 | DialectType::Databricks
22320 | DialectType::Hive => DataType::Timestamp {
22321 timezone: false,
22322 precision: None,
22323 },
22324 _ => DataType::Timestamp {
22325 timezone: true,
22326 precision: None,
22327 },
22328 }
22329 }
22330 DataType::Custom { ref name }
22331 if name.len() >= 10
22332 && name[..10].eq_ignore_ascii_case("DATETIME2(") =>
22333 {
22334 // DATETIME2(n) -> TIMESTAMP
22335 DataType::Timestamp {
22336 timezone: false,
22337 precision: None,
22338 }
22339 }
22340 DataType::Custom { ref name }
22341 if name.len() >= 5 && name[..5].eq_ignore_ascii_case("TIME(") =>
22342 {
22343 // TIME(n) -> TIMESTAMP for Spark, keep as TIME for others
22344 match target {
22345 DialectType::Spark
22346 | DialectType::Databricks
22347 | DialectType::Hive => DataType::Timestamp {
22348 timezone: false,
22349 precision: None,
22350 },
22351 _ => return Ok(Expression::DataType(dt)),
22352 }
22353 }
22354 DataType::Custom { ref name }
22355 if name.len() >= 7 && name[..7].eq_ignore_ascii_case("NUMERIC") =>
22356 {
22357 // Parse NUMERIC(p,s) back to Decimal(p,s)
22358 let upper = name.to_ascii_uppercase();
22359 if let Some(inner) = upper
22360 .strip_prefix("NUMERIC(")
22361 .and_then(|s| s.strip_suffix(')'))
22362 {
22363 let parts: Vec<&str> = inner.split(',').collect();
22364 let precision =
22365 parts.first().and_then(|s| s.trim().parse::<u32>().ok());
22366 let scale =
22367 parts.get(1).and_then(|s| s.trim().parse::<u32>().ok());
22368 DataType::Decimal { precision, scale }
22369 } else if upper == "NUMERIC" {
22370 DataType::Decimal {
22371 precision: None,
22372 scale: None,
22373 }
22374 } else {
22375 return Ok(Expression::DataType(dt));
22376 }
22377 }
22378 DataType::Float {
22379 precision: Some(p), ..
22380 } => {
22381 // For Hive/Spark: FLOAT(1-32) -> FLOAT, FLOAT(33+) -> DOUBLE (IEEE 754 boundary)
22382 // For other targets: FLOAT(1-24) -> FLOAT, FLOAT(25+) -> DOUBLE (TSQL boundary)
22383 let boundary = match target {
22384 DialectType::Hive
22385 | DialectType::Spark
22386 | DialectType::Databricks => 32,
22387 _ => 24,
22388 };
22389 if *p <= boundary {
22390 DataType::Float {
22391 precision: None,
22392 scale: None,
22393 real_spelling: false,
22394 }
22395 } else {
22396 DataType::Double {
22397 precision: None,
22398 scale: None,
22399 }
22400 }
22401 }
22402 DataType::TinyInt { .. } => match target {
22403 DialectType::DuckDB => DataType::Custom {
22404 name: "UTINYINT".to_string(),
22405 },
22406 DialectType::Hive
22407 | DialectType::Spark
22408 | DialectType::Databricks => DataType::SmallInt { length: None },
22409 _ => return Ok(Expression::DataType(dt)),
22410 },
22411 // INTEGER -> INT for Spark/Databricks
22412 DataType::Int {
22413 length,
22414 integer_spelling: true,
22415 } => DataType::Int {
22416 length: *length,
22417 integer_spelling: false,
22418 },
22419 _ => return Ok(Expression::DataType(dt)),
22420 };
22421 Ok(Expression::DataType(new_dt))
22422 } else {
22423 Ok(e)
22424 }
22425 }
22426 Action::MySQLSafeDivide => {
22427 use crate::expressions::{BinaryOp, Cast};
22428 if let Expression::Div(op) = e {
22429 let left = op.left;
22430 let right = op.right;
22431 // For SQLite: CAST left as REAL but NO NULLIF wrapping
22432 if matches!(target, DialectType::SQLite) {
22433 let new_left = Expression::Cast(Box::new(Cast {
22434 this: left,
22435 to: DataType::Float {
22436 precision: None,
22437 scale: None,
22438 real_spelling: true,
22439 },
22440 trailing_comments: Vec::new(),
22441 double_colon_syntax: false,
22442 format: None,
22443 default: None,
22444 inferred_type: None,
22445 }));
22446 return Ok(Expression::Div(Box::new(BinaryOp::new(new_left, right))));
22447 }
22448 // Wrap right in NULLIF(right, 0)
22449 let nullif_right = Expression::Function(Box::new(Function::new(
22450 "NULLIF".to_string(),
22451 vec![right, Expression::number(0)],
22452 )));
22453 // For some dialects, also CAST the left side
22454 let new_left = match target {
22455 DialectType::PostgreSQL
22456 | DialectType::Redshift
22457 | DialectType::Teradata
22458 | DialectType::Materialize
22459 | DialectType::RisingWave => Expression::Cast(Box::new(Cast {
22460 this: left,
22461 to: DataType::Custom {
22462 name: "DOUBLE PRECISION".to_string(),
22463 },
22464 trailing_comments: Vec::new(),
22465 double_colon_syntax: false,
22466 format: None,
22467 default: None,
22468 inferred_type: None,
22469 })),
22470 DialectType::Drill
22471 | DialectType::Trino
22472 | DialectType::Presto
22473 | DialectType::Athena => Expression::Cast(Box::new(Cast {
22474 this: left,
22475 to: DataType::Double {
22476 precision: None,
22477 scale: None,
22478 },
22479 trailing_comments: Vec::new(),
22480 double_colon_syntax: false,
22481 format: None,
22482 default: None,
22483 inferred_type: None,
22484 })),
22485 DialectType::TSQL => Expression::Cast(Box::new(Cast {
22486 this: left,
22487 to: DataType::Float {
22488 precision: None,
22489 scale: None,
22490 real_spelling: false,
22491 },
22492 trailing_comments: Vec::new(),
22493 double_colon_syntax: false,
22494 format: None,
22495 default: None,
22496 inferred_type: None,
22497 })),
22498 _ => left,
22499 };
22500 Ok(Expression::Div(Box::new(BinaryOp::new(
22501 new_left,
22502 nullif_right,
22503 ))))
22504 } else {
22505 Ok(e)
22506 }
22507 }
22508 Action::AlterTableRenameStripSchema => {
22509 if let Expression::AlterTable(mut at) = e {
22510 if let Some(crate::expressions::AlterTableAction::RenameTable(
22511 ref mut new_tbl,
22512 )) = at.actions.first_mut()
22513 {
22514 new_tbl.schema = None;
22515 new_tbl.catalog = None;
22516 }
22517 Ok(Expression::AlterTable(at))
22518 } else {
22519 Ok(e)
22520 }
22521 }
22522 Action::NullsOrdering => {
22523 // Fill in the source dialect's implied null ordering default.
22524 // This makes implicit null ordering explicit so the target generator
22525 // can correctly strip or keep it.
22526 //
22527 // Dialect null ordering categories:
22528 // nulls_are_large (Oracle, PostgreSQL, Redshift, Snowflake):
22529 // ASC -> NULLS LAST, DESC -> NULLS FIRST
22530 // nulls_are_small (Spark, Hive, BigQuery, MySQL, Databricks, ClickHouse, etc.):
22531 // ASC -> NULLS FIRST, DESC -> NULLS LAST
22532 // nulls_are_last (DuckDB, Presto, Trino, Dremio, Athena):
22533 // NULLS LAST always (both ASC and DESC)
22534 if let Expression::Ordered(mut o) = e {
22535 let is_asc = !o.desc;
22536
22537 let is_source_nulls_large = matches!(
22538 source,
22539 DialectType::Oracle
22540 | DialectType::PostgreSQL
22541 | DialectType::Redshift
22542 | DialectType::Snowflake
22543 );
22544 let is_source_nulls_last = matches!(
22545 source,
22546 DialectType::DuckDB
22547 | DialectType::Presto
22548 | DialectType::Trino
22549 | DialectType::Dremio
22550 | DialectType::Athena
22551 | DialectType::ClickHouse
22552 | DialectType::Drill
22553 | DialectType::Exasol
22554 | DialectType::DataFusion
22555 );
22556
22557 // Determine target category to check if default matches
22558 let is_target_nulls_large = matches!(
22559 target,
22560 DialectType::Oracle
22561 | DialectType::PostgreSQL
22562 | DialectType::Redshift
22563 | DialectType::Snowflake
22564 );
22565 let is_target_nulls_last = matches!(
22566 target,
22567 DialectType::DuckDB
22568 | DialectType::Presto
22569 | DialectType::Trino
22570 | DialectType::Dremio
22571 | DialectType::Athena
22572 | DialectType::ClickHouse
22573 | DialectType::Drill
22574 | DialectType::Exasol
22575 | DialectType::DataFusion
22576 );
22577
22578 // Compute the implied nulls_first for source
22579 let source_nulls_first = if is_source_nulls_large {
22580 !is_asc // ASC -> NULLS LAST (false), DESC -> NULLS FIRST (true)
22581 } else if is_source_nulls_last {
22582 false // NULLS LAST always
22583 } else {
22584 is_asc // nulls_are_small: ASC -> NULLS FIRST (true), DESC -> NULLS LAST (false)
22585 };
22586
22587 // Compute the target's default
22588 let target_nulls_first = if is_target_nulls_large {
22589 !is_asc
22590 } else if is_target_nulls_last {
22591 false
22592 } else {
22593 is_asc
22594 };
22595
22596 // Only add explicit nulls ordering if source and target defaults differ
22597 if source_nulls_first != target_nulls_first {
22598 o.nulls_first = Some(source_nulls_first);
22599 }
22600 // If they match, leave nulls_first as None so the generator won't output it
22601
22602 Ok(Expression::Ordered(o))
22603 } else {
22604 Ok(e)
22605 }
22606 }
22607 Action::StringAggConvert => {
22608 match e {
22609 Expression::WithinGroup(wg) => {
22610 // STRING_AGG(x, sep) WITHIN GROUP (ORDER BY z) -> target-specific
22611 // Extract args and distinct flag from either Function, AggregateFunction, or StringAgg
22612 let (x_opt, sep_opt, distinct) = match wg.this {
22613 Expression::AggregateFunction(ref af)
22614 if af.name.eq_ignore_ascii_case("STRING_AGG")
22615 && af.args.len() >= 2 =>
22616 {
22617 (
22618 Some(af.args[0].clone()),
22619 Some(af.args[1].clone()),
22620 af.distinct,
22621 )
22622 }
22623 Expression::Function(ref f)
22624 if f.name.eq_ignore_ascii_case("STRING_AGG")
22625 && f.args.len() >= 2 =>
22626 {
22627 (Some(f.args[0].clone()), Some(f.args[1].clone()), false)
22628 }
22629 Expression::StringAgg(ref sa) => {
22630 (Some(sa.this.clone()), sa.separator.clone(), sa.distinct)
22631 }
22632 _ => (None, None, false),
22633 };
22634 if let (Some(x), Some(sep)) = (x_opt, sep_opt) {
22635 let order_by = wg.order_by;
22636
22637 match target {
22638 DialectType::TSQL | DialectType::Fabric => {
22639 // Keep as WithinGroup(StringAgg) for TSQL
22640 Ok(Expression::WithinGroup(Box::new(
22641 crate::expressions::WithinGroup {
22642 this: Expression::StringAgg(Box::new(
22643 crate::expressions::StringAggFunc {
22644 this: x,
22645 separator: Some(sep),
22646 order_by: None, // order_by goes in WithinGroup, not StringAgg
22647 distinct,
22648 filter: None,
22649 limit: None,
22650 inferred_type: None,
22651 },
22652 )),
22653 order_by,
22654 },
22655 )))
22656 }
22657 DialectType::MySQL
22658 | DialectType::SingleStore
22659 | DialectType::Doris
22660 | DialectType::StarRocks => {
22661 // GROUP_CONCAT(x ORDER BY z SEPARATOR sep)
22662 Ok(Expression::GroupConcat(Box::new(
22663 crate::expressions::GroupConcatFunc {
22664 this: x,
22665 separator: Some(sep),
22666 order_by: Some(order_by),
22667 distinct,
22668 filter: None,
22669 limit: None,
22670 inferred_type: None,
22671 },
22672 )))
22673 }
22674 DialectType::SQLite => {
22675 // GROUP_CONCAT(x, sep) - no ORDER BY support
22676 Ok(Expression::GroupConcat(Box::new(
22677 crate::expressions::GroupConcatFunc {
22678 this: x,
22679 separator: Some(sep),
22680 order_by: None,
22681 distinct,
22682 filter: None,
22683 limit: None,
22684 inferred_type: None,
22685 },
22686 )))
22687 }
22688 DialectType::PostgreSQL | DialectType::Redshift => {
22689 // STRING_AGG(x, sep ORDER BY z)
22690 Ok(Expression::StringAgg(Box::new(
22691 crate::expressions::StringAggFunc {
22692 this: x,
22693 separator: Some(sep),
22694 order_by: Some(order_by),
22695 distinct,
22696 filter: None,
22697 limit: None,
22698 inferred_type: None,
22699 },
22700 )))
22701 }
22702 _ => {
22703 // Default: keep as STRING_AGG(x, sep) with ORDER BY inside
22704 Ok(Expression::StringAgg(Box::new(
22705 crate::expressions::StringAggFunc {
22706 this: x,
22707 separator: Some(sep),
22708 order_by: Some(order_by),
22709 distinct,
22710 filter: None,
22711 limit: None,
22712 inferred_type: None,
22713 },
22714 )))
22715 }
22716 }
22717 } else {
22718 Ok(Expression::WithinGroup(wg))
22719 }
22720 }
22721 Expression::StringAgg(sa) => {
22722 match target {
22723 DialectType::MySQL
22724 | DialectType::SingleStore
22725 | DialectType::Doris
22726 | DialectType::StarRocks => {
22727 // STRING_AGG(x, sep) -> GROUP_CONCAT(x SEPARATOR sep)
22728 Ok(Expression::GroupConcat(Box::new(
22729 crate::expressions::GroupConcatFunc {
22730 this: sa.this,
22731 separator: sa.separator,
22732 order_by: sa.order_by,
22733 distinct: sa.distinct,
22734 filter: sa.filter,
22735 limit: None,
22736 inferred_type: None,
22737 },
22738 )))
22739 }
22740 DialectType::SQLite => {
22741 // STRING_AGG(x, sep) -> GROUP_CONCAT(x, sep)
22742 Ok(Expression::GroupConcat(Box::new(
22743 crate::expressions::GroupConcatFunc {
22744 this: sa.this,
22745 separator: sa.separator,
22746 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
22747 distinct: sa.distinct,
22748 filter: sa.filter,
22749 limit: None,
22750 inferred_type: None,
22751 },
22752 )))
22753 }
22754 DialectType::Spark | DialectType::Databricks => {
22755 // STRING_AGG(x, sep) -> LISTAGG(x, sep)
22756 Ok(Expression::ListAgg(Box::new(
22757 crate::expressions::ListAggFunc {
22758 this: sa.this,
22759 separator: sa.separator,
22760 on_overflow: None,
22761 order_by: sa.order_by,
22762 distinct: sa.distinct,
22763 filter: None,
22764 inferred_type: None,
22765 },
22766 )))
22767 }
22768 _ => Ok(Expression::StringAgg(sa)),
22769 }
22770 }
22771 _ => Ok(e),
22772 }
22773 }
22774 Action::GroupConcatConvert => {
22775 // Helper to expand CONCAT(a, b, c) -> a || b || c (for PostgreSQL/SQLite)
22776 // or CONCAT(a, b, c) -> a + b + c (for TSQL)
22777 fn expand_concat_to_dpipe(expr: Expression) -> Expression {
22778 if let Expression::Function(ref f) = expr {
22779 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22780 let mut result = f.args[0].clone();
22781 for arg in &f.args[1..] {
22782 result = Expression::Concat(Box::new(BinaryOp {
22783 left: result,
22784 right: arg.clone(),
22785 left_comments: vec![],
22786 operator_comments: vec![],
22787 trailing_comments: vec![],
22788 inferred_type: None,
22789 }));
22790 }
22791 return result;
22792 }
22793 }
22794 expr
22795 }
22796 fn expand_concat_to_plus(expr: Expression) -> Expression {
22797 if let Expression::Function(ref f) = expr {
22798 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22799 let mut result = f.args[0].clone();
22800 for arg in &f.args[1..] {
22801 result = Expression::Add(Box::new(BinaryOp {
22802 left: result,
22803 right: arg.clone(),
22804 left_comments: vec![],
22805 operator_comments: vec![],
22806 trailing_comments: vec![],
22807 inferred_type: None,
22808 }));
22809 }
22810 return result;
22811 }
22812 }
22813 expr
22814 }
22815 // Helper to wrap each arg in CAST(arg AS VARCHAR) for Presto/Trino CONCAT
22816 fn wrap_concat_args_in_varchar_cast(expr: Expression) -> Expression {
22817 if let Expression::Function(ref f) = expr {
22818 if f.name.eq_ignore_ascii_case("CONCAT") && f.args.len() > 1 {
22819 let new_args: Vec<Expression> = f
22820 .args
22821 .iter()
22822 .map(|arg| {
22823 Expression::Cast(Box::new(crate::expressions::Cast {
22824 this: arg.clone(),
22825 to: crate::expressions::DataType::VarChar {
22826 length: None,
22827 parenthesized_length: false,
22828 },
22829 trailing_comments: Vec::new(),
22830 double_colon_syntax: false,
22831 format: None,
22832 default: None,
22833 inferred_type: None,
22834 }))
22835 })
22836 .collect();
22837 return Expression::Function(Box::new(
22838 crate::expressions::Function::new(
22839 "CONCAT".to_string(),
22840 new_args,
22841 ),
22842 ));
22843 }
22844 }
22845 expr
22846 }
22847 if let Expression::GroupConcat(gc) = e {
22848 match target {
22849 DialectType::Presto => {
22850 // GROUP_CONCAT(x [, sep]) -> ARRAY_JOIN(ARRAY_AGG(x), sep)
22851 let sep = gc.separator.unwrap_or(Expression::string(","));
22852 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
22853 let this = wrap_concat_args_in_varchar_cast(gc.this);
22854 let array_agg =
22855 Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
22856 this,
22857 distinct: gc.distinct,
22858 filter: gc.filter,
22859 order_by: gc.order_by.unwrap_or_default(),
22860 name: None,
22861 ignore_nulls: None,
22862 having_max: None,
22863 limit: None,
22864 inferred_type: None,
22865 }));
22866 Ok(Expression::ArrayJoin(Box::new(
22867 crate::expressions::ArrayJoinFunc {
22868 this: array_agg,
22869 separator: sep,
22870 null_replacement: None,
22871 },
22872 )))
22873 }
22874 DialectType::Trino => {
22875 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
22876 let sep = gc.separator.unwrap_or(Expression::string(","));
22877 // For multi-arg CONCAT, wrap each arg in CAST(... AS VARCHAR)
22878 let this = wrap_concat_args_in_varchar_cast(gc.this);
22879 Ok(Expression::ListAgg(Box::new(
22880 crate::expressions::ListAggFunc {
22881 this,
22882 separator: Some(sep),
22883 on_overflow: None,
22884 order_by: gc.order_by,
22885 distinct: gc.distinct,
22886 filter: gc.filter,
22887 inferred_type: None,
22888 },
22889 )))
22890 }
22891 DialectType::PostgreSQL
22892 | DialectType::Redshift
22893 | DialectType::Snowflake
22894 | DialectType::DuckDB
22895 | DialectType::Hive
22896 | DialectType::ClickHouse => {
22897 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep)
22898 let sep = gc.separator.unwrap_or(Expression::string(","));
22899 // Expand CONCAT(a,b,c) -> a || b || c for || dialects
22900 let this = expand_concat_to_dpipe(gc.this);
22901 // For PostgreSQL, add NULLS LAST for DESC / NULLS FIRST for ASC
22902 let order_by = if target == DialectType::PostgreSQL {
22903 gc.order_by.map(|ords| {
22904 ords.into_iter()
22905 .map(|mut o| {
22906 if o.nulls_first.is_none() {
22907 if o.desc {
22908 o.nulls_first = Some(false);
22909 // NULLS LAST
22910 } else {
22911 o.nulls_first = Some(true);
22912 // NULLS FIRST
22913 }
22914 }
22915 o
22916 })
22917 .collect()
22918 })
22919 } else {
22920 gc.order_by
22921 };
22922 Ok(Expression::StringAgg(Box::new(
22923 crate::expressions::StringAggFunc {
22924 this,
22925 separator: Some(sep),
22926 order_by,
22927 distinct: gc.distinct,
22928 filter: gc.filter,
22929 limit: None,
22930 inferred_type: None,
22931 },
22932 )))
22933 }
22934 DialectType::TSQL => {
22935 // GROUP_CONCAT(x [, sep]) -> STRING_AGG(x, sep) WITHIN GROUP (ORDER BY ...)
22936 // TSQL doesn't support DISTINCT in STRING_AGG
22937 let sep = gc.separator.unwrap_or(Expression::string(","));
22938 // Expand CONCAT(a,b,c) -> a + b + c for TSQL
22939 let this = expand_concat_to_plus(gc.this);
22940 Ok(Expression::StringAgg(Box::new(
22941 crate::expressions::StringAggFunc {
22942 this,
22943 separator: Some(sep),
22944 order_by: gc.order_by,
22945 distinct: false, // TSQL doesn't support DISTINCT in STRING_AGG
22946 filter: gc.filter,
22947 limit: None,
22948 inferred_type: None,
22949 },
22950 )))
22951 }
22952 DialectType::SQLite => {
22953 // GROUP_CONCAT stays as GROUP_CONCAT but ORDER BY is removed
22954 // SQLite GROUP_CONCAT doesn't support ORDER BY
22955 // Expand CONCAT(a,b,c) -> a || b || c
22956 let this = expand_concat_to_dpipe(gc.this);
22957 Ok(Expression::GroupConcat(Box::new(
22958 crate::expressions::GroupConcatFunc {
22959 this,
22960 separator: gc.separator,
22961 order_by: None, // SQLite doesn't support ORDER BY in GROUP_CONCAT
22962 distinct: gc.distinct,
22963 filter: gc.filter,
22964 limit: None,
22965 inferred_type: None,
22966 },
22967 )))
22968 }
22969 DialectType::Spark | DialectType::Databricks => {
22970 // GROUP_CONCAT(x [, sep]) -> LISTAGG(x, sep)
22971 let sep = gc.separator.unwrap_or(Expression::string(","));
22972 Ok(Expression::ListAgg(Box::new(
22973 crate::expressions::ListAggFunc {
22974 this: gc.this,
22975 separator: Some(sep),
22976 on_overflow: None,
22977 order_by: gc.order_by,
22978 distinct: gc.distinct,
22979 filter: None,
22980 inferred_type: None,
22981 },
22982 )))
22983 }
22984 DialectType::MySQL
22985 | DialectType::SingleStore
22986 | DialectType::StarRocks => {
22987 // MySQL GROUP_CONCAT should have explicit SEPARATOR (default ',')
22988 if gc.separator.is_none() {
22989 let mut gc = gc;
22990 gc.separator = Some(Expression::string(","));
22991 Ok(Expression::GroupConcat(gc))
22992 } else {
22993 Ok(Expression::GroupConcat(gc))
22994 }
22995 }
22996 _ => Ok(Expression::GroupConcat(gc)),
22997 }
22998 } else {
22999 Ok(e)
23000 }
23001 }
23002 Action::TempTableHash => {
23003 match e {
23004 Expression::CreateTable(mut ct) => {
23005 // TSQL #table -> TEMPORARY TABLE with # stripped from name
23006 let name = &ct.name.name.name;
23007 if name.starts_with('#') {
23008 ct.name.name.name = name.trim_start_matches('#').to_string();
23009 }
23010 // Set temporary flag
23011 ct.temporary = true;
23012 Ok(Expression::CreateTable(ct))
23013 }
23014 Expression::Table(mut tr) => {
23015 // Strip # from table references
23016 let name = &tr.name.name;
23017 if name.starts_with('#') {
23018 tr.name.name = name.trim_start_matches('#').to_string();
23019 }
23020 Ok(Expression::Table(tr))
23021 }
23022 Expression::DropTable(mut dt) => {
23023 // Strip # from DROP TABLE names
23024 for table_ref in &mut dt.names {
23025 if table_ref.name.name.starts_with('#') {
23026 table_ref.name.name =
23027 table_ref.name.name.trim_start_matches('#').to_string();
23028 }
23029 }
23030 Ok(Expression::DropTable(dt))
23031 }
23032 _ => Ok(e),
23033 }
23034 }
23035 Action::NvlClearOriginal => {
23036 if let Expression::Nvl(mut f) = e {
23037 f.original_name = None;
23038 Ok(Expression::Nvl(f))
23039 } else {
23040 Ok(e)
23041 }
23042 }
            Action::HiveCastToTryCast => {
                // Convert Hive/Spark CAST to TRY_CAST for targets that support it.
                // Before re-wrapping, the cast's target type may be adjusted for
                // specific source/target pairs (see the two blocks below).
                if let Expression::Cast(mut c) = e {
                    // For Spark/Hive -> DuckDB: TIMESTAMP -> TIMESTAMPTZ
                    // (Spark's TIMESTAMP is always timezone-aware)
                    if matches!(target, DialectType::DuckDB)
                        && matches!(source, DialectType::Spark | DialectType::Databricks)
                        && matches!(
                            c.to,
                            DataType::Timestamp {
                                timezone: false,
                                ..
                            }
                        )
                    {
                        // Custom type so the generator prints the name verbatim.
                        c.to = DataType::Custom {
                            name: "TIMESTAMPTZ".to_string(),
                        };
                    }
                    // For Spark source -> Databricks: VARCHAR/CHAR -> STRING
                    // Spark parses VARCHAR(n)/CHAR(n) as TEXT, normalize to STRING
                    if matches!(target, DialectType::Databricks | DialectType::Spark)
                        && matches!(
                            source,
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive
                        )
                        && Self::has_varchar_char_type(&c.to)
                    {
                        c.to = Self::normalize_varchar_to_string(c.to);
                    }
                    // Re-wrap as TRY_CAST, keeping every other Cast field intact.
                    Ok(Expression::TryCast(c))
                } else {
                    Ok(e)
                }
            }
            Action::XorExpand => {
                // Expand XOR to (a AND NOT b) OR (NOT a AND b) for dialects without XOR keyword
                // Snowflake: use BOOLXOR(a, b) instead
                if let Expression::Xor(xor) = e {
                    // Collect all XOR operands from the three possible slots:
                    // `this`, `expression`, and the variadic `expressions` tail.
                    let mut operands = Vec::new();
                    if let Some(this) = xor.this {
                        operands.push(*this);
                    }
                    if let Some(expr) = xor.expression {
                        operands.push(*expr);
                    }
                    operands.extend(xor.expressions);

                    // Snowflake: use BOOLXOR(a, b)
                    // Only the exact binary case; 3+ operands fall through to
                    // the generic boolean expansion below.
                    if matches!(target, DialectType::Snowflake) && operands.len() == 2 {
                        let a = operands.remove(0);
                        let b = operands.remove(0);
                        return Ok(Expression::Function(Box::new(Function::new(
                            "BOOLXOR".to_string(),
                            vec![a, b],
                        ))));
                    }

                    // Helper to build (a AND NOT b) OR (NOT a AND b).
                    // Each sub-expression is parenthesized so operator
                    // precedence survives regeneration in any target dialect.
                    // Note: `a` and `b` are cloned because each appears twice.
                    let make_xor = |a: Expression, b: Expression| -> Expression {
                        let not_b = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(b.clone()),
                        ));
                        let not_a = Expression::Not(Box::new(
                            crate::expressions::UnaryOp::new(a.clone()),
                        ));
                        let left_and = Expression::And(Box::new(BinaryOp {
                            left: a,
                            right: Expression::Paren(Box::new(Paren {
                                this: not_b,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let right_and = Expression::And(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: not_a,
                                trailing_comments: Vec::new(),
                            })),
                            right: b,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        Expression::Or(Box::new(BinaryOp {
                            left: Expression::Paren(Box::new(Paren {
                                this: left_and,
                                trailing_comments: Vec::new(),
                            })),
                            right: Expression::Paren(Box::new(Paren {
                                this: right_and,
                                trailing_comments: Vec::new(),
                            })),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                    // Fold left-to-right: a XOR b XOR c becomes
                    // xor(xor(a, b), c) via repeated application.
                    if operands.len() >= 2 {
                        let mut result = make_xor(operands.remove(0), operands.remove(0));
                        for operand in operands {
                            result = make_xor(result, operand);
                        }
                        Ok(result)
                    } else if operands.len() == 1 {
                        // Degenerate single-operand XOR is just the operand.
                        Ok(operands.remove(0))
                    } else {
                        // No operands - return FALSE (shouldn't happen)
                        Ok(Expression::Boolean(crate::expressions::BooleanLiteral {
                            value: false,
                        }))
                    }
                } else {
                    Ok(e)
                }
            }
23166 Action::DatePartUnquote => {
23167 // DATE_PART('month', x) -> DATE_PART(month, x) for Snowflake target
23168 // Convert the quoted string first arg to a bare Column/Identifier
23169 if let Expression::Function(mut f) = e {
23170 if let Some(Expression::Literal(lit)) = f.args.first() {
23171 if let crate::expressions::Literal::String(s) = lit.as_ref() {
23172 let bare_name = s.to_ascii_lowercase();
23173 f.args[0] =
23174 Expression::Column(Box::new(crate::expressions::Column {
23175 name: Identifier::new(bare_name),
23176 table: None,
23177 join_mark: false,
23178 trailing_comments: Vec::new(),
23179 span: None,
23180 inferred_type: None,
23181 }));
23182 }
23183 }
23184 Ok(Expression::Function(f))
23185 } else {
23186 Ok(e)
23187 }
23188 }
23189 Action::ArrayLengthConvert => {
23190 // Extract the argument from the expression
23191 let arg = match e {
23192 Expression::Cardinality(ref f) => f.this.clone(),
23193 Expression::ArrayLength(ref f) => f.this.clone(),
23194 Expression::ArraySize(ref f) => f.this.clone(),
23195 _ => return Ok(e),
23196 };
23197 match target {
23198 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
23199 Ok(Expression::Function(Box::new(Function::new(
23200 "SIZE".to_string(),
23201 vec![arg],
23202 ))))
23203 }
23204 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
23205 Ok(Expression::Cardinality(Box::new(
23206 crate::expressions::UnaryFunc::new(arg),
23207 )))
23208 }
23209 DialectType::BigQuery => Ok(Expression::ArrayLength(Box::new(
23210 crate::expressions::UnaryFunc::new(arg),
23211 ))),
23212 DialectType::DuckDB => Ok(Expression::ArrayLength(Box::new(
23213 crate::expressions::UnaryFunc::new(arg),
23214 ))),
23215 DialectType::PostgreSQL | DialectType::Redshift => {
23216 // PostgreSQL ARRAY_LENGTH requires dimension arg
23217 Ok(Expression::Function(Box::new(Function::new(
23218 "ARRAY_LENGTH".to_string(),
23219 vec![arg, Expression::number(1)],
23220 ))))
23221 }
23222 DialectType::Snowflake => Ok(Expression::ArraySize(Box::new(
23223 crate::expressions::UnaryFunc::new(arg),
23224 ))),
23225 _ => Ok(e), // Keep original
23226 }
23227 }
23228
            Action::JsonExtractToArrow => {
                // JSON_EXTRACT(x, path) -> x -> path for SQLite/DuckDB (set arrow_syntax = true)
                if let Expression::JsonExtract(mut f) = e {
                    f.arrow_syntax = true;
                    // Transform path: convert bracket notation to dot notation
                    // SQLite strips wildcards, DuckDB preserves them
                    if let Expression::Literal(ref lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let mut transformed = s.clone();
                            if matches!(target, DialectType::SQLite) {
                                transformed = Self::strip_json_wildcards(&transformed);
                            }
                            transformed = Self::bracket_to_dot_notation(&transformed);
                            // Only rebuild the literal when normalization
                            // actually changed it, keeping the original node
                            // (and any attached metadata) otherwise.
                            if transformed != *s {
                                f.path = Expression::string(&transformed);
                            }
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
23252
23253 Action::JsonExtractToGetJsonObject => {
23254 if let Expression::JsonExtract(f) = e {
23255 if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
23256 // JSON_EXTRACT(x, '$.key') -> JSON_EXTRACT_PATH(x, 'key') for PostgreSQL
23257 // Use proper decomposition that handles brackets
23258 let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path {
23259 if let Literal::String(ref s) = lit.as_ref() {
23260 let parts = Self::decompose_json_path(s);
23261 parts.into_iter().map(|k| Expression::string(&k)).collect()
23262 } else {
23263 vec![]
23264 }
23265 } else {
23266 vec![f.path]
23267 };
23268 let func_name = if matches!(target, DialectType::Redshift) {
23269 "JSON_EXTRACT_PATH_TEXT"
23270 } else {
23271 "JSON_EXTRACT_PATH"
23272 };
23273 let mut args = vec![f.this];
23274 args.extend(keys);
23275 Ok(Expression::Function(Box::new(Function::new(
23276 func_name.to_string(),
23277 args,
23278 ))))
23279 } else {
23280 // GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23281 // Convert bracket double quotes to single quotes
23282 let path = if let Expression::Literal(ref lit) = f.path {
23283 if let Literal::String(ref s) = lit.as_ref() {
23284 let normalized = Self::bracket_to_single_quotes(s);
23285 if normalized != *s {
23286 Expression::string(&normalized)
23287 } else {
23288 f.path.clone()
23289 }
23290 } else {
23291 f.path.clone()
23292 }
23293 } else {
23294 f.path.clone()
23295 };
23296 Ok(Expression::Function(Box::new(Function::new(
23297 "GET_JSON_OBJECT".to_string(),
23298 vec![f.this, path],
23299 ))))
23300 }
23301 } else {
23302 Ok(e)
23303 }
23304 }
23305
23306 Action::JsonExtractScalarToGetJsonObject => {
23307 // JSON_EXTRACT_SCALAR(x, '$.path') -> GET_JSON_OBJECT(x, '$.path') for Hive/Spark
23308 if let Expression::JsonExtractScalar(f) = e {
23309 Ok(Expression::Function(Box::new(Function::new(
23310 "GET_JSON_OBJECT".to_string(),
23311 vec![f.this, f.path],
23312 ))))
23313 } else {
23314 Ok(e)
23315 }
23316 }
23317
            Action::JsonExtractToTsql => {
                // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> ISNULL(JSON_QUERY(x, path), JSON_VALUE(x, path)) for TSQL
                // JSON_QUERY covers object/array results and JSON_VALUE covers
                // scalars; ISNULL over both yields whichever is non-null.
                let (this, path) = match e {
                    Expression::JsonExtract(f) => (f.this, f.path),
                    Expression::JsonExtractScalar(f) => (f.this, f.path),
                    _ => return Ok(e),
                };
                // Transform path: strip wildcards, convert bracket notation to dot notation
                // (only for string-literal paths; dynamic paths pass through).
                let transformed_path = if let Expression::Literal(ref lit) = path {
                    if let Literal::String(ref s) = lit.as_ref() {
                        let stripped = Self::strip_json_wildcards(s);
                        let dotted = Self::bracket_to_dot_notation(&stripped);
                        Expression::string(&dotted)
                    } else {
                        path.clone()
                    }
                } else {
                    path
                };
                // Both branches receive the same operand/path pair.
                let json_query = Expression::Function(Box::new(Function::new(
                    "JSON_QUERY".to_string(),
                    vec![this.clone(), transformed_path.clone()],
                )));
                let json_value = Expression::Function(Box::new(Function::new(
                    "JSON_VALUE".to_string(),
                    vec![this, transformed_path],
                )));
                Ok(Expression::Function(Box::new(Function::new(
                    "ISNULL".to_string(),
                    vec![json_query, json_value],
                ))))
            }
23350
23351 Action::JsonExtractToClickHouse => {
23352 // JSON_EXTRACT/JSON_EXTRACT_SCALAR -> JSONExtractString(x, 'key1', idx, 'key2') for ClickHouse
23353 let (this, path) = match e {
23354 Expression::JsonExtract(f) => (f.this, f.path),
23355 Expression::JsonExtractScalar(f) => (f.this, f.path),
23356 _ => return Ok(e),
23357 };
23358 let args: Vec<Expression> = if let Expression::Literal(lit) = path {
23359 if let Literal::String(ref s) = lit.as_ref() {
23360 let parts = Self::decompose_json_path(s);
23361 let mut result = vec![this];
23362 for part in parts {
23363 // ClickHouse uses 1-based integer indices for array access
23364 if let Ok(idx) = part.parse::<i64>() {
23365 result.push(Expression::number(idx + 1));
23366 } else {
23367 result.push(Expression::string(&part));
23368 }
23369 }
23370 result
23371 } else {
23372 vec![]
23373 }
23374 } else {
23375 vec![this, path]
23376 };
23377 Ok(Expression::Function(Box::new(Function::new(
23378 "JSONExtractString".to_string(),
23379 args,
23380 ))))
23381 }
23382
            Action::JsonExtractScalarConvert => {
                // JSON_EXTRACT_SCALAR -> target-specific
                if let Expression::JsonExtractScalar(f) = e {
                    match target {
                        DialectType::PostgreSQL | DialectType::Redshift => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'key1', 'key2')
                            // NOTE(review): a non-string literal path yields an
                            // empty key list here, silently dropping the path
                            // argument from the generated call — confirm this
                            // is intended (the non-literal case passes the path
                            // through instead).
                            let keys: Vec<Expression> = if let Expression::Literal(lit) = f.path
                            {
                                if let Literal::String(ref s) = lit.as_ref() {
                                    let parts = Self::decompose_json_path(s);
                                    parts.into_iter().map(|k| Expression::string(&k)).collect()
                                } else {
                                    vec![]
                                }
                            } else {
                                vec![f.path]
                            };
                            let mut args = vec![f.this];
                            args.extend(keys);
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                args,
                            ))))
                        }
                        DialectType::Snowflake => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> JSON_EXTRACT_PATH_TEXT(x, 'stripped_path')
                            // Snowflake takes the whole path as one argument,
                            // minus the leading '$' prefix.
                            let stripped_path = if let Expression::Literal(ref lit) = f.path {
                                if let Literal::String(ref s) = lit.as_ref() {
                                    let stripped = Self::strip_json_dollar_prefix(s);
                                    Expression::string(&stripped)
                                } else {
                                    f.path.clone()
                                }
                            } else {
                                f.path
                            };
                            Ok(Expression::Function(Box::new(Function::new(
                                "JSON_EXTRACT_PATH_TEXT".to_string(),
                                vec![f.this, stripped_path],
                            ))))
                        }
                        DialectType::SQLite | DialectType::DuckDB => {
                            // JSON_EXTRACT_SCALAR(x, '$.path') -> x ->> '$.path'
                            // Keep the node type and flip on arrow syntax so the
                            // generator emits the operator form.
                            Ok(Expression::JsonExtractScalar(Box::new(
                                crate::expressions::JsonExtractFunc {
                                    this: f.this,
                                    path: f.path,
                                    returning: f.returning,
                                    arrow_syntax: true,
                                    hash_arrow_syntax: false,
                                    wrapper_option: None,
                                    quotes_option: None,
                                    on_scalar_string: false,
                                    on_error: None,
                                },
                            )))
                        }
                        // All other targets keep the node unchanged.
                        _ => Ok(Expression::JsonExtractScalar(f)),
                    }
                } else {
                    Ok(e)
                }
            }
23446
            Action::JsonPathNormalize => {
                // Normalize JSON path format for BigQuery, MySQL, etc.
                // Only string-literal paths are rewritten; dynamic paths and
                // other targets are left untouched.
                if let Expression::JsonExtract(mut f) = e {
                    if let Expression::Literal(ref lit) = f.path {
                        if let Literal::String(ref s) = lit.as_ref() {
                            let mut normalized = s.clone();
                            // Convert bracket notation and handle wildcards per dialect
                            match target {
                                DialectType::BigQuery => {
                                    // BigQuery strips wildcards and uses single quotes in brackets
                                    normalized = Self::strip_json_wildcards(&normalized);
                                    normalized = Self::bracket_to_single_quotes(&normalized);
                                }
                                DialectType::MySQL => {
                                    // MySQL preserves wildcards, converts brackets to dot notation
                                    normalized = Self::bracket_to_dot_notation(&normalized);
                                }
                                _ => {}
                            }
                            // Only rebuild the literal when normalization
                            // changed something.
                            if normalized != *s {
                                f.path = Expression::string(&normalized);
                            }
                        }
                    }
                    Ok(Expression::JsonExtract(f))
                } else {
                    Ok(e)
                }
            }
23476
            Action::JsonQueryValueConvert => {
                // JsonQuery/JsonValue -> target-specific
                // `is_query` records which node we started from; it is only
                // needed by the default branch at the bottom.
                let (f, is_query) = match e {
                    Expression::JsonQuery(f) => (f, true),
                    Expression::JsonValue(f) => (f, false),
                    _ => return Ok(e),
                };
                match target {
                    DialectType::TSQL | DialectType::Fabric => {
                        // ISNULL(JSON_QUERY(...), JSON_VALUE(...))
                        // JSON_QUERY covers object/array results, JSON_VALUE
                        // covers scalars; ISNULL picks whichever is non-null.
                        let json_query = Expression::Function(Box::new(Function::new(
                            "JSON_QUERY".to_string(),
                            vec![f.this.clone(), f.path.clone()],
                        )));
                        let json_value = Expression::Function(Box::new(Function::new(
                            "JSON_VALUE".to_string(),
                            vec![f.this, f.path],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ISNULL".to_string(),
                            vec![json_query, json_value],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_JSON_OBJECT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_EXTRACT_PATH_TEXT".to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                    DialectType::DuckDB | DialectType::SQLite => {
                        // json -> path arrow syntax
                        // Rebuild as JsonExtract with arrow_syntax on,
                        // forwarding the JSON-option fields unchanged.
                        Ok(Expression::JsonExtract(Box::new(
                            crate::expressions::JsonExtractFunc {
                                this: f.this,
                                path: f.path,
                                returning: f.returning,
                                arrow_syntax: true,
                                hash_arrow_syntax: false,
                                wrapper_option: f.wrapper_option,
                                quotes_option: f.quotes_option,
                                on_scalar_string: f.on_scalar_string,
                                on_error: f.on_error,
                            },
                        )))
                    }
                    DialectType::Snowflake => {
                        // GET_PATH(PARSE_JSON(json), 'path')
                        // Strip $. prefix from path
                        // Only wrap in PARSE_JSON if not already a PARSE_JSON call or ParseJson expression
                        let json_expr = match &f.this {
                            Expression::Function(ref inner_f)
                                if inner_f.name.eq_ignore_ascii_case("PARSE_JSON") =>
                            {
                                f.this
                            }
                            Expression::ParseJson(_) => {
                                // Already a ParseJson expression, which generates as PARSE_JSON(...)
                                f.this
                            }
                            _ => Expression::Function(Box::new(Function::new(
                                "PARSE_JSON".to_string(),
                                vec![f.this],
                            ))),
                        };
                        let path_str = match &f.path {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                // The guard above guarantees the String variant.
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                let stripped = s.strip_prefix("$.").unwrap_or(s);
                                Expression::Literal(Box::new(Literal::String(
                                    stripped.to_string(),
                                )))
                            }
                            // Non-literal paths pass through unchanged.
                            other => other.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "GET_PATH".to_string(),
                            vec![json_expr, path_str],
                        ))))
                    }
                    _ => {
                        // Default: keep as JSON_QUERY/JSON_VALUE function
                        let func_name = if is_query { "JSON_QUERY" } else { "JSON_VALUE" };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![f.this, f.path],
                        ))))
                    }
                }
            }
23576
23577 Action::JsonLiteralToJsonParse => {
23578 // CAST('x' AS JSON) -> JSON_PARSE('x') for Presto, PARSE_JSON for Snowflake
23579 // Also DuckDB CAST(x AS JSON) -> JSON_PARSE(x) for Trino/Presto/Athena
23580 if let Expression::Cast(c) = e {
23581 let func_name = if matches!(target, DialectType::Snowflake) {
23582 "PARSE_JSON"
23583 } else {
23584 "JSON_PARSE"
23585 };
23586 Ok(Expression::Function(Box::new(Function::new(
23587 func_name.to_string(),
23588 vec![c.this],
23589 ))))
23590 } else {
23591 Ok(e)
23592 }
23593 }
23594
23595 Action::DuckDBCastJsonToVariant => {
23596 if let Expression::Cast(c) = e {
23597 Ok(Expression::Cast(Box::new(Cast {
23598 this: c.this,
23599 to: DataType::Custom {
23600 name: "VARIANT".to_string(),
23601 },
23602 trailing_comments: c.trailing_comments,
23603 double_colon_syntax: false,
23604 format: None,
23605 default: None,
23606 inferred_type: None,
23607 })))
23608 } else {
23609 Ok(e)
23610 }
23611 }
23612
23613 Action::DuckDBTryCastJsonToTryJsonParse => {
23614 // DuckDB TRY_CAST(x AS JSON) -> TRY(JSON_PARSE(x)) for Trino/Presto/Athena
23615 if let Expression::TryCast(c) = e {
23616 let json_parse = Expression::Function(Box::new(Function::new(
23617 "JSON_PARSE".to_string(),
23618 vec![c.this],
23619 )));
23620 Ok(Expression::Function(Box::new(Function::new(
23621 "TRY".to_string(),
23622 vec![json_parse],
23623 ))))
23624 } else {
23625 Ok(e)
23626 }
23627 }
23628
23629 Action::DuckDBJsonFuncToJsonParse => {
23630 // DuckDB json(x) -> JSON_PARSE(x) for Trino/Presto/Athena
23631 if let Expression::Function(f) = e {
23632 let args = f.args;
23633 Ok(Expression::Function(Box::new(Function::new(
23634 "JSON_PARSE".to_string(),
23635 args,
23636 ))))
23637 } else {
23638 Ok(e)
23639 }
23640 }
23641
23642 Action::DuckDBJsonValidToIsJson => {
23643 // DuckDB json_valid(x) -> x IS JSON (SQL:2016 predicate) for Trino/Presto/Athena
23644 if let Expression::Function(mut f) = e {
23645 let arg = f.args.remove(0);
23646 Ok(Expression::IsJson(Box::new(crate::expressions::IsJson {
23647 this: arg,
23648 json_type: None,
23649 unique_keys: None,
23650 negated: false,
23651 })))
23652 } else {
23653 Ok(e)
23654 }
23655 }
23656
            Action::AtTimeZoneConvert => {
                // AT TIME ZONE -> target-specific conversion
                if let Expression::AtTimeZone(atz) = e {
                    match target {
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "AT_TIMEZONE".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        DialectType::Spark | DialectType::Databricks => {
                            Ok(Expression::Function(Box::new(Function::new(
                                "FROM_UTC_TIMESTAMP".to_string(),
                                vec![atz.this, atz.zone],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // CONVERT_TIMEZONE('zone', expr)
                            // Note the swapped argument order: zone comes first.
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONVERT_TIMEZONE".to_string(),
                                vec![atz.zone, atz.this],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // TIMESTAMP(DATETIME(expr, 'zone'))
                            let datetime_call = Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![atz.this, atz.zone],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMESTAMP".to_string(),
                                vec![datetime_call],
                            ))))
                        }
                        // Default for all remaining targets: AT_TIMEZONE(expr, zone).
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "AT_TIMEZONE".to_string(),
                            vec![atz.this, atz.zone],
                        )))),
                    }
                } else {
                    Ok(e)
                }
            }
23700
            Action::DayOfWeekConvert => {
                // DAY_OF_WEEK -> ISODOW for DuckDB, ((DAYOFWEEK(x) % 7) + 1) for Spark
                // NOTE(review): the Spark arithmetic looks like a shift from a
                // Sunday-based DAYOFWEEK index to the source dialect's
                // numbering — confirm against Spark's DAYOFWEEK semantics.
                if let Expression::DayOfWeek(f) = e {
                    match target {
                        DialectType::DuckDB => Ok(Expression::Function(Box::new(
                            Function::new("ISODOW".to_string(), vec![f.this]),
                        ))),
                        DialectType::Spark | DialectType::Databricks => {
                            // ((DAYOFWEEK(x) % 7) + 1)
                            let dayofweek = Expression::Function(Box::new(Function::new(
                                "DAYOFWEEK".to_string(),
                                vec![f.this],
                            )));
                            let modulo = Expression::Mod(Box::new(BinaryOp {
                                left: dayofweek,
                                right: Expression::number(7),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            // Parenthesize the modulo so precedence survives
                            // regeneration, then add 1 and wrap the whole thing.
                            let paren_mod = Expression::Paren(Box::new(Paren {
                                this: modulo,
                                trailing_comments: Vec::new(),
                            }));
                            let add_one = Expression::Add(Box::new(BinaryOp {
                                left: paren_mod,
                                right: Expression::number(1),
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            Ok(Expression::Paren(Box::new(Paren {
                                this: add_one,
                                trailing_comments: Vec::new(),
                            })))
                        }
                        // Other targets keep the DayOfWeek node unchanged.
                        _ => Ok(Expression::DayOfWeek(f)),
                    }
                } else {
                    Ok(e)
                }
            }
23745
23746 Action::MaxByMinByConvert => {
23747 // MAX_BY -> argMax for ClickHouse, drop 3rd arg for Spark
23748 // MIN_BY -> argMin for ClickHouse, ARG_MIN for DuckDB, drop 3rd arg for Spark/ClickHouse
23749 // Handle both Expression::Function and Expression::AggregateFunction
23750 let (is_max, args) = match &e {
23751 Expression::Function(f) => {
23752 (f.name.eq_ignore_ascii_case("MAX_BY"), f.args.clone())
23753 }
23754 Expression::AggregateFunction(af) => {
23755 (af.name.eq_ignore_ascii_case("MAX_BY"), af.args.clone())
23756 }
23757 _ => return Ok(e),
23758 };
23759 match target {
23760 DialectType::ClickHouse => {
23761 let name = if is_max { "argMax" } else { "argMin" };
23762 let mut args = args;
23763 args.truncate(2);
23764 Ok(Expression::Function(Box::new(Function::new(
23765 name.to_string(),
23766 args,
23767 ))))
23768 }
23769 DialectType::DuckDB => {
23770 let name = if is_max { "ARG_MAX" } else { "ARG_MIN" };
23771 Ok(Expression::Function(Box::new(Function::new(
23772 name.to_string(),
23773 args,
23774 ))))
23775 }
23776 DialectType::Spark | DialectType::Databricks => {
23777 let mut args = args;
23778 args.truncate(2);
23779 let name = if is_max { "MAX_BY" } else { "MIN_BY" };
23780 Ok(Expression::Function(Box::new(Function::new(
23781 name.to_string(),
23782 args,
23783 ))))
23784 }
23785 _ => Ok(e),
23786 }
23787 }
23788
23789 Action::ElementAtConvert => {
23790 // ELEMENT_AT(arr, idx) -> arr[idx] for PostgreSQL, arr[SAFE_ORDINAL(idx)] for BigQuery
23791 let (arr, idx) = if let Expression::ElementAt(bf) = e {
23792 (bf.this, bf.expression)
23793 } else if let Expression::Function(ref f) = e {
23794 if f.args.len() >= 2 {
23795 if let Expression::Function(f) = e {
23796 let mut args = f.args;
23797 let arr = args.remove(0);
23798 let idx = args.remove(0);
23799 (arr, idx)
23800 } else {
23801 unreachable!("outer condition already matched Expression::Function")
23802 }
23803 } else {
23804 return Ok(e);
23805 }
23806 } else {
23807 return Ok(e);
23808 };
23809 match target {
23810 DialectType::PostgreSQL => {
23811 // Wrap array in parens for PostgreSQL: (ARRAY[1,2,3])[4]
23812 let arr_expr = Expression::Paren(Box::new(Paren {
23813 this: arr,
23814 trailing_comments: vec![],
23815 }));
23816 Ok(Expression::Subscript(Box::new(
23817 crate::expressions::Subscript {
23818 this: arr_expr,
23819 index: idx,
23820 },
23821 )))
23822 }
23823 DialectType::BigQuery => {
23824 // BigQuery: convert ARRAY[...] to bare [...] for subscript
23825 let arr_expr = match arr {
23826 Expression::ArrayFunc(af) => Expression::ArrayFunc(Box::new(
23827 crate::expressions::ArrayConstructor {
23828 expressions: af.expressions,
23829 bracket_notation: true,
23830 use_list_keyword: false,
23831 },
23832 )),
23833 other => other,
23834 };
23835 let safe_ordinal = Expression::Function(Box::new(Function::new(
23836 "SAFE_ORDINAL".to_string(),
23837 vec![idx],
23838 )));
23839 Ok(Expression::Subscript(Box::new(
23840 crate::expressions::Subscript {
23841 this: arr_expr,
23842 index: safe_ordinal,
23843 },
23844 )))
23845 }
23846 _ => Ok(Expression::Function(Box::new(Function::new(
23847 "ELEMENT_AT".to_string(),
23848 vec![arr, idx],
23849 )))),
23850 }
23851 }
23852
23853 Action::CurrentUserParens => {
23854 // CURRENT_USER -> CURRENT_USER() for Snowflake
23855 Ok(Expression::Function(Box::new(Function::new(
23856 "CURRENT_USER".to_string(),
23857 vec![],
23858 ))))
23859 }
23860
            Action::ArrayAggToCollectList => {
                // ARRAY_AGG(x ORDER BY ...) -> COLLECT_LIST(x) for Hive/Spark
                // Python sqlglot Hive.arrayagg_sql strips ORDER BY for simple cases
                // but preserves it when DISTINCT/IGNORE NULLS/LIMIT are present
                match e {
                    Expression::AggregateFunction(mut af) => {
                        // "Simple" = no DISTINCT, no IGNORE NULLS, no LIMIT;
                        // only then is ORDER BY dropped.
                        let is_simple =
                            !af.distinct && af.ignore_nulls.is_none() && af.limit.is_none();
                        // COLLECT_LIST takes a single argument; keep only the first.
                        let args = if af.args.is_empty() {
                            vec![]
                        } else {
                            vec![af.args[0].clone()]
                        };
                        af.name = "COLLECT_LIST".to_string();
                        af.args = args;
                        if is_simple {
                            af.order_by = Vec::new();
                        }
                        Ok(Expression::AggregateFunction(af))
                    }
                    Expression::ArrayAgg(agg) => {
                        // Same rule for the dedicated ArrayAgg node, rebuilt as
                        // a generic aggregate call.
                        let is_simple =
                            !agg.distinct && agg.ignore_nulls.is_none() && agg.limit.is_none();
                        Ok(Expression::AggregateFunction(Box::new(
                            crate::expressions::AggregateFunction {
                                name: "COLLECT_LIST".to_string(),
                                args: vec![agg.this.clone()],
                                distinct: agg.distinct,
                                filter: agg.filter.clone(),
                                order_by: if is_simple {
                                    Vec::new()
                                } else {
                                    agg.order_by.clone()
                                },
                                limit: agg.limit.clone(),
                                ignore_nulls: agg.ignore_nulls,
                                inferred_type: None,
                            },
                        )))
                    }
                    _ => Ok(e),
                }
            }
23904
            Action::ArraySyntaxConvert => {
                match e {
                    // ARRAY[1, 2] (ArrayFunc bracket_notation=false) -> set bracket_notation=true
                    // so the generator uses dialect-specific output (ARRAY() for Spark, [] for BigQuery)
                    Expression::ArrayFunc(arr) if !arr.bracket_notation => Ok(
                        Expression::ArrayFunc(Box::new(crate::expressions::ArrayConstructor {
                            expressions: arr.expressions,
                            bracket_notation: true,
                            use_list_keyword: false,
                        })),
                    ),
                    // ARRAY(y) function style -> ArrayFunc for target dialect
                    // bracket_notation=true for BigQuery/DuckDB/ClickHouse/StarRocks (output []), false for Presto (output ARRAY[])
                    // (Snowflake is also in the bracket-notation set below.)
                    Expression::Function(f) if f.name.eq_ignore_ascii_case("ARRAY") => {
                        let bracket = matches!(
                            target,
                            DialectType::BigQuery
                                | DialectType::DuckDB
                                | DialectType::Snowflake
                                | DialectType::ClickHouse
                                | DialectType::StarRocks
                        );
                        Ok(Expression::ArrayFunc(Box::new(
                            crate::expressions::ArrayConstructor {
                                expressions: f.args,
                                bracket_notation: bracket,
                                use_list_keyword: false,
                            },
                        )))
                    }
                    // Anything else passes through untouched.
                    _ => Ok(e),
                }
            }
23938
Action::CastToJsonForSpark => {
    // CAST(x AS JSON) -> TO_JSON(x) for Spark.
    // Note: any Cast reaching this action is rewritten; the dispatcher is
    // responsible for only routing JSON casts here.
    if let Expression::Cast(c) = e {
        Ok(Expression::Function(Box::new(Function::new(
            "TO_JSON".to_string(),
            vec![c.this],
        ))))
    } else {
        Ok(e)
    }
}

Action::CastJsonToFromJson => {
    // CAST(ParseJson(literal) AS ARRAY/MAP/STRUCT) -> FROM_JSON(literal, type_string) for Spark
    if let Expression::Cast(c) = e {
        // Extract the string literal from ParseJson; a non-ParseJson
        // operand is passed to FROM_JSON unchanged.
        let literal_expr = if let Expression::ParseJson(pj) = c.this {
            pj.this
        } else {
            c.this
        };
        // Convert the target DataType to Spark's type string format
        // for FROM_JSON's second (schema) argument.
        let type_str = Self::data_type_to_spark_string(&c.to);
        Ok(Expression::Function(Box::new(Function::new(
            "FROM_JSON".to_string(),
            vec![
                literal_expr,
                Expression::Literal(Box::new(Literal::String(type_str))),
            ],
        ))))
    } else {
        Ok(e)
    }
}
23973
Action::ToJsonConvert => {
    // TO_JSON(x) -> target-specific conversion.
    if let Expression::ToJson(f) = e {
        let arg = f.this;
        match target {
            DialectType::Presto | DialectType::Trino => {
                // JSON_FORMAT(CAST(x AS JSON))
                let cast_json = Expression::Cast(Box::new(Cast {
                    this: arg,
                    // JSON is spelled as a custom type name rather than a
                    // first-class DataType variant.
                    to: DataType::Custom {
                        name: "JSON".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Ok(Expression::Function(Box::new(Function::new(
                    "JSON_FORMAT".to_string(),
                    vec![cast_json],
                ))))
            }
            DialectType::BigQuery => Ok(Expression::Function(Box::new(
                Function::new("TO_JSON_STRING".to_string(), vec![arg]),
            ))),
            DialectType::DuckDB => {
                // CAST(TO_JSON(x) AS TEXT)
                let to_json =
                    Expression::ToJson(Box::new(crate::expressions::UnaryFunc {
                        this: arg,
                        original_name: None,
                        inferred_type: None,
                    }));
                Ok(Expression::Cast(Box::new(Cast {
                    this: to_json,
                    to: DataType::Text,
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                })))
            }
            // Any other target keeps the ToJson node as-is (rebuilt because
            // `arg` was moved out of the original box).
            _ => Ok(Expression::ToJson(Box::new(
                crate::expressions::UnaryFunc {
                    this: arg,
                    original_name: None,
                    inferred_type: None,
                },
            ))),
        }
    } else {
        Ok(e)
    }
}
24030
24031 Action::VarianceToClickHouse => {
24032 if let Expression::Variance(f) = e {
24033 Ok(Expression::Function(Box::new(Function::new(
24034 "varSamp".to_string(),
24035 vec![f.this],
24036 ))))
24037 } else {
24038 Ok(e)
24039 }
24040 }
24041
24042 Action::StddevToClickHouse => {
24043 if let Expression::Stddev(f) = e {
24044 Ok(Expression::Function(Box::new(Function::new(
24045 "stddevSamp".to_string(),
24046 vec![f.this],
24047 ))))
24048 } else {
24049 Ok(e)
24050 }
24051 }
24052
24053 Action::ApproxQuantileConvert => {
24054 if let Expression::ApproxQuantile(aq) = e {
24055 let mut args = vec![*aq.this];
24056 if let Some(q) = aq.quantile {
24057 args.push(*q);
24058 }
24059 Ok(Expression::Function(Box::new(Function::new(
24060 "APPROX_PERCENTILE".to_string(),
24061 args,
24062 ))))
24063 } else {
24064 Ok(e)
24065 }
24066 }
24067
24068 Action::DollarParamConvert => {
24069 if let Expression::Parameter(p) = e {
24070 Ok(Expression::Parameter(Box::new(
24071 crate::expressions::Parameter {
24072 name: p.name,
24073 index: p.index,
24074 style: crate::expressions::ParameterStyle::At,
24075 quoted: p.quoted,
24076 string_quoted: p.string_quoted,
24077 expression: p.expression,
24078 },
24079 )))
24080 } else {
24081 Ok(e)
24082 }
24083 }
24084
24085 Action::EscapeStringNormalize => {
24086 if let Expression::Literal(ref lit) = e {
24087 if let Literal::EscapeString(s) = lit.as_ref() {
24088 // Strip prefix (e.g., "e:" or "E:") if present from tokenizer
24089 let stripped = if s.starts_with("e:") || s.starts_with("E:") {
24090 s[2..].to_string()
24091 } else {
24092 s.clone()
24093 };
24094 let normalized = stripped
24095 .replace('\n', "\\n")
24096 .replace('\r', "\\r")
24097 .replace('\t', "\\t");
24098 match target {
24099 DialectType::BigQuery => {
24100 // BigQuery: e'...' -> CAST(b'...' AS STRING)
24101 // Use Raw for the b'...' part to avoid double-escaping
24102 let raw_sql = format!("CAST(b'{}' AS STRING)", normalized);
24103 Ok(Expression::Raw(crate::expressions::Raw { sql: raw_sql }))
24104 }
24105 _ => Ok(Expression::Literal(Box::new(Literal::EscapeString(
24106 normalized,
24107 )))),
24108 }
24109 } else {
24110 Ok(e)
24111 }
24112 } else {
24113 Ok(e)
24114 }
24115 }
24116
24117 Action::StraightJoinCase => {
24118 // straight_join: keep lowercase for DuckDB, quote for MySQL
24119 if let Expression::Column(col) = e {
24120 if col.name.name == "STRAIGHT_JOIN" {
24121 let mut new_col = col;
24122 new_col.name.name = "straight_join".to_string();
24123 if matches!(target, DialectType::MySQL) {
24124 // MySQL: needs quoting since it's a reserved keyword
24125 new_col.name.quoted = true;
24126 }
24127 Ok(Expression::Column(new_col))
24128 } else {
24129 Ok(Expression::Column(col))
24130 }
24131 } else {
24132 Ok(e)
24133 }
24134 }
24135
Action::TablesampleReservoir => {
    // TABLESAMPLE -> TABLESAMPLE RESERVOIR for DuckDB.
    if let Expression::TableSample(mut ts) = e {
        if let Some(ref mut sample) = ts.sample {
            // Force the RESERVOIR method and make it explicit so the
            // generator prints the method keyword.
            sample.method = crate::expressions::SampleMethod::Reservoir;
            sample.explicit_method = true;
        }
        Ok(Expression::TableSample(ts))
    } else {
        Ok(e)
    }
}

Action::TablesampleSnowflakeStrip => {
    // Strip method and PERCENT for Snowflake target from non-Snowflake source
    match e {
        Expression::TableSample(mut ts) => {
            if let Some(ref mut sample) = ts.sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::TableSample(ts))
        }
        // Sample specs can also be attached directly to a Table node.
        Expression::Table(mut t) => {
            if let Some(ref mut sample) = t.table_sample {
                sample.suppress_method_output = true;
                sample.unit_after_size = false;
                sample.is_percent = false;
            }
            Ok(Expression::Table(t))
        }
        _ => Ok(e),
    }
}
24171
Action::FirstToAnyValue => {
    // FIRST(col) IGNORE NULLS -> ANY_VALUE(col) for DuckDB.
    if let Expression::First(mut agg) = e {
        // Drop the IGNORE NULLS clause and retag the aggregate.
        agg.ignore_nulls = None;
        agg.name = Some("ANY_VALUE".to_string());
        Ok(Expression::AnyValue(agg))
    } else {
        Ok(e)
    }
}

Action::ArrayIndexConvert => {
    // Subscript index: 1-based to 0-based for BigQuery.
    // NOTE(review): assumes the incoming index is 1-based; non-numeric or
    // unparseable indexes pass through unchanged — confirm the dispatcher
    // only routes 1-based sources here.
    if let Expression::Subscript(mut sub) = e {
        if let Expression::Literal(ref lit) = sub.index {
            if let Literal::Number(ref n) = lit.as_ref() {
                if let Ok(val) = n.parse::<i64>() {
                    sub.index = Expression::Literal(Box::new(Literal::Number(
                        (val - 1).to_string(),
                    )));
                }
            }
        }
        Ok(Expression::Subscript(sub))
    } else {
        Ok(e)
    }
}

Action::AnyValueIgnoreNulls => {
    // ANY_VALUE(x) -> ANY_VALUE(x) IGNORE NULLS for Spark.
    if let Expression::AnyValue(mut av) = e {
        // Only synthesize the clause when none was written explicitly.
        if av.ignore_nulls.is_none() {
            av.ignore_nulls = Some(true);
        }
        Ok(Expression::AnyValue(av))
    } else {
        Ok(e)
    }
}

Action::BigQueryNullsOrdering => {
    // BigQuery doesn't support NULLS FIRST/LAST in window function ORDER BY.
    if let Expression::WindowFunction(mut wf) = e {
        for o in &mut wf.over.order_by {
            o.nulls_first = None;
        }
        Ok(Expression::WindowFunction(wf))
    } else if let Expression::Ordered(mut o) = e {
        // Bare ORDER BY items get the same stripping.
        o.nulls_first = None;
        Ok(Expression::Ordered(o))
    } else {
        Ok(e)
    }
}

Action::SnowflakeFloatProtect => {
    // Convert DataType::Float to DataType::Custom("FLOAT") to prevent
    // Snowflake's target transform from converting it to DOUBLE.
    // Non-Snowflake sources should keep their FLOAT spelling.
    if let Expression::DataType(DataType::Float { .. }) = e {
        Ok(Expression::DataType(DataType::Custom {
            name: "FLOAT".to_string(),
        }))
    } else {
        Ok(e)
    }
}
24240
24241 Action::MysqlNullsOrdering => {
24242 // MySQL doesn't support NULLS FIRST/LAST - strip or rewrite
24243 if let Expression::Ordered(mut o) = e {
24244 let nulls_last = o.nulls_first == Some(false);
24245 let desc = o.desc;
24246 // MySQL default: ASC -> NULLS LAST, DESC -> NULLS FIRST
24247 // If requested ordering matches default, just strip NULLS clause
24248 let matches_default = if desc {
24249 // DESC default is NULLS FIRST, so nulls_first=true matches
24250 o.nulls_first == Some(true)
24251 } else {
24252 // ASC default is NULLS LAST, so nulls_first=false matches
24253 nulls_last
24254 };
24255 if matches_default {
24256 o.nulls_first = None;
24257 Ok(Expression::Ordered(o))
24258 } else {
24259 // Need CASE WHEN x IS NULL THEN 0/1 ELSE 0/1 END, x
24260 // For ASC NULLS FIRST: ORDER BY CASE WHEN x IS NULL THEN 0 ELSE 1 END, x ASC
24261 // For DESC NULLS LAST: ORDER BY CASE WHEN x IS NULL THEN 1 ELSE 0 END, x DESC
24262 let null_val = if desc { 1 } else { 0 };
24263 let non_null_val = if desc { 0 } else { 1 };
24264 let _case_expr = Expression::Case(Box::new(Case {
24265 operand: None,
24266 whens: vec![(
24267 Expression::IsNull(Box::new(crate::expressions::IsNull {
24268 this: o.this.clone(),
24269 not: false,
24270 postfix_form: false,
24271 })),
24272 Expression::number(null_val),
24273 )],
24274 else_: Some(Expression::number(non_null_val)),
24275 comments: Vec::new(),
24276 inferred_type: None,
24277 }));
24278 o.nulls_first = None;
24279 // Return a tuple of [case_expr, ordered_expr]
24280 // We need to return both as part of the ORDER BY
24281 // But since transform_recursive processes individual expressions,
24282 // we can't easily add extra ORDER BY items here.
24283 // Instead, strip the nulls_first
24284 o.nulls_first = None;
24285 Ok(Expression::Ordered(o))
24286 }
24287 } else {
24288 Ok(e)
24289 }
24290 }
24291
Action::MysqlNullsLastRewrite => {
    // DuckDB -> MySQL: Add CASE WHEN IS NULL THEN 1 ELSE 0 END to ORDER BY
    // to simulate NULLS LAST for ASC ordering. Operates on the whole
    // window's ORDER BY list so extra sort keys can be inserted.
    if let Expression::WindowFunction(mut wf) = e {
        let mut new_order_by = Vec::new();
        for o in wf.over.order_by {
            if !o.desc {
                // ASC: DuckDB has NULLS LAST, MySQL has NULLS FIRST
                // Add CASE WHEN expr IS NULL THEN 1 ELSE 0 END before expr
                let case_expr = Expression::Case(Box::new(Case {
                    operand: None,
                    whens: vec![(
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: o.this.clone(),
                            not: false,
                            postfix_form: false,
                        })),
                        Expression::Literal(Box::new(Literal::Number(
                            "1".to_string(),
                        ))),
                    )],
                    else_: Some(Expression::Literal(Box::new(Literal::Number(
                        "0".to_string(),
                    )))),
                    comments: Vec::new(),
                    inferred_type: None,
                }));
                // NULL rows sort after non-NULL rows because the CASE key
                // yields 1 for NULLs and 0 otherwise.
                new_order_by.push(crate::expressions::Ordered {
                    this: case_expr,
                    desc: false,
                    nulls_first: None,
                    explicit_asc: false,
                    with_fill: None,
                });
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            } else {
                // DESC: DuckDB has NULLS LAST, MySQL also has NULLS LAST (NULLs smallest in DESC)
                // No change needed beyond stripping the unsupported clause.
                let mut ordered = o;
                ordered.nulls_first = None;
                new_order_by.push(ordered);
            }
        }
        wf.over.order_by = new_order_by;
        Ok(Expression::WindowFunction(wf))
    } else {
        Ok(e)
    }
}
24343
Action::RespectNullsConvert => {
    // RESPECT NULLS -> strip for SQLite (FIRST_VALUE(c) OVER (...)).
    // Only an explicit RESPECT NULLS (ignore_nulls == Some(false)) is
    // removed; IGNORE NULLS (Some(true)) is left alone.
    if let Expression::WindowFunction(mut wf) = e {
        match &mut wf.this {
            Expression::FirstValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                    // For SQLite, we'd need to add NULLS LAST to ORDER BY in the OVER clause
                    // but that's handled by the generator's NULLS ordering
                }
            }
            Expression::LastValue(ref mut vf) => {
                if vf.ignore_nulls == Some(false) {
                    vf.ignore_nulls = None;
                }
            }
            _ => {}
        }
        Ok(Expression::WindowFunction(wf))
    } else {
        Ok(e)
    }
}

Action::SnowflakeWindowFrameStrip => {
    // Strip the default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when targeting Snowflake.
    if let Expression::WindowFunction(mut wf) = e {
        wf.over.frame = None;
        Ok(Expression::WindowFunction(wf))
    } else {
        Ok(e)
    }
}

Action::SnowflakeWindowFrameAdd => {
    // Add default ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    // for FIRST_VALUE/LAST_VALUE/NTH_VALUE when transpiling from Snowflake to non-Snowflake.
    // NOTE(review): overwrites any existing frame — presumably the
    // dispatcher only routes frameless window functions here; confirm.
    if let Expression::WindowFunction(mut wf) = e {
        wf.over.frame = Some(crate::expressions::WindowFrame {
            kind: crate::expressions::WindowFrameKind::Rows,
            start: crate::expressions::WindowFrameBound::UnboundedPreceding,
            end: Some(crate::expressions::WindowFrameBound::UnboundedFollowing),
            exclude: None,
            kind_text: None,
            start_side_text: None,
            end_side_text: None,
        });
        Ok(Expression::WindowFunction(wf))
    } else {
        Ok(e)
    }
}
24397
Action::CreateTableStripComment => {
    // Strip COMMENT column constraint, USING, PARTITIONED BY for DuckDB.
    if let Expression::CreateTable(mut ct) = e {
        for col in &mut ct.columns {
            // COMMENT can be stored three ways on a column: a dedicated
            // field, a constraint entry, and a constraint-order marker —
            // clear all three so the generator emits none of them.
            col.comment = None;
            col.constraints.retain(|c| {
                !matches!(c, crate::expressions::ColumnConstraint::Comment(_))
            });
            // Also remove Comment from constraint_order
            col.constraint_order.retain(|c| {
                !matches!(c, crate::expressions::ConstraintType::Comment)
            });
        }
        // Strip properties (USING, PARTITIONED BY, etc.)
        ct.properties.clear();
        Ok(Expression::CreateTable(ct))
    } else {
        Ok(e)
    }
}
24418
24419 Action::AlterTableToSpRename => {
24420 // ALTER TABLE db.t1 RENAME TO db.t2 -> EXEC sp_rename 'db.t1', 't2'
24421 if let Expression::AlterTable(ref at) = e {
24422 if let Some(crate::expressions::AlterTableAction::RenameTable(
24423 ref new_tbl,
24424 )) = at.actions.first()
24425 {
24426 // Build the old table name using TSQL bracket quoting
24427 let old_name = if let Some(ref schema) = at.name.schema {
24428 if at.name.name.quoted || schema.quoted {
24429 format!("[{}].[{}]", schema.name, at.name.name.name)
24430 } else {
24431 format!("{}.{}", schema.name, at.name.name.name)
24432 }
24433 } else {
24434 if at.name.name.quoted {
24435 format!("[{}]", at.name.name.name)
24436 } else {
24437 at.name.name.name.clone()
24438 }
24439 };
24440 let new_name = new_tbl.name.name.clone();
24441 // EXEC sp_rename 'old_name', 'new_name'
24442 let sql = format!("EXEC sp_rename '{}', '{}'", old_name, new_name);
24443 Ok(Expression::Raw(crate::expressions::Raw { sql }))
24444 } else {
24445 Ok(e)
24446 }
24447 } else {
24448 Ok(e)
24449 }
24450 }
24451
Action::SnowflakeIntervalFormat => {
    // INTERVAL '2' HOUR -> INTERVAL '2 HOUR' for Snowflake (the unit is
    // folded into the quoted literal and the separate unit spec dropped).
    if let Expression::Interval(mut iv) = e {
        if let (Some(Expression::Literal(lit)), Some(ref unit_spec)) =
            (&iv.this, &iv.unit)
        {
            if let Literal::String(ref val) = lit.as_ref() {
                // Render the unit keyword; only Simple unit specs are
                // handled — compound specs fall through as "".
                let unit_str = match unit_spec {
                    crate::expressions::IntervalUnitSpec::Simple {
                        unit, ..
                    } => match unit {
                        crate::expressions::IntervalUnit::Year => "YEAR",
                        crate::expressions::IntervalUnit::Quarter => "QUARTER",
                        crate::expressions::IntervalUnit::Month => "MONTH",
                        crate::expressions::IntervalUnit::Week => "WEEK",
                        crate::expressions::IntervalUnit::Day => "DAY",
                        crate::expressions::IntervalUnit::Hour => "HOUR",
                        crate::expressions::IntervalUnit::Minute => "MINUTE",
                        crate::expressions::IntervalUnit::Second => "SECOND",
                        crate::expressions::IntervalUnit::Millisecond => {
                            "MILLISECOND"
                        }
                        crate::expressions::IntervalUnit::Microsecond => {
                            "MICROSECOND"
                        }
                        crate::expressions::IntervalUnit::Nanosecond => {
                            "NANOSECOND"
                        }
                    },
                    _ => "",
                };
                if !unit_str.is_empty() {
                    // Merge value and unit into one string literal.
                    let combined = format!("{} {}", val, unit_str);
                    iv.this = Some(Expression::Literal(Box::new(Literal::String(
                        combined,
                    ))));
                    iv.unit = None;
                }
            }
        }
        Ok(Expression::Interval(iv))
    } else {
        Ok(e)
    }
}
24497
Action::ArrayConcatBracketConvert => {
    // Expression::Array/ArrayFunc -> target-specific
    // For PostgreSQL: Array -> ArrayFunc (bracket_notation: false)
    // For Redshift: Array/ArrayFunc -> Function("ARRAY", args) to produce ARRAY(1, 2) with parens
    match e {
        Expression::Array(arr) => {
            if matches!(target, DialectType::Redshift) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY".to_string(),
                    arr.expressions,
                ))))
            } else {
                Ok(Expression::ArrayFunc(Box::new(
                    crate::expressions::ArrayConstructor {
                        expressions: arr.expressions,
                        bracket_notation: false,
                        use_list_keyword: false,
                    },
                )))
            }
        }
        Expression::ArrayFunc(arr) => {
            // Only for Redshift: convert bracket-notation ArrayFunc to Function("ARRAY")
            if matches!(target, DialectType::Redshift) {
                Ok(Expression::Function(Box::new(Function::new(
                    "ARRAY".to_string(),
                    arr.expressions,
                ))))
            } else {
                // Other targets keep the ArrayFunc node untouched.
                Ok(Expression::ArrayFunc(arr))
            }
        }
        _ => Ok(e),
    }
}
24533
24534 Action::BitAggFloatCast => {
24535 // BIT_OR/BIT_AND/BIT_XOR with float/decimal cast arg -> wrap with ROUND+INT cast for DuckDB
24536 // For FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24537 // For DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24538 let int_type = DataType::Int {
24539 length: None,
24540 integer_spelling: false,
24541 };
24542 let wrap_agg = |agg_this: Expression, int_dt: DataType| -> Expression {
24543 if let Expression::Cast(c) = agg_this {
24544 match &c.to {
24545 DataType::Float { .. }
24546 | DataType::Double { .. }
24547 | DataType::Custom { .. } => {
24548 // FLOAT/DOUBLE/REAL: CAST(ROUND(CAST(val AS type)) AS INT)
24549 // Change FLOAT to REAL (Float with real_spelling=true) for DuckDB generator
24550 let inner_type = match &c.to {
24551 DataType::Float {
24552 precision, scale, ..
24553 } => DataType::Float {
24554 precision: *precision,
24555 scale: *scale,
24556 real_spelling: true,
24557 },
24558 other => other.clone(),
24559 };
24560 let inner_cast =
24561 Expression::Cast(Box::new(crate::expressions::Cast {
24562 this: c.this.clone(),
24563 to: inner_type,
24564 trailing_comments: Vec::new(),
24565 double_colon_syntax: false,
24566 format: None,
24567 default: None,
24568 inferred_type: None,
24569 }));
24570 let rounded = Expression::Function(Box::new(Function::new(
24571 "ROUND".to_string(),
24572 vec![inner_cast],
24573 )));
24574 Expression::Cast(Box::new(crate::expressions::Cast {
24575 this: rounded,
24576 to: int_dt,
24577 trailing_comments: Vec::new(),
24578 double_colon_syntax: false,
24579 format: None,
24580 default: None,
24581 inferred_type: None,
24582 }))
24583 }
24584 DataType::Decimal { .. } => {
24585 // DECIMAL: CAST(CAST(val AS DECIMAL(p,s)) AS INT)
24586 Expression::Cast(Box::new(crate::expressions::Cast {
24587 this: Expression::Cast(c),
24588 to: int_dt,
24589 trailing_comments: Vec::new(),
24590 double_colon_syntax: false,
24591 format: None,
24592 default: None,
24593 inferred_type: None,
24594 }))
24595 }
24596 _ => Expression::Cast(c),
24597 }
24598 } else {
24599 agg_this
24600 }
24601 };
24602 match e {
24603 Expression::BitwiseOrAgg(mut f) => {
24604 f.this = wrap_agg(f.this, int_type);
24605 Ok(Expression::BitwiseOrAgg(f))
24606 }
24607 Expression::BitwiseAndAgg(mut f) => {
24608 let int_type = DataType::Int {
24609 length: None,
24610 integer_spelling: false,
24611 };
24612 f.this = wrap_agg(f.this, int_type);
24613 Ok(Expression::BitwiseAndAgg(f))
24614 }
24615 Expression::BitwiseXorAgg(mut f) => {
24616 let int_type = DataType::Int {
24617 length: None,
24618 integer_spelling: false,
24619 };
24620 f.this = wrap_agg(f.this, int_type);
24621 Ok(Expression::BitwiseXorAgg(f))
24622 }
24623 _ => Ok(e),
24624 }
24625 }
24626
24627 Action::BitAggSnowflakeRename => {
24628 // BIT_OR -> BITORAGG, BIT_AND -> BITANDAGG, BIT_XOR -> BITXORAGG for Snowflake
24629 match e {
24630 Expression::BitwiseOrAgg(f) => Ok(Expression::Function(Box::new(
24631 Function::new("BITORAGG".to_string(), vec![f.this]),
24632 ))),
24633 Expression::BitwiseAndAgg(f) => Ok(Expression::Function(Box::new(
24634 Function::new("BITANDAGG".to_string(), vec![f.this]),
24635 ))),
24636 Expression::BitwiseXorAgg(f) => Ok(Expression::Function(Box::new(
24637 Function::new("BITXORAGG".to_string(), vec![f.this]),
24638 ))),
24639 _ => Ok(e),
24640 }
24641 }
24642
Action::StrftimeCastTimestamp => {
    // CAST(x AS TIMESTAMP) -> CAST(x AS TIMESTAMP_NTZ) for Spark.
    // Only timezone-less timestamps are rewritten.
    if let Expression::Cast(mut c) = e {
        if matches!(
            c.to,
            DataType::Timestamp {
                timezone: false,
                ..
            }
        ) {
            c.to = DataType::Custom {
                name: "TIMESTAMP_NTZ".to_string(),
            };
        }
        Ok(Expression::Cast(c))
    } else {
        Ok(e)
    }
}

Action::DecimalDefaultPrecision => {
    // DECIMAL without precision -> DECIMAL(18, 3) for Snowflake.
    // Explicit precisions are left untouched.
    if let Expression::Cast(mut c) = e {
        if matches!(
            c.to,
            DataType::Decimal {
                precision: None,
                ..
            }
        ) {
            c.to = DataType::Decimal {
                precision: Some(18),
                scale: Some(3),
            };
        }
        Ok(Expression::Cast(c))
    } else {
        Ok(e)
    }
}
24683
Action::FilterToIff => {
    // FILTER(WHERE cond) -> rewrite aggregate: AGG(IFF(cond, val, NULL)).
    // Handles the standalone Filter node wrapping an aggregate; see
    // AggFilterToIff for filters stored directly on aggregate nodes.
    if let Expression::Filter(f) = e {
        let condition = *f.expression;
        let agg = *f.this;
        // Strip WHERE from condition
        let cond = match condition {
            Expression::Where(w) => w.this,
            other => other,
        };
        // Extract the aggregate function and its argument
        // We want AVG(IFF(condition, x, NULL))
        match agg {
            Expression::Function(mut func) => {
                if !func.args.is_empty() {
                    // Wrap only the first argument in IFF.
                    let orig_arg = func.args[0].clone();
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, orig_arg, Expression::Null(Null)],
                    )));
                    func.args[0] = iff_call;
                    Ok(Expression::Function(func))
                } else {
                    // Zero-arg function: leave the Filter wrapper intact.
                    Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                        this: Box::new(Expression::Function(func)),
                        expression: Box::new(cond),
                    })))
                }
            }
            Expression::Avg(mut avg) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, avg.this.clone(), Expression::Null(Null)],
                )));
                avg.this = iff_call;
                Ok(Expression::Avg(avg))
            }
            Expression::Sum(mut s) => {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![cond, s.this.clone(), Expression::Null(Null)],
                )));
                s.this = iff_call;
                Ok(Expression::Sum(s))
            }
            Expression::Count(mut c) => {
                // COUNT(*) has no argument to wrap; only COUNT(expr) changes.
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![cond, this_expr.clone(), Expression::Null(Null)],
                    )));
                    c.this = Some(iff_call);
                }
                Ok(Expression::Count(c))
            }
            other => {
                // Fallback: keep as Filter
                Ok(Expression::Filter(Box::new(crate::expressions::Filter {
                    this: Box::new(other),
                    expression: Box::new(cond),
                })))
            }
        }
    } else {
        Ok(e)
    }
}
24751
Action::AggFilterToIff => {
    // AggFunc.filter -> IFF wrapping: AVG(x) FILTER(WHERE cond) -> AVG(IFF(cond, x, NULL))
    // Helper macro to handle the common AggFunc case: take the filter off
    // the node and fold it into the argument via IFF.
    macro_rules! handle_agg_filter_to_iff {
        ($variant:ident, $agg:expr) => {{
            let mut agg = $agg;
            if let Some(filter_cond) = agg.filter.take() {
                let iff_call = Expression::Function(Box::new(Function::new(
                    "IFF".to_string(),
                    vec![filter_cond, agg.this.clone(), Expression::Null(Null)],
                )));
                agg.this = iff_call;
            }
            Ok(Expression::$variant(agg))
        }};
    }

    match e {
        Expression::Avg(agg) => handle_agg_filter_to_iff!(Avg, agg),
        Expression::Sum(agg) => handle_agg_filter_to_iff!(Sum, agg),
        Expression::Min(agg) => handle_agg_filter_to_iff!(Min, agg),
        Expression::Max(agg) => handle_agg_filter_to_iff!(Max, agg),
        Expression::ArrayAgg(agg) => handle_agg_filter_to_iff!(ArrayAgg, agg),
        Expression::CountIf(agg) => handle_agg_filter_to_iff!(CountIf, agg),
        Expression::Stddev(agg) => handle_agg_filter_to_iff!(Stddev, agg),
        Expression::StddevPop(agg) => handle_agg_filter_to_iff!(StddevPop, agg),
        Expression::StddevSamp(agg) => handle_agg_filter_to_iff!(StddevSamp, agg),
        Expression::Variance(agg) => handle_agg_filter_to_iff!(Variance, agg),
        Expression::VarPop(agg) => handle_agg_filter_to_iff!(VarPop, agg),
        Expression::VarSamp(agg) => handle_agg_filter_to_iff!(VarSamp, agg),
        Expression::Median(agg) => handle_agg_filter_to_iff!(Median, agg),
        Expression::Mode(agg) => handle_agg_filter_to_iff!(Mode, agg),
        Expression::First(agg) => handle_agg_filter_to_iff!(First, agg),
        Expression::Last(agg) => handle_agg_filter_to_iff!(Last, agg),
        Expression::AnyValue(agg) => handle_agg_filter_to_iff!(AnyValue, agg),
        Expression::ApproxDistinct(agg) => {
            handle_agg_filter_to_iff!(ApproxDistinct, agg)
        }
        // Count stores its argument as Option (COUNT(*) has none), so it
        // cannot use the macro.
        Expression::Count(mut c) => {
            if let Some(filter_cond) = c.filter.take() {
                if let Some(ref this_expr) = c.this {
                    let iff_call = Expression::Function(Box::new(Function::new(
                        "IFF".to_string(),
                        vec![
                            filter_cond,
                            this_expr.clone(),
                            Expression::Null(Null),
                        ],
                    )));
                    c.this = Some(iff_call);
                }
            }
            Ok(Expression::Count(c))
        }
        other => Ok(other),
    }
}
24809
Action::JsonToGetPath => {
    // JSON_EXTRACT(x, '$.key') -> GET_PATH(PARSE_JSON(x), 'key')
    if let Expression::JsonExtract(je) = e {
        // Convert to PARSE_JSON() wrapper:
        // - JSON(x) -> PARSE_JSON(x)
        // - PARSE_JSON(x) -> keep as-is
        // - anything else -> wrap in PARSE_JSON()
        let this = match &je.this {
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("JSON") && f.args.len() == 1 =>
            {
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    f.args.clone(),
                )))
            }
            Expression::Function(f)
                if f.name.eq_ignore_ascii_case("PARSE_JSON") =>
            {
                je.this.clone()
            }
            // GET_PATH result is already JSON, don't wrap
            Expression::Function(f) if f.name.eq_ignore_ascii_case("GET_PATH") => {
                je.this.clone()
            }
            other => {
                // Wrap non-JSON expressions in PARSE_JSON()
                Expression::Function(Box::new(Function::new(
                    "PARSE_JSON".to_string(),
                    vec![other.clone()],
                )))
            }
        };
        // Convert path: extract key from JSONPath or strip $. prefix from string
        let path = match &je.path {
            Expression::JSONPath(jp) => {
                // Extract the key from JSONPath: $root.key -> 'key'
                // Only string-literal keys are collected; other segment
                // kinds (indexes, wildcards) are skipped here.
                let mut key_parts = Vec::new();
                for expr in &jp.expressions {
                    match expr {
                        Expression::JSONPathRoot(_) => {} // skip root
                        Expression::JSONPathKey(k) => {
                            if let Expression::Literal(lit) = &*k.this {
                                if let Literal::String(s) = lit.as_ref() {
                                    key_parts.push(s.clone());
                                }
                            }
                        }
                        _ => {}
                    }
                }
                if !key_parts.is_empty() {
                    // Join nested keys with dots: a.b.c
                    Expression::Literal(Box::new(Literal::String(
                        key_parts.join("."),
                    )))
                } else {
                    je.path.clone()
                }
            }
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with("$.")) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // Drop the "$." prefix, then remove wildcard segments.
                let stripped = Self::strip_json_wildcards(&s[2..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.starts_with('$')) =>
            {
                let Literal::String(s) = lit.as_ref() else {
                    unreachable!()
                };
                // Bare "$" root prefix: drop just the dollar sign.
                let stripped = Self::strip_json_wildcards(&s[1..].to_string());
                Expression::Literal(Box::new(Literal::String(stripped)))
            }
            _ => je.path.clone(),
        };
        Ok(Expression::Function(Box::new(Function::new(
            "GET_PATH".to_string(),
            vec![this, path],
        ))))
    } else {
        Ok(e)
    }
}
24895
24896 Action::StructToRow => {
24897 // DuckDB struct/dict -> BigQuery STRUCT(value AS key, ...) / Presto ROW
24898 // Handles both Expression::Struct and Expression::MapFunc(curly_brace_syntax=true)
24899
24900 // Extract key-value pairs from either Struct or MapFunc
24901 let kv_pairs: Option<Vec<(String, Expression)>> = match &e {
24902 Expression::Struct(s) => Some(
24903 s.fields
24904 .iter()
24905 .map(|(opt_name, field_expr)| {
24906 if let Some(name) = opt_name {
24907 (name.clone(), field_expr.clone())
24908 } else if let Expression::NamedArgument(na) = field_expr {
24909 (na.name.name.clone(), na.value.clone())
24910 } else {
24911 (String::new(), field_expr.clone())
24912 }
24913 })
24914 .collect(),
24915 ),
24916 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
24917 m.keys
24918 .iter()
24919 .zip(m.values.iter())
24920 .map(|(key, value)| {
24921 let key_name = match key {
24922 Expression::Literal(lit)
24923 if matches!(lit.as_ref(), Literal::String(_)) =>
24924 {
24925 let Literal::String(s) = lit.as_ref() else {
24926 unreachable!()
24927 };
24928 s.clone()
24929 }
24930 Expression::Identifier(id) => id.name.clone(),
24931 _ => String::new(),
24932 };
24933 (key_name, value.clone())
24934 })
24935 .collect(),
24936 ),
24937 _ => None,
24938 };
24939
24940 if let Some(pairs) = kv_pairs {
24941 let mut named_args = Vec::new();
24942 for (key_name, value) in pairs {
24943 if matches!(target, DialectType::BigQuery) && !key_name.is_empty() {
24944 named_args.push(Expression::Alias(Box::new(
24945 crate::expressions::Alias::new(
24946 value,
24947 Identifier::new(key_name),
24948 ),
24949 )));
24950 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
24951 named_args.push(value);
24952 } else {
24953 named_args.push(value);
24954 }
24955 }
24956
24957 if matches!(target, DialectType::BigQuery) {
24958 Ok(Expression::Function(Box::new(Function::new(
24959 "STRUCT".to_string(),
24960 named_args,
24961 ))))
24962 } else if matches!(target, DialectType::Presto | DialectType::Trino) {
24963 // For Presto/Trino, infer types and wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
24964 let row_func = Expression::Function(Box::new(Function::new(
24965 "ROW".to_string(),
24966 named_args,
24967 )));
24968
24969 // Try to infer types for each pair
24970 let kv_pairs_again: Option<Vec<(String, Expression)>> = match &e {
24971 Expression::Struct(s) => Some(
24972 s.fields
24973 .iter()
24974 .map(|(opt_name, field_expr)| {
24975 if let Some(name) = opt_name {
24976 (name.clone(), field_expr.clone())
24977 } else if let Expression::NamedArgument(na) = field_expr
24978 {
24979 (na.name.name.clone(), na.value.clone())
24980 } else {
24981 (String::new(), field_expr.clone())
24982 }
24983 })
24984 .collect(),
24985 ),
24986 Expression::MapFunc(m) if m.curly_brace_syntax => Some(
24987 m.keys
24988 .iter()
24989 .zip(m.values.iter())
24990 .map(|(key, value)| {
24991 let key_name = match key {
24992 Expression::Literal(lit)
24993 if matches!(
24994 lit.as_ref(),
24995 Literal::String(_)
24996 ) =>
24997 {
24998 let Literal::String(s) = lit.as_ref() else {
24999 unreachable!()
25000 };
25001 s.clone()
25002 }
25003 Expression::Identifier(id) => id.name.clone(),
25004 _ => String::new(),
25005 };
25006 (key_name, value.clone())
25007 })
25008 .collect(),
25009 ),
25010 _ => None,
25011 };
25012
25013 if let Some(pairs) = kv_pairs_again {
25014 // Infer types for all values
25015 let mut all_inferred = true;
25016 let mut fields = Vec::new();
25017 for (name, value) in &pairs {
25018 let inferred_type = match value {
25019 Expression::Literal(lit)
25020 if matches!(lit.as_ref(), Literal::Number(_)) =>
25021 {
25022 let Literal::Number(n) = lit.as_ref() else {
25023 unreachable!()
25024 };
25025 if n.contains('.') {
25026 Some(DataType::Double {
25027 precision: None,
25028 scale: None,
25029 })
25030 } else {
25031 Some(DataType::Int {
25032 length: None,
25033 integer_spelling: true,
25034 })
25035 }
25036 }
25037 Expression::Literal(lit)
25038 if matches!(lit.as_ref(), Literal::String(_)) =>
25039 {
25040 Some(DataType::VarChar {
25041 length: None,
25042 parenthesized_length: false,
25043 })
25044 }
25045 Expression::Boolean(_) => Some(DataType::Boolean),
25046 _ => None,
25047 };
25048 if let Some(dt) = inferred_type {
25049 fields.push(crate::expressions::StructField::new(
25050 name.clone(),
25051 dt,
25052 ));
25053 } else {
25054 all_inferred = false;
25055 break;
25056 }
25057 }
25058
25059 if all_inferred && !fields.is_empty() {
25060 let row_type = DataType::Struct {
25061 fields,
25062 nested: true,
25063 };
25064 Ok(Expression::Cast(Box::new(Cast {
25065 this: row_func,
25066 to: row_type,
25067 trailing_comments: Vec::new(),
25068 double_colon_syntax: false,
25069 format: None,
25070 default: None,
25071 inferred_type: None,
25072 })))
25073 } else {
25074 Ok(row_func)
25075 }
25076 } else {
25077 Ok(row_func)
25078 }
25079 } else {
25080 Ok(Expression::Function(Box::new(Function::new(
25081 "ROW".to_string(),
25082 named_args,
25083 ))))
25084 }
25085 } else {
25086 Ok(e)
25087 }
25088 }
25089
            Action::SparkStructConvert => {
                // Spark STRUCT(val AS name, ...) -> Presto CAST(ROW(...) AS ROW(name TYPE, ...))
                // or DuckDB {'name': val, ...}
                if let Expression::Function(f) = e {
                    // Extract name-value pairs from aliased args.
                    // Unaliased args get an empty-string name.
                    let mut pairs: Vec<(String, Expression)> = Vec::new();
                    for arg in &f.args {
                        match arg {
                            Expression::Alias(a) => {
                                pairs.push((a.alias.name.clone(), a.this.clone()));
                            }
                            _ => {
                                pairs.push((String::new(), arg.clone()));
                            }
                        }
                    }

                    match target {
                        DialectType::DuckDB => {
                            // Convert to DuckDB struct literal {'name': value, ...}
                            // NOTE(review): unaliased fields produce an empty-string
                            // key ('': value) here — confirm that is intended output.
                            let mut keys = Vec::new();
                            let mut values = Vec::new();
                            for (name, value) in &pairs {
                                keys.push(Expression::Literal(Box::new(Literal::String(
                                    name.clone(),
                                ))));
                                values.push(value.clone());
                            }
                            Ok(Expression::MapFunc(Box::new(
                                crate::expressions::MapConstructor {
                                    keys,
                                    values,
                                    curly_brace_syntax: true,
                                    with_map_keyword: false,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Convert to CAST(ROW(val1, val2) AS ROW(name1 TYPE1, name2 TYPE2))
                            let row_args: Vec<Expression> =
                                pairs.iter().map(|(_, v)| v.clone()).collect();
                            let row_func = Expression::Function(Box::new(Function::new(
                                "ROW".to_string(),
                                row_args,
                            )));

                            // Infer types. Only number/string literals and booleans are
                            // inferable; anything else aborts the CAST wrapping below.
                            let mut all_inferred = true;
                            let mut fields = Vec::new();
                            for (name, value) in &pairs {
                                let inferred_type = match value {
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::Number(_)) =>
                                    {
                                        let Literal::Number(n) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        // A decimal point means DOUBLE, otherwise INTEGER.
                                        if n.contains('.') {
                                            Some(DataType::Double {
                                                precision: None,
                                                scale: None,
                                            })
                                        } else {
                                            Some(DataType::Int {
                                                length: None,
                                                integer_spelling: true,
                                            })
                                        }
                                    }
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::String(_)) =>
                                    {
                                        Some(DataType::VarChar {
                                            length: None,
                                            parenthesized_length: false,
                                        })
                                    }
                                    Expression::Boolean(_) => Some(DataType::Boolean),
                                    _ => None,
                                };
                                if let Some(dt) = inferred_type {
                                    fields.push(crate::expressions::StructField::new(
                                        name.clone(),
                                        dt,
                                    ));
                                } else {
                                    // One untypable field makes the whole ROW type
                                    // unknown; fall back to a bare ROW(...) call.
                                    all_inferred = false;
                                    break;
                                }
                            }

                            if all_inferred && !fields.is_empty() {
                                let row_type = DataType::Struct {
                                    fields,
                                    nested: true,
                                };
                                Ok(Expression::Cast(Box::new(Cast {
                                    this: row_func,
                                    to: row_type,
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(row_func)
                            }
                        }
                        // Other targets keep the original STRUCT(...) call unchanged.
                        _ => Ok(Expression::Function(f)),
                    }
                } else {
                    Ok(e)
                }
            }
25205
25206 Action::ApproxCountDistinctToApproxDistinct => {
25207 // APPROX_COUNT_DISTINCT(x) -> APPROX_DISTINCT(x)
25208 if let Expression::ApproxCountDistinct(f) = e {
25209 Ok(Expression::ApproxDistinct(f))
25210 } else {
25211 Ok(e)
25212 }
25213 }
25214
25215 Action::CollectListToArrayAgg => {
25216 // COLLECT_LIST(x) -> ARRAY_AGG(x) FILTER(WHERE x IS NOT NULL)
25217 if let Expression::AggregateFunction(f) = e {
25218 let filter_expr = if !f.args.is_empty() {
25219 let arg = f.args[0].clone();
25220 Some(Expression::IsNull(Box::new(crate::expressions::IsNull {
25221 this: arg,
25222 not: true,
25223 postfix_form: false,
25224 })))
25225 } else {
25226 None
25227 };
25228 let agg = crate::expressions::AggFunc {
25229 this: if f.args.is_empty() {
25230 Expression::Null(crate::expressions::Null)
25231 } else {
25232 f.args[0].clone()
25233 },
25234 distinct: f.distinct,
25235 order_by: f.order_by.clone(),
25236 filter: filter_expr,
25237 ignore_nulls: None,
25238 name: None,
25239 having_max: None,
25240 limit: None,
25241 inferred_type: None,
25242 };
25243 Ok(Expression::ArrayAgg(Box::new(agg)))
25244 } else {
25245 Ok(e)
25246 }
25247 }
25248
25249 Action::CollectSetConvert => {
25250 // COLLECT_SET(x) -> target-specific
25251 if let Expression::AggregateFunction(f) = e {
25252 match target {
25253 DialectType::Presto => Ok(Expression::AggregateFunction(Box::new(
25254 crate::expressions::AggregateFunction {
25255 name: "SET_AGG".to_string(),
25256 args: f.args,
25257 distinct: false,
25258 order_by: f.order_by,
25259 filter: f.filter,
25260 limit: f.limit,
25261 ignore_nulls: f.ignore_nulls,
25262 inferred_type: None,
25263 },
25264 ))),
25265 DialectType::Snowflake => Ok(Expression::AggregateFunction(Box::new(
25266 crate::expressions::AggregateFunction {
25267 name: "ARRAY_UNIQUE_AGG".to_string(),
25268 args: f.args,
25269 distinct: false,
25270 order_by: f.order_by,
25271 filter: f.filter,
25272 limit: f.limit,
25273 ignore_nulls: f.ignore_nulls,
25274 inferred_type: None,
25275 },
25276 ))),
25277 DialectType::Trino | DialectType::DuckDB => {
25278 let agg = crate::expressions::AggFunc {
25279 this: if f.args.is_empty() {
25280 Expression::Null(crate::expressions::Null)
25281 } else {
25282 f.args[0].clone()
25283 },
25284 distinct: true,
25285 order_by: Vec::new(),
25286 filter: None,
25287 ignore_nulls: None,
25288 name: None,
25289 having_max: None,
25290 limit: None,
25291 inferred_type: None,
25292 };
25293 Ok(Expression::ArrayAgg(Box::new(agg)))
25294 }
25295 _ => Ok(Expression::AggregateFunction(f)),
25296 }
25297 } else {
25298 Ok(e)
25299 }
25300 }
25301
25302 Action::PercentileConvert => {
25303 // PERCENTILE(x, 0.5) -> QUANTILE(x, 0.5) / APPROX_PERCENTILE(x, 0.5)
25304 if let Expression::AggregateFunction(f) = e {
25305 let name = match target {
25306 DialectType::DuckDB => "QUANTILE",
25307 DialectType::Presto | DialectType::Trino => "APPROX_PERCENTILE",
25308 _ => "PERCENTILE",
25309 };
25310 Ok(Expression::AggregateFunction(Box::new(
25311 crate::expressions::AggregateFunction {
25312 name: name.to_string(),
25313 args: f.args,
25314 distinct: f.distinct,
25315 order_by: f.order_by,
25316 filter: f.filter,
25317 limit: f.limit,
25318 ignore_nulls: f.ignore_nulls,
25319 inferred_type: None,
25320 },
25321 )))
25322 } else {
25323 Ok(e)
25324 }
25325 }
25326
25327 Action::CorrIsnanWrap => {
25328 // CORR(a, b) -> CASE WHEN ISNAN(CORR(a, b)) THEN NULL ELSE CORR(a, b) END
25329 // The CORR expression could be AggregateFunction, WindowFunction, or Filter-wrapped
25330 let corr_clone = e.clone();
25331 let isnan = Expression::Function(Box::new(Function::new(
25332 "ISNAN".to_string(),
25333 vec![corr_clone.clone()],
25334 )));
25335 let case_expr = Expression::Case(Box::new(Case {
25336 operand: None,
25337 whens: vec![(isnan, Expression::Null(crate::expressions::Null))],
25338 else_: Some(corr_clone),
25339 comments: Vec::new(),
25340 inferred_type: None,
25341 }));
25342 Ok(case_expr)
25343 }
25344
25345 Action::TruncToDateTrunc => {
25346 // TRUNC(timestamp, 'MONTH') -> DATE_TRUNC('MONTH', timestamp)
25347 if let Expression::Function(f) = e {
25348 if f.args.len() == 2 {
25349 let timestamp = f.args[0].clone();
25350 let unit_expr = f.args[1].clone();
25351
25352 if matches!(target, DialectType::ClickHouse) {
25353 // For ClickHouse, produce Expression::DateTrunc which the generator
25354 // outputs as DATE_TRUNC(...) without going through the ClickHouse
25355 // target transform that would convert it to dateTrunc
25356 let unit_str = Self::get_unit_str_static(&unit_expr);
25357 let dt_field = match unit_str.as_str() {
25358 "YEAR" => DateTimeField::Year,
25359 "MONTH" => DateTimeField::Month,
25360 "DAY" => DateTimeField::Day,
25361 "HOUR" => DateTimeField::Hour,
25362 "MINUTE" => DateTimeField::Minute,
25363 "SECOND" => DateTimeField::Second,
25364 "WEEK" => DateTimeField::Week,
25365 "QUARTER" => DateTimeField::Quarter,
25366 _ => DateTimeField::Custom(unit_str),
25367 };
25368 Ok(Expression::DateTrunc(Box::new(
25369 crate::expressions::DateTruncFunc {
25370 this: timestamp,
25371 unit: dt_field,
25372 },
25373 )))
25374 } else {
25375 let new_args = vec![unit_expr, timestamp];
25376 Ok(Expression::Function(Box::new(Function::new(
25377 "DATE_TRUNC".to_string(),
25378 new_args,
25379 ))))
25380 }
25381 } else {
25382 Ok(Expression::Function(f))
25383 }
25384 } else {
25385 Ok(e)
25386 }
25387 }
25388
            Action::ArrayContainsConvert => {
                // Rewrite array-membership checks for targets whose builtin differs.
                // Per the existing mappings below, `f.this` holds the array and
                // `f.expression` the probed value.
                if let Expression::ArrayContains(f) = e {
                    match target {
                        DialectType::Presto | DialectType::Trino => {
                            // ARRAY_CONTAINS(arr, val) -> CONTAINS(arr, val)
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONTAINS".to_string(),
                                vec![f.this, f.expression],
                            ))))
                        }
                        DialectType::Snowflake => {
                            // ARRAY_CONTAINS(arr, val) -> ARRAY_CONTAINS(CAST(val AS VARIANT), arr)
                            // Snowflake takes the value first and requires it as VARIANT.
                            let cast_val =
                                Expression::Cast(Box::new(crate::expressions::Cast {
                                    this: f.expression,
                                    to: crate::expressions::DataType::Custom {
                                        name: "VARIANT".to_string(),
                                    },
                                    trailing_comments: Vec::new(),
                                    double_colon_syntax: false,
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_CONTAINS".to_string(),
                                vec![cast_val, f.this],
                            ))))
                        }
                        // Other targets keep the node untouched.
                        _ => Ok(Expression::ArrayContains(f)),
                    }
                } else {
                    Ok(e)
                }
            }
25424
            Action::ArrayExceptConvert => {
                // ARRAY_EXCEPT(source, exclude): keep elements of `source` absent from
                // `exclude`. The rewrite differs per source/target pair because the
                // dialects disagree on NULL handling and bag-vs-set semantics.
                if let Expression::ArrayExcept(f) = e {
                    let source_arr = f.this;
                    let exclude_arr = f.expression;
                    match target {
                        DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                            // Snowflake ARRAY_EXCEPT -> DuckDB bag semantics:
                            // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                            // ELSE LIST_TRANSFORM(LIST_FILTER(
                            //   LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source))),
                            //   pair -> (LENGTH(LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1]))
                            //           > LENGTH(LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])))),
                            //   pair -> pair[1])
                            // END
                            //
                            // Zipping each element with its 1-based position lets the
                            // predicate compare occurrence counts up to that position
                            // (multiset difference), and IS NOT DISTINCT FROM makes
                            // element comparison NULL-safe.

                            // Build null check
                            let source_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: source_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let exclude_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: exclude_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: source_is_null,
                                    right: exclude_is_null,
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                            // GENERATE_SERIES(1, LENGTH(source))
                            let gen_series = Expression::Function(Box::new(Function::new(
                                "GENERATE_SERIES".to_string(),
                                vec![
                                    Expression::number(1),
                                    Expression::Function(Box::new(Function::new(
                                        "LENGTH".to_string(),
                                        vec![source_arr.clone()],
                                    ))),
                                ],
                            )));

                            // LIST_ZIP(source, GENERATE_SERIES(1, LENGTH(source)))
                            let list_zip = Expression::Function(Box::new(Function::new(
                                "LIST_ZIP".to_string(),
                                vec![source_arr.clone(), gen_series],
                            )));

                            // pair[1] (the element) and pair[2] (its position)
                            let pair_col = Expression::column("pair");
                            let pair_1 = Expression::Subscript(Box::new(
                                crate::expressions::Subscript {
                                    this: pair_col.clone(),
                                    index: Expression::number(1),
                                },
                            ));
                            let pair_2 = Expression::Subscript(Box::new(
                                crate::expressions::Subscript {
                                    this: pair_col.clone(),
                                    index: Expression::number(2),
                                },
                            ));

                            // source[1:pair[2]] — the prefix up to this occurrence
                            let source_slice = Expression::ArraySlice(Box::new(
                                crate::expressions::ArraySlice {
                                    this: source_arr.clone(),
                                    start: Some(Expression::number(1)),
                                    end: Some(pair_2),
                                },
                            ));

                            let e_col = Expression::column("e");

                            // e -> e IS NOT DISTINCT FROM pair[1]
                            let inner_lambda1 =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: Expression::NullSafeEq(Box::new(
                                        crate::expressions::BinaryOp {
                                            left: e_col.clone(),
                                            right: pair_1.clone(),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        },
                                    )),
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_FILTER(source[1:pair[2]], e -> e IS NOT DISTINCT FROM pair[1])
                            let inner_filter1 = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![source_slice, inner_lambda1],
                            )));

                            // LENGTH(LIST_FILTER(source[1:pair[2]], ...)) — occurrence
                            // count of this element in the prefix
                            let len1 = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![inner_filter1],
                            )));

                            // e -> e IS NOT DISTINCT FROM pair[1]
                            let inner_lambda2 =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: Expression::NullSafeEq(Box::new(
                                        crate::expressions::BinaryOp {
                                            left: e_col,
                                            right: pair_1.clone(),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        },
                                    )),
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_FILTER(exclude, e -> e IS NOT DISTINCT FROM pair[1])
                            let inner_filter2 = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![exclude_arr.clone(), inner_lambda2],
                            )));

                            // LENGTH(LIST_FILTER(exclude, ...)) — occurrence count of
                            // this element in the exclude list
                            let len2 = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![inner_filter2],
                            )));

                            // (LENGTH(...) > LENGTH(...)) — keep the occurrence only if
                            // it outnumbers the exclusions seen so far
                            let cond = Expression::Paren(Box::new(Paren {
                                this: Expression::Gt(Box::new(crate::expressions::BinaryOp {
                                    left: len1,
                                    right: len2,
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                })),
                                trailing_comments: vec![],
                            }));

                            // pair -> (condition)
                            let filter_lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new(
                                        "pair",
                                    )],
                                    body: cond,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_FILTER(LIST_ZIP(...), pair -> ...)
                            let outer_filter = Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![list_zip, filter_lambda],
                            )));

                            // pair -> pair[1] — project back to the bare element
                            let transform_lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new(
                                        "pair",
                                    )],
                                    body: pair_1,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_TRANSFORM(LIST_FILTER(...), pair -> pair[1])
                            let list_transform = Expression::Function(Box::new(Function::new(
                                "LIST_TRANSFORM".to_string(),
                                vec![outer_filter, transform_lambda],
                            )));

                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(null_check, Expression::Null(Null))],
                                else_: Some(list_transform),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        DialectType::DuckDB => {
                            // ARRAY_EXCEPT(source, exclude) -> set semantics for DuckDB:
                            // CASE WHEN source IS NULL OR exclude IS NULL THEN NULL
                            // ELSE LIST_FILTER(LIST_DISTINCT(source),
                            //   e -> LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)) = 0)
                            // END

                            // Build: source IS NULL
                            let source_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: source_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            // Build: exclude IS NULL
                            let exclude_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: exclude_arr.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            // source IS NULL OR exclude IS NULL
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: source_is_null,
                                    right: exclude_is_null,
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                            // LIST_DISTINCT(source)
                            let list_distinct = Expression::Function(Box::new(Function::new(
                                "LIST_DISTINCT".to_string(),
                                vec![source_arr.clone()],
                            )));

                            // x IS NOT DISTINCT FROM e — NULL-safe equality
                            let x_col = Expression::column("x");
                            let e_col = Expression::column("e");
                            let is_not_distinct = Expression::NullSafeEq(Box::new(
                                crate::expressions::BinaryOp {
                                    left: x_col,
                                    right: e_col.clone(),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                },
                            ));

                            // x -> x IS NOT DISTINCT FROM e
                            let inner_lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("x")],
                                    body: is_not_distinct,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e)
                            let inner_list_filter =
                                Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![exclude_arr.clone(), inner_lambda],
                                )));

                            // LENGTH(LIST_FILTER(exclude, x -> x IS NOT DISTINCT FROM e))
                            let len_inner = Expression::Function(Box::new(Function::new(
                                "LENGTH".to_string(),
                                vec![inner_list_filter],
                            )));

                            // LENGTH(...) = 0 — element has no match in `exclude`
                            let eq_zero =
                                Expression::Eq(Box::new(crate::expressions::BinaryOp {
                                    left: len_inner,
                                    right: Expression::number(0),
                                    left_comments: vec![],
                                    operator_comments: vec![],
                                    trailing_comments: vec![],
                                    inferred_type: None,
                                }));

                            // e -> LENGTH(LIST_FILTER(...)) = 0
                            let outer_lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![crate::expressions::Identifier::new("e")],
                                    body: eq_zero,
                                    colon: false,
                                    parameter_types: vec![],
                                }));

                            // LIST_FILTER(LIST_DISTINCT(source), e -> ...)
                            let outer_list_filter =
                                Expression::Function(Box::new(Function::new(
                                    "LIST_FILTER".to_string(),
                                    vec![list_distinct, outer_lambda],
                                )));

                            // CASE WHEN ... IS NULL ... THEN NULL ELSE LIST_FILTER(...) END
                            Ok(Expression::Case(Box::new(Case {
                                operand: None,
                                whens: vec![(null_check, Expression::Null(Null))],
                                else_: Some(outer_list_filter),
                                comments: Vec::new(),
                                inferred_type: None,
                            })))
                        }
                        DialectType::Snowflake => {
                            // Snowflake: ARRAY_EXCEPT(source, exclude) - keep as-is
                            Ok(Expression::ArrayExcept(Box::new(
                                crate::expressions::BinaryFunc {
                                    this: source_arr,
                                    expression: exclude_arr,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            )))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // Presto/Trino: ARRAY_EXCEPT(source, exclude) - keep function name, array syntax already converted
                            Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_EXCEPT".to_string(),
                                vec![source_arr, exclude_arr],
                            ))))
                        }
                        // Any other target: rebuild the node unchanged.
                        _ => Ok(Expression::ArrayExcept(Box::new(
                            crate::expressions::BinaryFunc {
                                this: source_arr,
                                expression: exclude_arr,
                                original_name: None,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
25763
            Action::RegexpLikeExasolAnchor => {
                // RegexpLike -> Exasol: pad the pattern to `.*<pattern>.*`.
                // Exasol REGEXP_LIKE does full-string match, but RLIKE/REGEXP from other
                // dialects does partial match, so we need to pad with .* on both sides.
                if let Expression::RegexpLike(mut f) = e {
                    match &f.pattern {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            // String literal: rewrite the literal itself to ".*<s>.*"
                            f.pattern = Expression::Literal(Box::new(Literal::String(
                                format!(".*{}.*", s),
                            )));
                        }
                        _ => {
                            // Non-literal: wrap with CONCAT('.*', pattern, '.*'),
                            // built as two nested binary concats inside parentheses.
                            f.pattern =
                                Expression::Paren(Box::new(crate::expressions::Paren {
                                    this: Expression::Concat(Box::new(
                                        crate::expressions::BinaryOp {
                                            left: Expression::Concat(Box::new(
                                                crate::expressions::BinaryOp {
                                                    left: Expression::Literal(Box::new(
                                                        Literal::String(".*".to_string()),
                                                    )),
                                                    // the original pattern is moved here
                                                    right: f.pattern,
                                                    left_comments: vec![],
                                                    operator_comments: vec![],
                                                    trailing_comments: vec![],
                                                    inferred_type: None,
                                                },
                                            )),
                                            right: Expression::Literal(Box::new(
                                                Literal::String(".*".to_string()),
                                            )),
                                            left_comments: vec![],
                                            operator_comments: vec![],
                                            trailing_comments: vec![],
                                            inferred_type: None,
                                        },
                                    )),
                                    trailing_comments: vec![],
                                }));
                        }
                    }
                    Ok(Expression::RegexpLike(f))
                } else {
                    Ok(e)
                }
            }
25817
25818 Action::ArrayPositionSnowflakeSwap => {
25819 // ARRAY_POSITION(arr, elem) -> ARRAY_POSITION(elem, arr) for Snowflake
25820 if let Expression::ArrayPosition(f) = e {
25821 Ok(Expression::ArrayPosition(Box::new(
25822 crate::expressions::BinaryFunc {
25823 this: f.expression,
25824 expression: f.this,
25825 original_name: f.original_name,
25826 inferred_type: f.inferred_type,
25827 },
25828 )))
25829 } else {
25830 Ok(e)
25831 }
25832 }
25833
25834 Action::SnowflakeArrayPositionToDuckDB => {
25835 // Snowflake ARRAY_POSITION(value, array) -> DuckDB ARRAY_POSITION(array, value) - 1
25836 // Snowflake uses 0-based indexing, DuckDB uses 1-based
25837 // The parser has this=value, expression=array (Snowflake order)
25838 if let Expression::ArrayPosition(f) = e {
25839 // Create ARRAY_POSITION(array, value) in standard order
25840 let standard_pos =
25841 Expression::ArrayPosition(Box::new(crate::expressions::BinaryFunc {
25842 this: f.expression, // array
25843 expression: f.this, // value
25844 original_name: f.original_name,
25845 inferred_type: f.inferred_type,
25846 }));
25847 // Subtract 1 for zero-based indexing
25848 Ok(Expression::Sub(Box::new(BinaryOp {
25849 left: standard_pos,
25850 right: Expression::number(1),
25851 left_comments: vec![],
25852 operator_comments: vec![],
25853 trailing_comments: vec![],
25854 inferred_type: None,
25855 })))
25856 } else {
25857 Ok(e)
25858 }
25859 }
25860
            Action::ArrayDistinctConvert => {
                // ARRAY_DISTINCT(arr) -> DuckDB NULL-aware CASE:
                // CASE WHEN ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                //      THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(arr, _u -> NOT _u IS NULL)), NULL)
                //      ELSE LIST_DISTINCT(arr)
                // END
                // NOTE(review): this relies on DuckDB's LIST_COUNT ignoring NULL
                // elements while ARRAY_LENGTH counts them, so a mismatch signals that
                // the array contains a NULL — confirm against DuckDB documentation.
                if let Expression::ArrayDistinct(f) = e {
                    let arr = f.this;

                    // ARRAY_LENGTH(arr)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![arr.clone()],
                    )));
                    // LIST_COUNT(arr)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![arr.clone()],
                    )));
                    // ARRAY_LENGTH(arr) <> LIST_COUNT(arr)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));

                    // _u column (lambda parameter)
                    let u_col = Expression::column("_u");
                    // NOT _u IS NULL
                    let u_is_null = Expression::IsNull(Box::new(crate::expressions::IsNull {
                        this: u_col.clone(),
                        not: false,
                        postfix_form: false,
                    }));
                    let not_u_is_null =
                        Expression::Not(Box::new(crate::expressions::UnaryOp {
                            this: u_is_null,
                            inferred_type: None,
                        }));
                    // _u -> NOT _u IS NULL
                    let filter_lambda =
                        Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                            parameters: vec![crate::expressions::Identifier::new("_u")],
                            body: not_u_is_null,
                            colon: false,
                            parameter_types: vec![],
                        }));
                    // LIST_FILTER(arr, _u -> NOT _u IS NULL)
                    let list_filter = Expression::Function(Box::new(Function::new(
                        "LIST_FILTER".to_string(),
                        vec![arr.clone(), filter_lambda],
                    )));
                    // LIST_DISTINCT(LIST_FILTER(arr, ...))
                    let list_distinct_filtered = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![list_filter],
                    )));
                    // LIST_APPEND(LIST_DISTINCT(LIST_FILTER(...)), NULL):
                    // re-attach a single NULL so the distinct result still reflects
                    // that the input contained NULL.
                    let list_append = Expression::Function(Box::new(Function::new(
                        "LIST_APPEND".to_string(),
                        vec![list_distinct_filtered, Expression::Null(Null)],
                    )));

                    // LIST_DISTINCT(arr) — the simple path when no NULLs are present
                    let list_distinct = Expression::Function(Box::new(Function::new(
                        "LIST_DISTINCT".to_string(),
                        vec![arr],
                    )));

                    // CASE WHEN neq THEN list_append ELSE list_distinct END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(neq, list_append)],
                        else_: Some(list_distinct),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
25945
25946 Action::ArrayDistinctClickHouse => {
25947 // ARRAY_DISTINCT(arr) -> arrayDistinct(arr) for ClickHouse
25948 if let Expression::ArrayDistinct(f) = e {
25949 Ok(Expression::Function(Box::new(Function::new(
25950 "arrayDistinct".to_string(),
25951 vec![f.this],
25952 ))))
25953 } else {
25954 Ok(e)
25955 }
25956 }
25957
            Action::ArrayContainsDuckDBConvert => {
                // Snowflake ARRAY_CONTAINS(value, array) -> DuckDB NULL-aware:
                // CASE WHEN value IS NULL
                //      THEN NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                //      ELSE ARRAY_CONTAINS(array, value)
                // END
                // Note: In Rust AST from Snowflake parse, this=value (first arg),
                // expression=array (second arg).
                // NOTE(review): for a NULL probe the branch evaluates to TRUE when the
                // length bookkeeping indicates a NULL element exists, and to NULL
                // otherwise (NULLIF(FALSE, FALSE) -> NULL); the length trick assumes
                // LIST_COUNT skips NULLs while ARRAY_LENGTH does not — confirm both
                // against DuckDB/Snowflake documentation.
                if let Expression::ArrayContains(f) = e {
                    let value = f.this;
                    let array = f.expression;

                    // value IS NULL
                    let value_is_null =
                        Expression::IsNull(Box::new(crate::expressions::IsNull {
                            this: value.clone(),
                            not: false,
                            postfix_form: false,
                        }));

                    // ARRAY_LENGTH(array)
                    let array_length = Expression::Function(Box::new(Function::new(
                        "ARRAY_LENGTH".to_string(),
                        vec![array.clone()],
                    )));
                    // LIST_COUNT(array)
                    let list_count = Expression::Function(Box::new(Function::new(
                        "LIST_COUNT".to_string(),
                        vec![array.clone()],
                    )));
                    // ARRAY_LENGTH(array) <> LIST_COUNT(array)
                    let neq = Expression::Neq(Box::new(crate::expressions::BinaryOp {
                        left: array_length,
                        right: list_count,
                        left_comments: vec![],
                        operator_comments: vec![],
                        trailing_comments: vec![],
                        inferred_type: None,
                    }));
                    // NULLIF(ARRAY_LENGTH(array) <> LIST_COUNT(array), FALSE)
                    let nullif = Expression::Nullif(Box::new(crate::expressions::Nullif {
                        this: Box::new(neq),
                        expression: Box::new(Expression::Boolean(
                            crate::expressions::BooleanLiteral { value: false },
                        )),
                    }));

                    // ARRAY_CONTAINS(array, value) - DuckDB syntax: array first, value second
                    let array_contains = Expression::Function(Box::new(Function::new(
                        "ARRAY_CONTAINS".to_string(),
                        vec![array, value],
                    )));

                    // CASE WHEN value IS NULL THEN NULLIF(...) ELSE ARRAY_CONTAINS(array, value) END
                    Ok(Expression::Case(Box::new(Case {
                        operand: None,
                        whens: vec![(value_is_null, nullif)],
                        else_: Some(array_contains),
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                } else {
                    Ok(e)
                }
            }
26022
                Action::StrPositionExpand => {
                    // StrPosition with position arg -> complex STRPOS expansion for Presto/DuckDB
                    // For Presto: IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                    // For DuckDB: CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                    //
                    // The STRPOS emitted here takes only (haystack, needle), so the
                    // start offset is honored by searching SUBSTRING(str, pos) and
                    // shifting a hit back by pos - 1; a 0 ("not found") must stay 0,
                    // hence the IF/CASE guard around the adjustment.
                    if let Expression::StrPosition(sp) = e {
                        let crate::expressions::StrPosition {
                            this,
                            substr,
                            position,
                            occurrence,
                        } = *sp;
                        let string = *this;
                        // Missing search string degrades to a NULL literal; missing
                        // position defaults to 1 (search from the start).
                        let substr_expr = match substr {
                            Some(s) => *s,
                            None => Expression::Null(Null),
                        };
                        let pos = match position {
                            Some(p) => *p,
                            None => Expression::number(1),
                        };

                        // SUBSTRING(string, pos)
                        let substring_call = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![string.clone(), pos.clone()],
                        )));
                        // STRPOS(SUBSTRING(string, pos), substr)
                        let strpos_call = Expression::Function(Box::new(Function::new(
                            "STRPOS".to_string(),
                            vec![substring_call, substr_expr.clone()],
                        )));
                        // STRPOS(...) + pos - 1
                        let pos_adjusted =
                            Expression::Sub(Box::new(crate::expressions::BinaryOp::new(
                                Expression::Add(Box::new(crate::expressions::BinaryOp::new(
                                    strpos_call.clone(),
                                    pos.clone(),
                                ))),
                                Expression::number(1),
                            )));
                        // STRPOS(...) = 0
                        let is_zero = Expression::Eq(Box::new(crate::expressions::BinaryOp::new(
                            strpos_call.clone(),
                            Expression::number(0),
                        )));

                        match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // IF(STRPOS(SUBSTRING(str, pos), substr) = 0, 0, STRPOS(SUBSTRING(str, pos), substr) + pos - 1)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "IF".to_string(),
                                    vec![is_zero, Expression::number(0), pos_adjusted],
                                ))))
                            }
                            DialectType::DuckDB => {
                                // CASE WHEN STRPOS(SUBSTRING(str, pos), substr) = 0 THEN 0 ELSE STRPOS(SUBSTRING(str, pos), substr) + pos - 1 END
                                Ok(Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(is_zero, Expression::number(0))],
                                    else_: Some(pos_adjusted),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            }
                            _ => {
                                // Reconstruct StrPosition for targets with no special
                                // expansion; note the defaults chosen above (NULL
                                // substr, position 1) are baked into the rebuilt node.
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(string),
                                        substr: Some(Box::new(substr_expr)),
                                        position: Some(Box::new(pos)),
                                        occurrence,
                                    },
                                )))
                            }
                        }
                    } else {
                        Ok(e)
                    }
                }
26103
                Action::MonthsBetweenConvert => {
                    // MONTHS_BETWEEN(end, start) -> target-specific equivalent.
                    // In the BinaryFunc node, `this` is the later date and
                    // `expression` the earlier one.
                    if let Expression::MonthsBetween(mb) = e {
                        let crate::expressions::BinaryFunc {
                            this: end_date,
                            expression: start_date,
                            ..
                        } = *mb;
                        match target {
                            DialectType::DuckDB => {
                                // DuckDB has no MONTHS_BETWEEN; emit
                                //   DATE_DIFF('MONTH', start, end)
                                //     + CASE WHEN <both dates are month-ends> THEN 0
                                //            ELSE (DAY(end) - DAY(start)) / 31.0 END
                                // i.e. whole-month difference plus a 31-day-based
                                // fractional part, with the fraction suppressed when
                                // both dates fall on the last day of their month.
                                let cast_end = Self::ensure_cast_date(end_date);
                                let cast_start = Self::ensure_cast_date(start_date);
                                let dd = Expression::Function(Box::new(Function::new(
                                    "DATE_DIFF".to_string(),
                                    vec![
                                        Expression::string("MONTH"),
                                        cast_start.clone(),
                                        cast_end.clone(),
                                    ],
                                )));
                                let day_end = Expression::Function(Box::new(Function::new(
                                    "DAY".to_string(),
                                    vec![cast_end.clone()],
                                )));
                                let day_start = Expression::Function(Box::new(Function::new(
                                    "DAY".to_string(),
                                    vec![cast_start.clone()],
                                )));
                                let last_day_end = Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![cast_end.clone()],
                                )));
                                let last_day_start = Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![cast_start.clone()],
                                )));
                                let day_last_end = Expression::Function(Box::new(Function::new(
                                    "DAY".to_string(),
                                    vec![last_day_end],
                                )));
                                let day_last_start = Expression::Function(Box::new(Function::new(
                                    "DAY".to_string(),
                                    vec![last_day_start],
                                )));
                                // DAY(x) = DAY(LAST_DAY(x)) detects a month-end date.
                                let cond1 = Expression::Eq(Box::new(BinaryOp::new(
                                    day_end.clone(),
                                    day_last_end,
                                )));
                                let cond2 = Expression::Eq(Box::new(BinaryOp::new(
                                    day_start.clone(),
                                    day_last_start,
                                )));
                                let both_cond =
                                    Expression::And(Box::new(BinaryOp::new(cond1, cond2)));
                                let day_diff =
                                    Expression::Sub(Box::new(BinaryOp::new(day_end, day_start)));
                                // Parenthesize the subtraction so the division below
                                // renders as (DAY(end) - DAY(start)) / 31.0.
                                let day_diff_paren =
                                    Expression::Paren(Box::new(crate::expressions::Paren {
                                        this: day_diff,
                                        trailing_comments: Vec::new(),
                                    }));
                                let frac = Expression::Div(Box::new(BinaryOp::new(
                                    day_diff_paren,
                                    Expression::Literal(Box::new(Literal::Number(
                                        "31.0".to_string(),
                                    ))),
                                )));
                                let case_expr = Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(both_cond, Expression::number(0))],
                                    else_: Some(frac),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Add(Box::new(BinaryOp::new(dd, case_expr))))
                            }
                            DialectType::Snowflake | DialectType::Redshift => {
                                // DATEDIFF(MONTH, start, end) — unit as bare identifier.
                                let unit = Expression::Identifier(Identifier::new("MONTH"));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEDIFF".to_string(),
                                    vec![unit, start_date, end_date],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_DIFF('MONTH', start, end) — unit as string literal.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_DIFF".to_string(),
                                    vec![Expression::string("MONTH"), start_date, end_date],
                                ))))
                            }
                            // Any other target: rebuild the node unchanged (the
                            // original_name is intentionally dropped).
                            _ => Ok(Expression::MonthsBetween(Box::new(
                                crate::expressions::BinaryFunc {
                                    this: end_date,
                                    expression: start_date,
                                    original_name: None,
                                    inferred_type: None,
                                },
                            ))),
                        }
                    } else {
                        Ok(e)
                    }
                }
26205
                Action::AddMonthsConvert => {
                    // ADD_MONTHS(date, n) -> per-target rewrite:
                    //   TSQL/Fabric           DATEADD(MONTH, n, CAST(date AS DATETIME2))
                    //   DuckDB (from Snowflake) month-end-preserving CASE expression
                    //   DuckDB (other source) date + INTERVAL n MONTH
                    //   Snowflake             ADD_MONTHS (same-dialect) or DATEADD
                    //   Redshift              DATEADD(MONTH, n, date)
                    //   Presto family         DATE_ADD('MONTH', n, date)
                    //   BigQuery              DATE_ADD(date, INTERVAL n MONTH)
                    //   Spark/Databricks/Hive ADD_MONTHS(date, n)
                    //   otherwise             keep the AddMonths node
                    if let Expression::AddMonths(am) = e {
                        let date = am.this;
                        let val = am.expression;
                        match target {
                            DialectType::TSQL | DialectType::Fabric => {
                                let cast_date = Self::ensure_cast_datetime2(date);
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(),
                                    vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val,
                                        cast_date,
                                    ],
                                ))))
                            }
                            DialectType::DuckDB if matches!(source, DialectType::Snowflake) => {
                                // DuckDB ADD_MONTHS from Snowflake: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                                // Optionally wrapped in CAST(... AS type) if the input had a specific type

                                // Determine the cast type from the date expression
                                let (cast_date, return_type) = match &date {
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::String(_)) =>
                                    {
                                        // String literal: CAST(str AS TIMESTAMP), no outer CAST
                                        (
                                            Expression::Cast(Box::new(Cast {
                                                this: date.clone(),
                                                to: DataType::Timestamp {
                                                    precision: None,
                                                    timezone: false,
                                                },
                                                trailing_comments: Vec::new(),
                                                double_colon_syntax: false,
                                                format: None,
                                                default: None,
                                                inferred_type: None,
                                            })),
                                            None,
                                        )
                                    }
                                    Expression::Cast(c) => {
                                        // Already cast (e.g., '2023-01-31'::DATE) - keep the cast, wrap result in CAST(... AS type)
                                        (date.clone(), Some(c.to.clone()))
                                    }
                                    _ => {
                                        // Expression or NULL::TYPE - keep as-is, check for cast type
                                        // NOTE(review): this inner `if let Expression::Cast`
                                        // can never match — the `Expression::Cast(c)` arm
                                        // above already captured every Cast, so this branch
                                        // always takes the `else` path. Dead code kept as-is.
                                        if let Expression::Cast(c) = &date {
                                            (date.clone(), Some(c.to.clone()))
                                        } else {
                                            (date.clone(), None)
                                        }
                                    }
                                };

                                // Build the interval expression
                                // For non-integer values (float, decimal, cast), use TO_MONTHS(CAST(ROUND(val) AS INT))
                                // For integer values, use INTERVAL val MONTH
                                let is_non_integer_val = match &val {
                                    Expression::Literal(lit)
                                        if matches!(lit.as_ref(), Literal::Number(_)) =>
                                    {
                                        let Literal::Number(n) = lit.as_ref() else {
                                            unreachable!()
                                        };
                                        n.contains('.')
                                    }
                                    Expression::Cast(_) => true, // e.g., 3.2::DECIMAL(10,2)
                                    Expression::Neg(n) => {
                                        // Negated literal: fractional iff the inner
                                        // number contains a '.'; any other negated
                                        // operand is treated as integer-valued.
                                        if let Expression::Literal(lit) = &n.this {
                                            if let Literal::Number(s) = lit.as_ref() {
                                                s.contains('.')
                                            } else {
                                                false
                                            }
                                        } else {
                                            false
                                        }
                                    }
                                    _ => false,
                                };

                                let add_interval = if is_non_integer_val {
                                    // TO_MONTHS(CAST(ROUND(val) AS INT))
                                    let round_val = Expression::Function(Box::new(Function::new(
                                        "ROUND".to_string(),
                                        vec![val.clone()],
                                    )));
                                    let cast_int = Expression::Cast(Box::new(Cast {
                                        this: round_val,
                                        to: DataType::Int {
                                            length: None,
                                            integer_spelling: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                    Expression::Function(Box::new(Function::new(
                                        "TO_MONTHS".to_string(),
                                        vec![cast_int],
                                    )))
                                } else {
                                    // INTERVAL val MONTH
                                    // For negative numbers, wrap in parens
                                    let interval_val = match &val {
                                        Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n.starts_with('-')) =>
                                        {
                                            // NOTE(review): this let-else is redundant —
                                            // the guard above already proved the literal
                                            // is a Literal::Number.
                                            let Literal::Number(_) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            Expression::Paren(Box::new(Paren {
                                                this: val.clone(),
                                                trailing_comments: Vec::new(),
                                            }))
                                        }
                                        Expression::Neg(_) => Expression::Paren(Box::new(Paren {
                                            this: val.clone(),
                                            trailing_comments: Vec::new(),
                                        })),
                                        Expression::Null(_) => Expression::Paren(Box::new(Paren {
                                            this: val.clone(),
                                            trailing_comments: Vec::new(),
                                        })),
                                        _ => val.clone(),
                                    };
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(interval_val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: crate::expressions::IntervalUnit::Month,
                                            use_plural: false,
                                        }),
                                    }))
                                };

                                // Build: date + interval
                                let date_plus_interval = Expression::Add(Box::new(BinaryOp::new(
                                    cast_date.clone(),
                                    add_interval.clone(),
                                )));

                                // Build LAST_DAY(date)
                                let last_day_date = Expression::Function(Box::new(Function::new(
                                    "LAST_DAY".to_string(),
                                    vec![cast_date.clone()],
                                )));

                                // Build LAST_DAY(date + interval)
                                let last_day_date_plus =
                                    Expression::Function(Box::new(Function::new(
                                        "LAST_DAY".to_string(),
                                        vec![date_plus_interval.clone()],
                                    )));

                                // Build: CASE WHEN LAST_DAY(date) = date THEN LAST_DAY(date + interval) ELSE date + interval END
                                let case_expr = Expression::Case(Box::new(Case {
                                    operand: None,
                                    whens: vec![(
                                        Expression::Eq(Box::new(BinaryOp::new(
                                            last_day_date,
                                            cast_date.clone(),
                                        ))),
                                        last_day_date_plus,
                                    )],
                                    else_: Some(date_plus_interval),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                }));

                                // Wrap in CAST(... AS type) if needed
                                if let Some(dt) = return_type {
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: case_expr,
                                        to: dt,
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                } else {
                                    Ok(case_expr)
                                }
                            }
                            DialectType::DuckDB => {
                                // Non-Snowflake source: simple date + INTERVAL
                                let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: crate::expressions::IntervalUnit::Month,
                                            use_plural: false,
                                        }),
                                    }));
                                Ok(Expression::Add(Box::new(BinaryOp::new(
                                    cast_date, interval,
                                ))))
                            }
                            DialectType::Snowflake => {
                                // Keep ADD_MONTHS when source is also Snowflake
                                if matches!(source, DialectType::Snowflake) {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "ADD_MONTHS".to_string(),
                                        vec![date, val],
                                    ))))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATEADD".to_string(),
                                        vec![
                                            Expression::Identifier(Identifier::new("MONTH")),
                                            val,
                                            date,
                                        ],
                                    ))))
                                }
                            }
                            DialectType::Redshift => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATEADD".to_string(),
                                    vec![
                                        Expression::Identifier(Identifier::new("MONTH")),
                                        val,
                                        date,
                                    ],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // String-literal dates get an explicit TIMESTAMP cast.
                                let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![Expression::string("MONTH"), val, cast_date],
                                ))))
                            }
                            DialectType::BigQuery => {
                                let interval =
                                    Expression::Interval(Box::new(crate::expressions::Interval {
                                        this: Some(val),
                                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                            unit: crate::expressions::IntervalUnit::Month,
                                            use_plural: false,
                                        }),
                                    }));
                                // String-literal dates get CAST(... AS DATETIME);
                                // DATETIME is emitted as a Custom type name here.
                                let cast_date = if matches!(&date, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: date,
                                        to: DataType::Custom {
                                            name: "DATETIME".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                } else {
                                    date
                                };
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![cast_date, interval],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "ADD_MONTHS".to_string(),
                                    vec![date, val],
                                ))))
                            }
                            _ => {
                                // Default: keep as AddMonths expression
                                Ok(Expression::AddMonths(Box::new(
                                    crate::expressions::BinaryFunc {
                                        this: date,
                                        expression: val,
                                        original_name: None,
                                        inferred_type: None,
                                    },
                                )))
                            }
                        }
                    } else {
                        Ok(e)
                    }
                }
26527
                Action::PercentileContConvert => {
                    // PERCENTILE_CONT(p) WITHIN GROUP (ORDER BY col) ->
                    // Presto/Trino: APPROX_PERCENTILE(col, p)
                    // Spark/Databricks: PERCENTILE_APPROX(col, p)
                    //
                    // NOTE(review): PERCENTILE_DISC is detected (`_is_disc`) but the
                    // flag is never used — DISC and CONT are rewritten identically.
                    if let Expression::WithinGroup(wg) = e {
                        // Extract percentile value and order by column
                        let (percentile, _is_disc) = match &wg.this {
                            Expression::Function(f) => {
                                let is_disc = f.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                                // Default percentile 0.5 (median) when no arg present.
                                let pct = f.args.first().cloned().unwrap_or(Expression::Literal(
                                    Box::new(Literal::Number("0.5".to_string())),
                                ));
                                (pct, is_disc)
                            }
                            Expression::AggregateFunction(af) => {
                                let is_disc = af.name.eq_ignore_ascii_case("PERCENTILE_DISC");
                                let pct = af.args.first().cloned().unwrap_or(Expression::Literal(
                                    Box::new(Literal::Number("0.5".to_string())),
                                ));
                                (pct, is_disc)
                            }
                            Expression::PercentileCont(pc) => (pc.percentile.clone(), false),
                            // Any other inner shape: leave the WITHIN GROUP untouched.
                            _ => return Ok(Expression::WithinGroup(wg)),
                        };
                        // First ORDER BY expression becomes the measured column;
                        // fall back to the literal 1 if the clause is empty.
                        let col = wg.order_by.first().map(|o| o.this.clone()).unwrap_or(
                            Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                        );

                        let func_name = match target {
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                "APPROX_PERCENTILE"
                            }
                            _ => "PERCENTILE_APPROX", // Spark, Databricks
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            vec![col, percentile],
                        ))))
                    } else {
                        Ok(e)
                    }
                }
26570
26571 Action::CurrentUserSparkParens => {
26572 // CURRENT_USER -> CURRENT_USER() for Spark
26573 if let Expression::CurrentUser(_) = e {
26574 Ok(Expression::Function(Box::new(Function::new(
26575 "CURRENT_USER".to_string(),
26576 vec![],
26577 ))))
26578 } else {
26579 Ok(e)
26580 }
26581 }
26582
26583 Action::SparkDateFuncCast => {
26584 // MONTH/YEAR/DAY('string') from Spark -> wrap arg in CAST to DATE
26585 let cast_arg = |arg: Expression| -> Expression {
26586 match target {
26587 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
26588 Self::double_cast_timestamp_date(arg)
26589 }
26590 _ => {
26591 // DuckDB, PostgreSQL, etc: CAST(arg AS DATE)
26592 Self::ensure_cast_date(arg)
26593 }
26594 }
26595 };
26596 match e {
26597 Expression::Month(f) => Ok(Expression::Month(Box::new(
26598 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26599 ))),
26600 Expression::Year(f) => Ok(Expression::Year(Box::new(
26601 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26602 ))),
26603 Expression::Day(f) => Ok(Expression::Day(Box::new(
26604 crate::expressions::UnaryFunc::new(cast_arg(f.this)),
26605 ))),
26606 other => Ok(other),
26607 }
26608 }
26609
26610 Action::MapFromArraysConvert => {
26611 // Expression::MapFromArrays -> target-specific
26612 if let Expression::MapFromArrays(mfa) = e {
26613 let keys = mfa.this;
26614 let values = mfa.expression;
26615 match target {
26616 DialectType::Snowflake => Ok(Expression::Function(Box::new(
26617 Function::new("OBJECT_CONSTRUCT".to_string(), vec![keys, values]),
26618 ))),
26619 _ => {
26620 // Hive, Presto, DuckDB, etc.: MAP(keys, values)
26621 Ok(Expression::Function(Box::new(Function::new(
26622 "MAP".to_string(),
26623 vec![keys, values],
26624 ))))
26625 }
26626 }
26627 } else {
26628 Ok(e)
26629 }
26630 }
26631
26632 Action::AnyToExists => {
26633 if let Expression::Any(q) = e {
26634 if let Some(op) = q.op.clone() {
26635 let lambda_param = crate::expressions::Identifier::new("x");
26636 let rhs = Expression::Identifier(lambda_param.clone());
26637 let body = match op {
26638 crate::expressions::QuantifiedOp::Eq => {
26639 Expression::Eq(Box::new(BinaryOp::new(q.this, rhs)))
26640 }
26641 crate::expressions::QuantifiedOp::Neq => {
26642 Expression::Neq(Box::new(BinaryOp::new(q.this, rhs)))
26643 }
26644 crate::expressions::QuantifiedOp::Lt => {
26645 Expression::Lt(Box::new(BinaryOp::new(q.this, rhs)))
26646 }
26647 crate::expressions::QuantifiedOp::Lte => {
26648 Expression::Lte(Box::new(BinaryOp::new(q.this, rhs)))
26649 }
26650 crate::expressions::QuantifiedOp::Gt => {
26651 Expression::Gt(Box::new(BinaryOp::new(q.this, rhs)))
26652 }
26653 crate::expressions::QuantifiedOp::Gte => {
26654 Expression::Gte(Box::new(BinaryOp::new(q.this, rhs)))
26655 }
26656 };
26657 let lambda =
26658 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
26659 parameters: vec![lambda_param],
26660 body,
26661 colon: false,
26662 parameter_types: Vec::new(),
26663 }));
26664 Ok(Expression::Function(Box::new(Function::new(
26665 "EXISTS".to_string(),
26666 vec![q.subquery, lambda],
26667 ))))
26668 } else {
26669 Ok(Expression::Any(q))
26670 }
26671 } else {
26672 Ok(e)
26673 }
26674 }
26675
                Action::GenerateSeriesConvert => {
                    // GENERATE_SERIES(start, end[, step]) -> SEQUENCE for Spark/Databricks/Hive, wrapped in UNNEST/EXPLODE
                    // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                    // For PG/Redshift target: keep as GENERATE_SERIES but normalize interval string step
                    if let Expression::Function(f) = e {
                        if f.name.eq_ignore_ascii_case("GENERATE_SERIES") && f.args.len() >= 2 {
                            let start = f.args[0].clone();
                            let end = f.args[1].clone();
                            // Third argument (step) is optional.
                            let step = f.args.get(2).cloned();

                            // Normalize step: convert string interval like '1day' or ' 2 days ' to INTERVAL expression
                            let step = step.map(|s| Self::normalize_interval_string(s, target));

                            // Helper: wrap CURRENT_TIMESTAMP in CAST(... AS TIMESTAMP) for Presto/Trino/Spark
                            let maybe_cast_timestamp = |arg: Expression| -> Expression {
                                if matches!(
                                    target,
                                    DialectType::Presto
                                        | DialectType::Trino
                                        | DialectType::Athena
                                        | DialectType::Spark
                                        | DialectType::Databricks
                                        | DialectType::Hive
                                ) {
                                    match &arg {
                                        Expression::CurrentTimestamp(_) => {
                                            Expression::Cast(Box::new(Cast {
                                                this: arg,
                                                to: DataType::Timestamp {
                                                    precision: None,
                                                    timezone: false,
                                                },
                                                trailing_comments: Vec::new(),
                                                double_colon_syntax: false,
                                                format: None,
                                                default: None,
                                                inferred_type: None,
                                            }))
                                        }
                                        _ => arg,
                                    }
                                } else {
                                    arg
                                }
                            };

                            // Shadowed: bounds after the optional timestamp cast.
                            let start = maybe_cast_timestamp(start);
                            let end = maybe_cast_timestamp(end);

                            // For PostgreSQL/Redshift target, keep as GENERATE_SERIES
                            // (only the step normalization above applies).
                            if matches!(target, DialectType::PostgreSQL | DialectType::Redshift) {
                                let mut gs_args = vec![start, end];
                                if let Some(step) = step {
                                    gs_args.push(step);
                                }
                                return Ok(Expression::Function(Box::new(Function::new(
                                    "GENERATE_SERIES".to_string(),
                                    gs_args,
                                ))));
                            }

                            // For DuckDB target: wrap in UNNEST(GENERATE_SERIES(...))
                            if matches!(target, DialectType::DuckDB) {
                                let mut gs_args = vec![start, end];
                                if let Some(step) = step {
                                    gs_args.push(step);
                                }
                                let gs = Expression::Function(Box::new(Function::new(
                                    "GENERATE_SERIES".to_string(),
                                    gs_args,
                                )));
                                return Ok(Expression::Function(Box::new(Function::new(
                                    "UNNEST".to_string(),
                                    vec![gs],
                                ))));
                            }

                            // Remaining targets are built on SEQUENCE(start, end[, step]).
                            let mut seq_args = vec![start, end];
                            if let Some(step) = step {
                                seq_args.push(step);
                            }

                            let seq = Expression::Function(Box::new(Function::new(
                                "SEQUENCE".to_string(),
                                seq_args,
                            )));

                            match target {
                                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                    // Wrap in UNNEST
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "UNNEST".to_string(),
                                        vec![seq],
                                    ))))
                                }
                                DialectType::Spark
                                | DialectType::Databricks
                                | DialectType::Hive => {
                                    // Wrap in EXPLODE
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "EXPLODE".to_string(),
                                        vec![seq],
                                    ))))
                                }
                                _ => {
                                    // Just SEQUENCE for others
                                    Ok(seq)
                                }
                            }
                        } else {
                            // Not a GENERATE_SERIES call (or too few args): untouched.
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
26792
                Action::ConcatCoalesceWrap => {
                    // CONCAT(a, b) function -> CONCAT(COALESCE(CAST(a AS VARCHAR), ''), ...) for Presto
                    // CONCAT(a, b) function -> CONCAT(COALESCE(a, ''), ...) for ClickHouse
                    //
                    // Every argument is NULL-protected via COALESCE(..., ''); only the
                    // Presto family additionally casts each argument to VARCHAR. Any
                    // non-Presto target takes the cast-free branch below.
                    if let Expression::Function(f) = e {
                        if f.name.eq_ignore_ascii_case("CONCAT") {
                            let new_args: Vec<Expression> = f
                                .args
                                .into_iter()
                                .map(|arg| {
                                    let cast_arg = if matches!(
                                        target,
                                        DialectType::Presto
                                            | DialectType::Trino
                                            | DialectType::Athena
                                    ) {
                                        Expression::Cast(Box::new(Cast {
                                            this: arg,
                                            to: DataType::VarChar {
                                                length: None,
                                                parenthesized_length: false,
                                            },
                                            trailing_comments: Vec::new(),
                                            double_colon_syntax: false,
                                            format: None,
                                            default: None,
                                            inferred_type: None,
                                        }))
                                    } else {
                                        arg
                                    };
                                    Expression::Function(Box::new(Function::new(
                                        "COALESCE".to_string(),
                                        vec![cast_arg, Expression::string("")],
                                    )))
                                })
                                .collect();
                            Ok(Expression::Function(Box::new(Function::new(
                                "CONCAT".to_string(),
                                new_args,
                            ))))
                        } else {
                            // Some other function: untouched.
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
26840
26841 Action::PipeConcatToConcat => {
26842 // a || b (Concat operator) -> CONCAT(CAST(a AS VARCHAR), CAST(b AS VARCHAR)) for Presto/Trino
26843 if let Expression::Concat(op) = e {
26844 let cast_left = Expression::Cast(Box::new(Cast {
26845 this: op.left,
26846 to: DataType::VarChar {
26847 length: None,
26848 parenthesized_length: false,
26849 },
26850 trailing_comments: Vec::new(),
26851 double_colon_syntax: false,
26852 format: None,
26853 default: None,
26854 inferred_type: None,
26855 }));
26856 let cast_right = Expression::Cast(Box::new(Cast {
26857 this: op.right,
26858 to: DataType::VarChar {
26859 length: None,
26860 parenthesized_length: false,
26861 },
26862 trailing_comments: Vec::new(),
26863 double_colon_syntax: false,
26864 format: None,
26865 default: None,
26866 inferred_type: None,
26867 }));
26868 Ok(Expression::Function(Box::new(Function::new(
26869 "CONCAT".to_string(),
26870 vec![cast_left, cast_right],
26871 ))))
26872 } else {
26873 Ok(e)
26874 }
26875 }
26876
                Action::DivFuncConvert => {
                    // DIV(a, b) -> target-specific integer division
                    if let Expression::Function(f) = e {
                        if f.name.eq_ignore_ascii_case("DIV") && f.args.len() == 2 {
                            let a = f.args[0].clone();
                            let b = f.args[1].clone();
                            match target {
                                DialectType::DuckDB => {
                                    // DIV(a, b) -> CAST(a // b AS DECIMAL)
                                    // IntDiv renders as DuckDB's `//` operator.
                                    let int_div = Expression::IntDiv(Box::new(
                                        crate::expressions::BinaryFunc {
                                            this: a,
                                            expression: b,
                                            original_name: None,
                                            inferred_type: None,
                                        },
                                    ));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: int_div,
                                        to: DataType::Decimal {
                                            precision: None,
                                            scale: None,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::BigQuery => {
                                    // DIV(a, b) -> CAST(DIV(a, b) AS NUMERIC)
                                    let div_func = Expression::Function(Box::new(Function::new(
                                        "DIV".to_string(),
                                        vec![a, b],
                                    )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: div_func,
                                        to: DataType::Custom {
                                            name: "NUMERIC".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::SQLite => {
                                    // DIV(a, b) -> CAST(CAST(CAST(a AS REAL) / b AS INTEGER) AS REAL)
                                    // Inner REAL cast forces float division, INTEGER cast
                                    // truncates toward zero, outer REAL restores the
                                    // numeric result type.
                                    let cast_a = Expression::Cast(Box::new(Cast {
                                        this: a,
                                        to: DataType::Custom {
                                            name: "REAL".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                    let div = Expression::Div(Box::new(BinaryOp::new(cast_a, b)));
                                    let cast_int = Expression::Cast(Box::new(Cast {
                                        this: div,
                                        to: DataType::Int {
                                            length: None,
                                            integer_spelling: true,
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: cast_int,
                                        to: DataType::Custom {
                                            name: "REAL".to_string(),
                                        },
                                        trailing_comments: Vec::new(),
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                // Any other target keeps the DIV call unchanged.
                                _ => Ok(Expression::Function(f)),
                            }
                        } else {
                            // Not a 2-arg DIV call: untouched.
                            Ok(Expression::Function(f))
                        }
                    } else {
                        Ok(e)
                    }
                }
26972
26973 Action::JsonObjectAggConvert => {
26974 // JSON_OBJECT_AGG/JSONB_OBJECT_AGG -> JSON_GROUP_OBJECT for DuckDB
26975 match e {
26976 Expression::Function(f) => Ok(Expression::Function(Box::new(
26977 Function::new("JSON_GROUP_OBJECT".to_string(), f.args),
26978 ))),
26979 Expression::AggregateFunction(af) => {
26980 // AggregateFunction stores all args in the `args` vec
26981 Ok(Expression::Function(Box::new(Function::new(
26982 "JSON_GROUP_OBJECT".to_string(),
26983 af.args,
26984 ))))
26985 }
26986 other => Ok(other),
26987 }
26988 }
26989
26990 Action::JsonbExistsConvert => {
26991 // JSONB_EXISTS('json', 'key') -> JSON_EXISTS('json', '$.key') for DuckDB
26992 if let Expression::Function(f) = e {
26993 if f.args.len() == 2 {
26994 let json_expr = f.args[0].clone();
26995 let key = match &f.args[1] {
26996 Expression::Literal(lit)
26997 if matches!(
26998 lit.as_ref(),
26999 crate::expressions::Literal::String(_)
27000 ) =>
27001 {
27002 let crate::expressions::Literal::String(s) = lit.as_ref()
27003 else {
27004 unreachable!()
27005 };
27006 format!("$.{}", s)
27007 }
27008 _ => return Ok(Expression::Function(f)),
27009 };
27010 Ok(Expression::Function(Box::new(Function::new(
27011 "JSON_EXISTS".to_string(),
27012 vec![json_expr, Expression::string(&key)],
27013 ))))
27014 } else {
27015 Ok(Expression::Function(f))
27016 }
27017 } else {
27018 Ok(e)
27019 }
27020 }
27021
27022 Action::DateBinConvert => {
27023 // DATE_BIN('interval', ts, origin) -> TIME_BUCKET('interval', ts, origin) for DuckDB
27024 if let Expression::Function(f) = e {
27025 Ok(Expression::Function(Box::new(Function::new(
27026 "TIME_BUCKET".to_string(),
27027 f.args,
27028 ))))
27029 } else {
27030 Ok(e)
27031 }
27032 }
27033
27034 Action::MysqlCastCharToText => {
27035 // MySQL CAST(x AS CHAR) was originally TEXT -> convert to target text type
27036 if let Expression::Cast(mut c) = e {
27037 c.to = DataType::Text;
27038 Ok(Expression::Cast(c))
27039 } else {
27040 Ok(e)
27041 }
27042 }
27043
27044 Action::SparkCastVarcharToString => {
27045 // Spark parses VARCHAR(n)/CHAR(n) as TEXT -> normalize to STRING
27046 match e {
27047 Expression::Cast(mut c) => {
27048 c.to = Self::normalize_varchar_to_string(c.to);
27049 Ok(Expression::Cast(c))
27050 }
27051 Expression::TryCast(mut c) => {
27052 c.to = Self::normalize_varchar_to_string(c.to);
27053 Ok(Expression::TryCast(c))
27054 }
27055 _ => Ok(e),
27056 }
27057 }
27058
27059 Action::MinMaxToLeastGreatest => {
27060 // Multi-arg MIN(a,b,c) -> LEAST(a,b,c), MAX(a,b,c) -> GREATEST(a,b,c)
27061 if let Expression::Function(f) = e {
27062 let new_name = if f.name.eq_ignore_ascii_case("MIN") {
27063 "LEAST"
27064 } else if f.name.eq_ignore_ascii_case("MAX") {
27065 "GREATEST"
27066 } else {
27067 return Ok(Expression::Function(f));
27068 };
27069 Ok(Expression::Function(Box::new(Function::new(
27070 new_name.to_string(),
27071 f.args,
27072 ))))
27073 } else {
27074 Ok(e)
27075 }
27076 }
27077
27078 Action::ClickHouseUniqToApproxCountDistinct => {
27079 // ClickHouse uniq(x) -> APPROX_COUNT_DISTINCT(x) for non-ClickHouse targets
27080 if let Expression::Function(f) = e {
27081 Ok(Expression::Function(Box::new(Function::new(
27082 "APPROX_COUNT_DISTINCT".to_string(),
27083 f.args,
27084 ))))
27085 } else {
27086 Ok(e)
27087 }
27088 }
27089
27090 Action::ClickHouseAnyToAnyValue => {
27091 // ClickHouse any(x) -> ANY_VALUE(x) for non-ClickHouse targets
27092 if let Expression::Function(f) = e {
27093 Ok(Expression::Function(Box::new(Function::new(
27094 "ANY_VALUE".to_string(),
27095 f.args,
27096 ))))
27097 } else {
27098 Ok(e)
27099 }
27100 }
27101
27102 Action::OracleVarchar2ToVarchar => {
27103 // Oracle VARCHAR2(N CHAR/BYTE) / NVARCHAR2(N) -> VarChar(N) for non-Oracle targets
27104 if let Expression::DataType(DataType::Custom { ref name }) = e {
27105 // Extract length from VARCHAR2(N ...) or NVARCHAR2(N ...)
27106 let starts_varchar2 =
27107 name.len() >= 9 && name[..9].eq_ignore_ascii_case("VARCHAR2(");
27108 let starts_nvarchar2 =
27109 name.len() >= 10 && name[..10].eq_ignore_ascii_case("NVARCHAR2(");
27110 let inner = if starts_varchar2 || starts_nvarchar2 {
27111 let start = if starts_nvarchar2 { 10 } else { 9 }; // skip "NVARCHAR2(" or "VARCHAR2("
27112 let end = name.len() - 1; // skip trailing ")"
27113 Some(&name[start..end])
27114 } else {
27115 Option::None
27116 };
27117 if let Some(inner_str) = inner {
27118 // Parse the number part, ignoring BYTE/CHAR qualifier
27119 let num_str = inner_str.split_whitespace().next().unwrap_or("");
27120 if let Ok(n) = num_str.parse::<u32>() {
27121 Ok(Expression::DataType(DataType::VarChar {
27122 length: Some(n),
27123 parenthesized_length: false,
27124 }))
27125 } else {
27126 Ok(e)
27127 }
27128 } else {
27129 // Plain VARCHAR2 / NVARCHAR2 without parens
27130 Ok(Expression::DataType(DataType::VarChar {
27131 length: Option::None,
27132 parenthesized_length: false,
27133 }))
27134 }
27135 } else {
27136 Ok(e)
27137 }
27138 }
27139
                Action::Nvl2Expand => {
                    // NVL2(a, b[, c]) -> CASE WHEN NOT a IS NULL THEN b [ELSE c] END
                    // But keep as NVL2 for dialects that support it natively
                    //
                    // When the third argument is absent the CASE has no ELSE branch,
                    // which evaluates to NULL for non-matching rows.
                    let nvl2_native = matches!(
                        target,
                        DialectType::Oracle
                            | DialectType::Snowflake
                            | DialectType::Redshift
                            | DialectType::Teradata
                            | DialectType::Spark
                            | DialectType::Databricks
                    );
                    // Pull (a, b, c) out of either the dedicated Nvl2 node or a raw
                    // NVL2 function call; early-return the native form when supported.
                    let (a, b, c) = if let Expression::Nvl2(nvl2) = e {
                        if nvl2_native {
                            return Ok(Expression::Nvl2(nvl2));
                        }
                        (nvl2.this, nvl2.true_value, Some(nvl2.false_value))
                    } else if let Expression::Function(f) = e {
                        if nvl2_native {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "NVL2".to_string(),
                                f.args,
                            ))));
                        }
                        if f.args.len() < 2 {
                            // Malformed call (fewer than 2 args): untouched.
                            return Ok(Expression::Function(f));
                        }
                        let mut args = f.args;
                        let a = args.remove(0);
                        let b = args.remove(0);
                        let c = if !args.is_empty() {
                            Some(args.remove(0))
                        } else {
                            Option::None
                        };
                        (a, b, c)
                    } else {
                        return Ok(e);
                    };
                    // Build: NOT (a IS NULL)
                    let is_null = Expression::IsNull(Box::new(IsNull {
                        this: a,
                        not: false,
                        postfix_form: false,
                    }));
                    let not_null = Expression::Not(Box::new(crate::expressions::UnaryOp {
                        this: is_null,
                        inferred_type: None,
                    }));
                    Ok(Expression::Case(Box::new(Case {
                        operand: Option::None,
                        whens: vec![(not_null, b)],
                        else_: c,
                        comments: Vec::new(),
                        inferred_type: None,
                    })))
                }
27197
27198 Action::IfnullToCoalesce => {
27199 // IFNULL(a, b) -> COALESCE(a, b): clear original_name to output COALESCE
27200 if let Expression::Coalesce(mut cf) = e {
27201 cf.original_name = Option::None;
27202 Ok(Expression::Coalesce(cf))
27203 } else if let Expression::Function(f) = e {
27204 Ok(Expression::Function(Box::new(Function::new(
27205 "COALESCE".to_string(),
27206 f.args,
27207 ))))
27208 } else {
27209 Ok(e)
27210 }
27211 }
27212
27213 Action::IsAsciiConvert => {
27214 // IS_ASCII(x) -> dialect-specific ASCII check
27215 if let Expression::Function(f) = e {
27216 let arg = f.args.into_iter().next().unwrap();
27217 match target {
27218 DialectType::MySQL | DialectType::SingleStore | DialectType::TiDB => {
27219 // REGEXP_LIKE(x, '^[[:ascii:]]*$')
27220 Ok(Expression::Function(Box::new(Function::new(
27221 "REGEXP_LIKE".to_string(),
27222 vec![
27223 arg,
27224 Expression::Literal(Box::new(Literal::String(
27225 "^[[:ascii:]]*$".to_string(),
27226 ))),
27227 ],
27228 ))))
27229 }
27230 DialectType::PostgreSQL
27231 | DialectType::Redshift
27232 | DialectType::Materialize
27233 | DialectType::RisingWave => {
27234 // (x ~ '^[[:ascii:]]*$')
27235 Ok(Expression::Paren(Box::new(Paren {
27236 this: Expression::RegexpLike(Box::new(
27237 crate::expressions::RegexpFunc {
27238 this: arg,
27239 pattern: Expression::Literal(Box::new(
27240 Literal::String("^[[:ascii:]]*$".to_string()),
27241 )),
27242 flags: Option::None,
27243 },
27244 )),
27245 trailing_comments: Vec::new(),
27246 })))
27247 }
27248 DialectType::SQLite => {
27249 // (NOT x GLOB CAST(x'2a5b5e012d7f5d2a' AS TEXT))
27250 let hex_lit = Expression::Literal(Box::new(Literal::HexString(
27251 "2a5b5e012d7f5d2a".to_string(),
27252 )));
27253 let cast_expr = Expression::Cast(Box::new(Cast {
27254 this: hex_lit,
27255 to: DataType::Text,
27256 trailing_comments: Vec::new(),
27257 double_colon_syntax: false,
27258 format: Option::None,
27259 default: Option::None,
27260 inferred_type: None,
27261 }));
27262 let glob = Expression::Glob(Box::new(BinaryOp {
27263 left: arg,
27264 right: cast_expr,
27265 left_comments: Vec::new(),
27266 operator_comments: Vec::new(),
27267 trailing_comments: Vec::new(),
27268 inferred_type: None,
27269 }));
27270 Ok(Expression::Paren(Box::new(Paren {
27271 this: Expression::Not(Box::new(crate::expressions::UnaryOp {
27272 this: glob,
27273 inferred_type: None,
27274 })),
27275 trailing_comments: Vec::new(),
27276 })))
27277 }
27278 DialectType::TSQL | DialectType::Fabric => {
27279 // (PATINDEX(CONVERT(VARCHAR(MAX), 0x255b5e002d7f5d25) COLLATE Latin1_General_BIN, x) = 0)
27280 let hex_lit = Expression::Literal(Box::new(Literal::HexNumber(
27281 "255b5e002d7f5d25".to_string(),
27282 )));
27283 let convert_expr = Expression::Convert(Box::new(
27284 crate::expressions::ConvertFunc {
27285 this: hex_lit,
27286 to: DataType::Text, // Text generates as VARCHAR(MAX) for TSQL
27287 style: None,
27288 },
27289 ));
27290 let collated = Expression::Collation(Box::new(
27291 crate::expressions::CollationExpr {
27292 this: convert_expr,
27293 collation: "Latin1_General_BIN".to_string(),
27294 quoted: false,
27295 double_quoted: false,
27296 },
27297 ));
27298 let patindex = Expression::Function(Box::new(Function::new(
27299 "PATINDEX".to_string(),
27300 vec![collated, arg],
27301 )));
27302 let zero =
27303 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27304 let eq_zero = Expression::Eq(Box::new(BinaryOp {
27305 left: patindex,
27306 right: zero,
27307 left_comments: Vec::new(),
27308 operator_comments: Vec::new(),
27309 trailing_comments: Vec::new(),
27310 inferred_type: None,
27311 }));
27312 Ok(Expression::Paren(Box::new(Paren {
27313 this: eq_zero,
27314 trailing_comments: Vec::new(),
27315 })))
27316 }
27317 DialectType::Oracle => {
27318 // NVL(REGEXP_LIKE(x, '^[' || CHR(1) || '-' || CHR(127) || ']*$'), TRUE)
27319 // Build the pattern: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27320 let s1 = Expression::Literal(Box::new(Literal::String(
27321 "^[".to_string(),
27322 )));
27323 let chr1 = Expression::Function(Box::new(Function::new(
27324 "CHR".to_string(),
27325 vec![Expression::Literal(Box::new(Literal::Number(
27326 "1".to_string(),
27327 )))],
27328 )));
27329 let dash =
27330 Expression::Literal(Box::new(Literal::String("-".to_string())));
27331 let chr127 = Expression::Function(Box::new(Function::new(
27332 "CHR".to_string(),
27333 vec![Expression::Literal(Box::new(Literal::Number(
27334 "127".to_string(),
27335 )))],
27336 )));
27337 let s2 = Expression::Literal(Box::new(Literal::String(
27338 "]*$".to_string(),
27339 )));
27340 // Build: '^[' || CHR(1) || '-' || CHR(127) || ']*$'
27341 let concat1 =
27342 Expression::DPipe(Box::new(crate::expressions::DPipe {
27343 this: Box::new(s1),
27344 expression: Box::new(chr1),
27345 safe: None,
27346 }));
27347 let concat2 =
27348 Expression::DPipe(Box::new(crate::expressions::DPipe {
27349 this: Box::new(concat1),
27350 expression: Box::new(dash),
27351 safe: None,
27352 }));
27353 let concat3 =
27354 Expression::DPipe(Box::new(crate::expressions::DPipe {
27355 this: Box::new(concat2),
27356 expression: Box::new(chr127),
27357 safe: None,
27358 }));
27359 let concat4 =
27360 Expression::DPipe(Box::new(crate::expressions::DPipe {
27361 this: Box::new(concat3),
27362 expression: Box::new(s2),
27363 safe: None,
27364 }));
27365 let regexp_like = Expression::Function(Box::new(Function::new(
27366 "REGEXP_LIKE".to_string(),
27367 vec![arg, concat4],
27368 )));
27369 // Use Column("TRUE") to output literal TRUE keyword (not boolean 1/0)
27370 let true_expr =
27371 Expression::Column(Box::new(crate::expressions::Column {
27372 name: Identifier {
27373 name: "TRUE".to_string(),
27374 quoted: false,
27375 trailing_comments: Vec::new(),
27376 span: None,
27377 },
27378 table: None,
27379 join_mark: false,
27380 trailing_comments: Vec::new(),
27381 span: None,
27382 inferred_type: None,
27383 }));
27384 let nvl = Expression::Function(Box::new(Function::new(
27385 "NVL".to_string(),
27386 vec![regexp_like, true_expr],
27387 )));
27388 Ok(nvl)
27389 }
27390 _ => Ok(Expression::Function(Box::new(Function::new(
27391 "IS_ASCII".to_string(),
27392 vec![arg],
27393 )))),
27394 }
27395 } else {
27396 Ok(e)
27397 }
27398 }
27399
            Action::StrPositionConvert => {
                // STR_POSITION(haystack, needle[, position[, occurrence]]) -> dialect-specific
                if let Expression::Function(f) = e {
                    // Need at least haystack + needle; otherwise leave untouched.
                    if f.args.len() < 2 {
                        return Ok(Expression::Function(f));
                    }
                    let mut args = f.args;

                    let haystack = args.remove(0);
                    let needle = args.remove(0);
                    // Optional third argument: 1-based start position.
                    let position = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };
                    // Optional fourth argument: which occurrence to find.
                    let occurrence = if !args.is_empty() {
                        Some(args.remove(0))
                    } else {
                        Option::None
                    };

                    // Helper to build: STRPOS/INSTR(SUBSTRING(haystack, pos), needle) expansion
                    // Returns: CASE/IF WHEN func(SUBSTRING(haystack, pos), needle[, occ]) = 0 THEN 0 ELSE ... + pos - 1 END
                    // NOTE: the inner search call appears twice in the generated
                    // SQL (once in the zero test, once in the + pos - 1 arithmetic).
                    fn build_position_expansion(
                        haystack: Expression,
                        needle: Expression,
                        pos: Expression,
                        occurrence: Option<Expression>,
                        inner_func: &str,
                        wrapper: &str, // "CASE", "IF", "IIF"
                    ) -> Expression {
                        let substr = Expression::Function(Box::new(Function::new(
                            "SUBSTRING".to_string(),
                            vec![haystack, pos.clone()],
                        )));
                        let mut inner_args = vec![substr, needle];
                        if let Some(occ) = occurrence {
                            inner_args.push(occ);
                        }
                        let inner_call = Expression::Function(Box::new(Function::new(
                            inner_func.to_string(),
                            inner_args,
                        )));
                        let zero =
                            Expression::Literal(Box::new(Literal::Number("0".to_string())));
                        let one =
                            Expression::Literal(Box::new(Literal::Number("1".to_string())));
                        // inner_call = 0 (needle not found in the suffix)
                        let eq_zero = Expression::Eq(Box::new(BinaryOp {
                            left: inner_call.clone(),
                            right: zero.clone(),
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        // inner_call + pos - 1 (translate suffix offset back to
                        // an offset in the original haystack)
                        let add_pos = Expression::Add(Box::new(BinaryOp {
                            left: inner_call,
                            right: pos,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));
                        let sub_one = Expression::Sub(Box::new(BinaryOp {
                            left: add_pos,
                            right: one,
                            left_comments: Vec::new(),
                            operator_comments: Vec::new(),
                            trailing_comments: Vec::new(),
                            inferred_type: None,
                        }));

                        match wrapper {
                            "CASE" => Expression::Case(Box::new(Case {
                                operand: Option::None,
                                whens: vec![(eq_zero, zero)],
                                else_: Some(sub_one),
                                comments: Vec::new(),
                                inferred_type: None,
                            })),
                            "IIF" => Expression::Function(Box::new(Function::new(
                                "IIF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                            _ => Expression::Function(Box::new(Function::new(
                                "IF".to_string(),
                                vec![eq_zero, zero, sub_one],
                            ))),
                        }
                    }

                    match target {
                        // STRPOS group: Athena, DuckDB, Presto, Trino, Drill
                        DialectType::Athena
                        | DialectType::DuckDB
                        | DialectType::Presto
                        | DialectType::Trino
                        | DialectType::Drill => {
                            if let Some(pos) = position {
                                let wrapper = if matches!(target, DialectType::DuckDB) {
                                    "CASE"
                                } else {
                                    "IF"
                                };
                                let result = build_position_expansion(
                                    haystack, needle, pos, occurrence, "STRPOS", wrapper,
                                );
                                if matches!(target, DialectType::Drill) {
                                    // Drill uses backtick-quoted `IF`
                                    if let Expression::Function(mut f) = result {
                                        f.name = "`IF`".to_string();
                                        Ok(Expression::Function(f))
                                    } else {
                                        Ok(result)
                                    }
                                } else {
                                    Ok(result)
                                }
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "STRPOS".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // SQLite: IIF wrapper
                        DialectType::SQLite => {
                            if let Some(pos) = position {
                                Ok(build_position_expansion(
                                    haystack, needle, pos, occurrence, "INSTR", "IIF",
                                ))
                            } else {
                                Ok(Expression::Function(Box::new(Function::new(
                                    "INSTR".to_string(),
                                    vec![haystack, needle],
                                ))))
                            }
                        }
                        // INSTR group: Teradata, BigQuery, Oracle
                        // INSTR accepts position and occurrence natively, so all
                        // four arguments pass straight through.
                        DialectType::Teradata | DialectType::BigQuery | DialectType::Oracle => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "INSTR".to_string(),
                                a,
                            ))))
                        }
                        // CHARINDEX group: Snowflake, TSQL
                        // CHARINDEX takes the needle first.
                        // NOTE(review): a fourth `occurrence` argument is silently
                        // dropped in this branch — confirm that is intended.
                        DialectType::Snowflake | DialectType::TSQL | DialectType::Fabric => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "CHARINDEX".to_string(),
                                a,
                            ))))
                        }
                        // POSITION(needle IN haystack): PostgreSQL, Materialize, RisingWave, Redshift
                        DialectType::PostgreSQL
                        | DialectType::Materialize
                        | DialectType::RisingWave
                        | DialectType::Redshift => {
                            if let Some(pos) = position {
                                // Build: CASE WHEN POSITION(needle IN SUBSTRING(haystack FROM pos)) = 0 THEN 0
                                //             ELSE POSITION(...) + pos - 1 END
                                let substr = Expression::Substring(Box::new(
                                    crate::expressions::SubstringFunc {
                                        this: haystack,
                                        start: pos.clone(),
                                        length: Option::None,
                                        from_for_syntax: true,
                                    },
                                ));
                                let pos_in = Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(substr),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                ));
                                let zero = Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                )));
                                let one = Expression::Literal(Box::new(Literal::Number(
                                    "1".to_string(),
                                )));
                                let eq_zero = Expression::Eq(Box::new(BinaryOp {
                                    left: pos_in.clone(),
                                    right: zero.clone(),
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let add_pos = Expression::Add(Box::new(BinaryOp {
                                    left: pos_in,
                                    right: pos,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                let sub_one = Expression::Sub(Box::new(BinaryOp {
                                    left: add_pos,
                                    right: one,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                Ok(Expression::Case(Box::new(Case {
                                    operand: Option::None,
                                    whens: vec![(eq_zero, zero)],
                                    else_: Some(sub_one),
                                    comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            } else {
                                Ok(Expression::StrPosition(Box::new(
                                    crate::expressions::StrPosition {
                                        this: Box::new(haystack),
                                        substr: Some(Box::new(needle)),
                                        position: Option::None,
                                        occurrence: Option::None,
                                    },
                                )))
                            }
                        }
                        // LOCATE group: MySQL, Hive, Spark, Databricks, Doris
                        // NOTE(review): `occurrence` is silently dropped here too.
                        DialectType::MySQL
                        | DialectType::SingleStore
                        | DialectType::TiDB
                        | DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => {
                            let mut a = vec![needle, haystack];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "LOCATE".to_string(),
                                a,
                            ))))
                        }
                        // ClickHouse: POSITION(haystack, needle[, position])
                        DialectType::ClickHouse => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "POSITION".to_string(),
                                a,
                            ))))
                        }
                        // Fallback: keep the generic STR_POSITION spelling with
                        // every argument that was supplied.
                        _ => {
                            let mut a = vec![haystack, needle];
                            if let Some(pos) = position {
                                a.push(pos);
                            }
                            if let Some(occ) = occurrence {
                                a.push(occ);
                            }
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_POSITION".to_string(),
                                a,
                            ))))
                        }
                    }
                } else {
                    Ok(e)
                }
            }
27682
27683 Action::ArraySumConvert => {
27684 // ARRAY_SUM(arr) -> dialect-specific
27685 if let Expression::Function(f) = e {
27686 let args = f.args;
27687 match target {
27688 DialectType::DuckDB => Ok(Expression::Function(Box::new(
27689 Function::new("LIST_SUM".to_string(), args),
27690 ))),
27691 DialectType::Spark | DialectType::Databricks => {
27692 // AGGREGATE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27693 let arr = args.into_iter().next().unwrap();
27694 let zero =
27695 Expression::Literal(Box::new(Literal::Number("0".to_string())));
27696 let acc_id = Identifier::new("acc");
27697 let x_id = Identifier::new("x");
27698 let acc = Expression::Identifier(acc_id.clone());
27699 let x = Expression::Identifier(x_id.clone());
27700 let add = Expression::Add(Box::new(BinaryOp {
27701 left: acc.clone(),
27702 right: x,
27703 left_comments: Vec::new(),
27704 operator_comments: Vec::new(),
27705 trailing_comments: Vec::new(),
27706 inferred_type: None,
27707 }));
27708 let lambda1 =
27709 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27710 parameters: vec![acc_id.clone(), x_id],
27711 body: add,
27712 colon: false,
27713 parameter_types: Vec::new(),
27714 }));
27715 let lambda2 =
27716 Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
27717 parameters: vec![acc_id],
27718 body: acc,
27719 colon: false,
27720 parameter_types: Vec::new(),
27721 }));
27722 Ok(Expression::Function(Box::new(Function::new(
27723 "AGGREGATE".to_string(),
27724 vec![arr, zero, lambda1, lambda2],
27725 ))))
27726 }
27727 DialectType::Presto | DialectType::Athena => {
27728 // Presto/Athena keep ARRAY_SUM natively
27729 Ok(Expression::Function(Box::new(Function::new(
27730 "ARRAY_SUM".to_string(),
27731 args,
27732 ))))
27733 }
27734 DialectType::Trino => {
27735 // REDUCE(arr, 0, (acc, x) -> acc + x, acc -> acc)
27736 if args.len() == 1 {
27737 let arr = args.into_iter().next().unwrap();
27738 let zero = Expression::Literal(Box::new(Literal::Number(
27739 "0".to_string(),
27740 )));
27741 let acc_id = Identifier::new("acc");
27742 let x_id = Identifier::new("x");
27743 let acc = Expression::Identifier(acc_id.clone());
27744 let x = Expression::Identifier(x_id.clone());
27745 let add = Expression::Add(Box::new(BinaryOp {
27746 left: acc.clone(),
27747 right: x,
27748 left_comments: Vec::new(),
27749 operator_comments: Vec::new(),
27750 trailing_comments: Vec::new(),
27751 inferred_type: None,
27752 }));
27753 let lambda1 = Expression::Lambda(Box::new(
27754 crate::expressions::LambdaExpr {
27755 parameters: vec![acc_id.clone(), x_id],
27756 body: add,
27757 colon: false,
27758 parameter_types: Vec::new(),
27759 },
27760 ));
27761 let lambda2 = Expression::Lambda(Box::new(
27762 crate::expressions::LambdaExpr {
27763 parameters: vec![acc_id],
27764 body: acc,
27765 colon: false,
27766 parameter_types: Vec::new(),
27767 },
27768 ));
27769 Ok(Expression::Function(Box::new(Function::new(
27770 "REDUCE".to_string(),
27771 vec![arr, zero, lambda1, lambda2],
27772 ))))
27773 } else {
27774 Ok(Expression::Function(Box::new(Function::new(
27775 "ARRAY_SUM".to_string(),
27776 args,
27777 ))))
27778 }
27779 }
27780 DialectType::ClickHouse => {
27781 // arraySum(lambda, arr) or arraySum(arr)
27782 Ok(Expression::Function(Box::new(Function::new(
27783 "arraySum".to_string(),
27784 args,
27785 ))))
27786 }
27787 _ => Ok(Expression::Function(Box::new(Function::new(
27788 "ARRAY_SUM".to_string(),
27789 args,
27790 )))),
27791 }
27792 } else {
27793 Ok(e)
27794 }
27795 }
27796
27797 Action::ArraySizeConvert => {
27798 if let Expression::Function(f) = e {
27799 Ok(Expression::Function(Box::new(Function::new(
27800 "REPEATED_COUNT".to_string(),
27801 f.args,
27802 ))))
27803 } else {
27804 Ok(e)
27805 }
27806 }
27807
            Action::ArrayAnyConvert => {
                // ARRAY_ANY(arr, x -> pred) -> dialect-specific "does any element
                // of arr satisfy pred" form.
                if let Expression::Function(f) = e {
                    let mut args = f.args;
                    if args.len() == 2 {
                        let arr = args.remove(0);
                        let lambda = args.remove(0);

                        // Extract lambda parameter name and body
                        // (a non-lambda second argument is treated as the
                        // predicate body with an implicit parameter "x").
                        let (param_name, pred_body) =
                            if let Expression::Lambda(ref lam) = lambda {
                                let name = if let Some(p) = lam.parameters.first() {
                                    p.name.clone()
                                } else {
                                    "x".to_string()
                                };
                                (name, lam.body.clone())
                            } else {
                                ("x".to_string(), lambda.clone())
                            };

                        // Helper: build a function call Expression
                        let make_func = |name: &str, args: Vec<Expression>| -> Expression {
                            Expression::Function(Box::new(Function::new(
                                name.to_string(),
                                args,
                            )))
                        };

                        // Helper: build (len_func(arr) = 0 OR len_func(filter_expr) <> 0) wrapped in Paren
                        // Note the first disjunct makes an empty array evaluate
                        // to TRUE.
                        let build_filter_pattern = |len_func: &str,
                                                    len_args_extra: Vec<Expression>,
                                                    filter_expr: Expression|
                         -> Expression {
                            // len_func(arr, ...extra) = 0
                            let mut len_arr_args = vec![arr.clone()];
                            len_arr_args.extend(len_args_extra.clone());
                            let len_arr = make_func(len_func, len_arr_args);
                            let eq_zero = Expression::Eq(Box::new(BinaryOp::new(
                                len_arr,
                                Expression::number(0),
                            )));

                            // len_func(filter_expr, ...extra) <> 0
                            let mut len_filter_args = vec![filter_expr];
                            len_filter_args.extend(len_args_extra);
                            let len_filter = make_func(len_func, len_filter_args);
                            let neq_zero = Expression::Neq(Box::new(BinaryOp::new(
                                len_filter,
                                Expression::number(0),
                            )));

                            // (eq_zero OR neq_zero)
                            let or_expr =
                                Expression::Or(Box::new(BinaryOp::new(eq_zero, neq_zero)));
                            Expression::Paren(Box::new(Paren {
                                this: or_expr,
                                trailing_comments: Vec::new(),
                            }))
                        };

                        match target {
                            DialectType::Trino | DialectType::Presto | DialectType::Athena => {
                                Ok(make_func("ANY_MATCH", vec![arr, lambda]))
                            }
                            DialectType::ClickHouse => {
                                // (LENGTH(arr) = 0 OR LENGTH(arrayFilter(x -> pred, arr)) <> 0)
                                // ClickHouse arrayFilter takes lambda first, then array
                                let filter_expr =
                                    make_func("arrayFilter", vec![lambda, arr.clone()]);
                                Ok(build_filter_pattern("LENGTH", vec![], filter_expr))
                            }
                            DialectType::Databricks | DialectType::Spark => {
                                // (SIZE(arr) = 0 OR SIZE(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("SIZE", vec![], filter_expr))
                            }
                            DialectType::DuckDB => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(LIST_FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("LIST_FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], filter_expr))
                            }
                            DialectType::Teradata => {
                                // (CARDINALITY(arr) = 0 OR CARDINALITY(FILTER(arr, x -> pred)) <> 0)
                                let filter_expr =
                                    make_func("FILTER", vec![arr.clone(), lambda]);
                                Ok(build_filter_pattern("CARDINALITY", vec![], filter_expr))
                            }
                            DialectType::BigQuery => {
                                // (ARRAY_LENGTH(arr) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS x WHERE pred)) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS x WHERE pred
                                let param_col = Expression::column(&param_name);
                                let unnest_expr = Expression::Unnest(Box::new(
                                    crate::expressions::UnnestFunc {
                                        this: arr.clone(),
                                        expressions: vec![],
                                        with_ordinality: false,
                                        alias: Some(Identifier::new(&param_name)),
                                        offset_alias: None,
                                    },
                                ));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_expr],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern("ARRAY_LENGTH", vec![], array_subquery))
                            }
                            DialectType::PostgreSQL => {
                                // (ARRAY_LENGTH(arr, 1) = 0 OR ARRAY_LENGTH(ARRAY(SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred), 1) <> 0)
                                // Build: SELECT x FROM UNNEST(arr) AS _t0(x) WHERE pred
                                let param_col = Expression::column(&param_name);
                                // For PostgreSQL, UNNEST uses AS _t0(x) syntax - use TableAlias
                                let unnest_with_alias =
                                    Expression::Alias(Box::new(crate::expressions::Alias {
                                        this: Expression::Unnest(Box::new(
                                            crate::expressions::UnnestFunc {
                                                this: arr.clone(),
                                                expressions: vec![],
                                                with_ordinality: false,
                                                alias: None,
                                                offset_alias: None,
                                            },
                                        )),
                                        alias: Identifier::new("_t0"),
                                        column_aliases: vec![Identifier::new(&param_name)],
                                        pre_alias_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                let mut sel = crate::expressions::Select::default();
                                sel.expressions = vec![param_col];
                                sel.from = Some(crate::expressions::From {
                                    expressions: vec![unnest_with_alias],
                                });
                                sel.where_clause =
                                    Some(crate::expressions::Where { this: pred_body });
                                let array_subquery =
                                    make_func("ARRAY", vec![Expression::Select(Box::new(sel))]);
                                Ok(build_filter_pattern(
                                    "ARRAY_LENGTH",
                                    vec![Expression::number(1)],
                                    array_subquery,
                                ))
                            }
                            _ => Ok(Expression::Function(Box::new(Function::new(
                                "ARRAY_ANY".to_string(),
                                vec![arr, lambda],
                            )))),
                        }
                    } else {
                        // Unexpected arity: keep the call as ARRAY_ANY unchanged.
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_ANY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    Ok(e)
                }
            }
27973
            Action::DecodeSimplify => {
                // DECODE(x, search1, result1, ..., default) -> CASE WHEN ... THEN result1 ... [ELSE default] END
                // For literal search values: CASE WHEN x = search THEN result
                // For NULL search: CASE WHEN x IS NULL THEN result
                // For non-literal (column, expr): CASE WHEN x = search OR (x IS NULL AND search IS NULL) THEN result
                // True for literal-like values (plain literal, boolean, or a
                // negated value): these use simple equality with no NULL check.
                fn is_decode_literal(e: &Expression) -> bool {
                    matches!(
                        e,
                        Expression::Literal(_) | Expression::Boolean(_) | Expression::Neg(_)
                    )
                }

                // Shared CASE builder used for both the Decode node and the
                // flat DecodeCase node below.
                let build_decode_case =
                    |this_expr: Expression,
                     pairs: Vec<(Expression, Expression)>,
                     default: Option<Expression>| {
                        let whens: Vec<(Expression, Expression)> = pairs
                            .into_iter()
                            .map(|(search, result)| {
                                if matches!(&search, Expression::Null(_)) {
                                    // NULL search -> IS NULL
                                    let condition = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                } else if is_decode_literal(&search)
                                    || is_decode_literal(&this_expr)
                                {
                                    // At least one side is a literal -> simple equality (no NULL check needed)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (eq, result)
                                } else {
                                    // Non-literal -> null-safe comparison
                                    // Comparison searches are wrapped in parens so
                                    // that `x = (a > b)` and `(a > b) IS NULL`
                                    // render unambiguously.
                                    let needs_paren = matches!(
                                        &search,
                                        Expression::Eq(_)
                                            | Expression::Neq(_)
                                            | Expression::Gt(_)
                                            | Expression::Gte(_)
                                            | Expression::Lt(_)
                                            | Expression::Lte(_)
                                    );
                                    let search_ref = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    // Build: x = search OR (x IS NULL AND search IS NULL)
                                    let eq = Expression::Eq(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: search_ref,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    // Second (independently parenthesized) copy of
                                    // the search expression for the IS NULL test.
                                    let search_in_null = if needs_paren {
                                        Expression::Paren(Box::new(crate::expressions::Paren {
                                            this: search.clone(),
                                            trailing_comments: Vec::new(),
                                        }))
                                    } else {
                                        search.clone()
                                    };
                                    let x_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: this_expr.clone(),
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let search_is_null = Expression::Is(Box::new(BinaryOp {
                                        left: search_in_null,
                                        right: Expression::Null(crate::expressions::Null),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let both_null = Expression::And(Box::new(BinaryOp {
                                        left: x_is_null,
                                        right: search_is_null,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    let condition = Expression::Or(Box::new(BinaryOp {
                                        left: eq,
                                        right: Expression::Paren(Box::new(
                                            crate::expressions::Paren {
                                                this: both_null,
                                                trailing_comments: Vec::new(),
                                            },
                                        )),
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    }));
                                    (condition, result)
                                }
                            })
                            .collect();
                        Expression::Case(Box::new(Case {
                            operand: None,
                            whens,
                            else_: default,
                            comments: Vec::new(),
                            inferred_type: None,
                        }))
                    };

                if let Expression::Decode(decode) = e {
                    Ok(build_decode_case(
                        decode.this,
                        decode.search_results,
                        decode.default,
                    ))
                } else if let Expression::DecodeCase(dc) = e {
                    // DecodeCase has flat expressions: [x, s1, r1, s2, r2, ..., default?]
                    let mut exprs = dc.expressions;
                    // Need at least x plus one search/result pair; otherwise
                    // rebuild the node unchanged.
                    if exprs.len() < 3 {
                        return Ok(Expression::DecodeCase(Box::new(
                            crate::expressions::DecodeCase { expressions: exprs },
                        )));
                    }
                    let this_expr = exprs.remove(0);
                    let mut pairs = Vec::new();
                    let mut default = None;
                    let mut i = 0;
                    while i + 1 < exprs.len() {
                        pairs.push((exprs[i].clone(), exprs[i + 1].clone()));
                        i += 2;
                    }
                    if i < exprs.len() {
                        // Odd remaining element is the default
                        default = Some(exprs[i].clone());
                    }
                    Ok(build_decode_case(this_expr, pairs, default))
                } else {
                    Ok(e)
                }
            }
28134
            Action::CreateTableLikeToCtas => {
                // CREATE TABLE a LIKE b -> CREATE TABLE a AS SELECT * FROM b LIMIT 0
                if let Expression::CreateTable(ct) = e {
                    // Locate the LIKE constraint naming the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut new_ct = *ct;
                        // The LIKE clause is consumed by the CTAS rewrite.
                        // NOTE(review): this clears *every* constraint, not just
                        // the LIKE entry — confirm a LIKE form never carries others.
                        new_ct.constraints.clear();
                        // Build: SELECT * FROM b LIMIT 0
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(Box::new(source_table))],
                            }),
                            // LIMIT 0: copy the schema only, never any rows.
                            limit: Some(crate::expressions::Limit {
                                this: Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                ))),
                                percent: false,
                                comments: Vec::new(),
                            }),
                            ..Default::default()
                        }));
                        new_ct.as_select = Some(select);
                        Ok(Expression::CreateTable(Box::new(new_ct)))
                    } else {
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
28179
            Action::CreateTableLikeToSelectInto => {
                // CREATE TABLE a LIKE b -> SELECT TOP 0 * INTO a FROM b AS temp
                if let Expression::CreateTable(ct) = e {
                    // Locate the LIKE constraint naming the source table.
                    let like_source = ct.constraints.iter().find_map(|c| {
                        if let crate::expressions::TableConstraint::Like { source, .. } = c {
                            Some(source.clone())
                        } else {
                            None
                        }
                    });
                    if let Some(source_table) = like_source {
                        let mut aliased_source = source_table;
                        aliased_source.alias = Some(Identifier::new("temp"));
                        // Build: SELECT TOP 0 * INTO a FROM b AS temp
                        let select = Expression::Select(Box::new(crate::expressions::Select {
                            expressions: vec![Expression::Star(crate::expressions::Star {
                                table: None,
                                except: None,
                                replace: None,
                                rename: None,
                                trailing_comments: Vec::new(),
                                span: None,
                            })],
                            from: Some(crate::expressions::From {
                                expressions: vec![Expression::Table(Box::new(aliased_source))],
                            }),
                            // INTO a: the table being created.
                            into: Some(crate::expressions::SelectInto {
                                this: Expression::Table(Box::new(ct.name.clone())),
                                temporary: false,
                                unlogged: false,
                                bulk_collect: false,
                                expressions: Vec::new(),
                            }),
                            // TOP 0: copy the schema only, never any rows.
                            top: Some(crate::expressions::Top {
                                this: Expression::Literal(Box::new(Literal::Number(
                                    "0".to_string(),
                                ))),
                                percent: false,
                                with_ties: false,
                                parenthesized: false,
                            }),
                            ..Default::default()
                        }));
                        Ok(select)
                    } else {
                        Ok(Expression::CreateTable(ct))
                    }
                } else {
                    Ok(e)
                }
            }
28231
28232 Action::CreateTableLikeToAs => {
28233 // CREATE TABLE a LIKE b -> CREATE TABLE a AS b (ClickHouse)
28234 if let Expression::CreateTable(ct) = e {
28235 let like_source = ct.constraints.iter().find_map(|c| {
28236 if let crate::expressions::TableConstraint::Like { source, .. } = c {
28237 Some(source.clone())
28238 } else {
28239 None
28240 }
28241 });
28242 if let Some(source_table) = like_source {
28243 let mut new_ct = *ct;
28244 new_ct.constraints.clear();
28245 // AS b (just a table reference, not a SELECT)
28246 new_ct.as_select = Some(Expression::Table(Box::new(source_table)));
28247 Ok(Expression::CreateTable(Box::new(new_ct)))
28248 } else {
28249 Ok(Expression::CreateTable(ct))
28250 }
28251 } else {
28252 Ok(e)
28253 }
28254 }
28255
28256 Action::TsOrDsToDateConvert => {
28257 // TS_OR_DS_TO_DATE(x[, fmt]) -> dialect-specific date conversion
28258 if let Expression::Function(f) = e {
28259 let mut args = f.args;
28260 let this = args.remove(0);
28261 let fmt = if !args.is_empty() {
28262 match &args[0] {
28263 Expression::Literal(lit)
28264 if matches!(lit.as_ref(), Literal::String(_)) =>
28265 {
28266 let Literal::String(s) = lit.as_ref() else {
28267 unreachable!()
28268 };
28269 Some(s.clone())
28270 }
28271 _ => None,
28272 }
28273 } else {
28274 None
28275 };
28276 Ok(Expression::TsOrDsToDate(Box::new(
28277 crate::expressions::TsOrDsToDate {
28278 this: Box::new(this),
28279 format: fmt,
28280 safe: None,
28281 },
28282 )))
28283 } else {
28284 Ok(e)
28285 }
28286 }
28287
28288 Action::TsOrDsToDateStrConvert => {
28289 // TS_OR_DS_TO_DATE_STR(x) -> SUBSTRING(CAST(x AS type), 1, 10)
28290 if let Expression::Function(f) = e {
28291 let arg = f.args.into_iter().next().unwrap();
28292 let str_type = match target {
28293 DialectType::DuckDB
28294 | DialectType::PostgreSQL
28295 | DialectType::Materialize => DataType::Text,
28296 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28297 DataType::Custom {
28298 name: "STRING".to_string(),
28299 }
28300 }
28301 DialectType::Presto
28302 | DialectType::Trino
28303 | DialectType::Athena
28304 | DialectType::Drill => DataType::VarChar {
28305 length: None,
28306 parenthesized_length: false,
28307 },
28308 DialectType::MySQL | DialectType::Doris | DialectType::StarRocks => {
28309 DataType::Custom {
28310 name: "STRING".to_string(),
28311 }
28312 }
28313 _ => DataType::VarChar {
28314 length: None,
28315 parenthesized_length: false,
28316 },
28317 };
28318 let cast_expr = Expression::Cast(Box::new(Cast {
28319 this: arg,
28320 to: str_type,
28321 double_colon_syntax: false,
28322 trailing_comments: Vec::new(),
28323 format: None,
28324 default: None,
28325 inferred_type: None,
28326 }));
28327 Ok(Expression::Substring(Box::new(
28328 crate::expressions::SubstringFunc {
28329 this: cast_expr,
28330 start: Expression::number(1),
28331 length: Some(Expression::number(10)),
28332 from_for_syntax: false,
28333 },
28334 )))
28335 } else {
28336 Ok(e)
28337 }
28338 }
28339
28340 Action::DateStrToDateConvert => {
28341 // DATE_STR_TO_DATE(x) -> dialect-specific
28342 if let Expression::Function(f) = e {
28343 let arg = f.args.into_iter().next().unwrap();
28344 match target {
28345 DialectType::SQLite => {
28346 // SQLite: just the bare expression (dates are strings)
28347 Ok(arg)
28348 }
28349 _ => Ok(Expression::Cast(Box::new(Cast {
28350 this: arg,
28351 to: DataType::Date,
28352 double_colon_syntax: false,
28353 trailing_comments: Vec::new(),
28354 format: None,
28355 default: None,
28356 inferred_type: None,
28357 }))),
28358 }
28359 } else {
28360 Ok(e)
28361 }
28362 }
28363
28364 Action::TimeStrToDateConvert => {
28365 // TIME_STR_TO_DATE(x) -> dialect-specific
28366 if let Expression::Function(f) = e {
28367 let arg = f.args.into_iter().next().unwrap();
28368 match target {
28369 DialectType::Hive
28370 | DialectType::Doris
28371 | DialectType::StarRocks
28372 | DialectType::Snowflake => Ok(Expression::Function(Box::new(
28373 Function::new("TO_DATE".to_string(), vec![arg]),
28374 ))),
28375 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28376 // Presto: CAST(x AS TIMESTAMP)
28377 Ok(Expression::Cast(Box::new(Cast {
28378 this: arg,
28379 to: DataType::Timestamp {
28380 timezone: false,
28381 precision: None,
28382 },
28383 double_colon_syntax: false,
28384 trailing_comments: Vec::new(),
28385 format: None,
28386 default: None,
28387 inferred_type: None,
28388 })))
28389 }
28390 _ => {
28391 // Default: CAST(x AS DATE)
28392 Ok(Expression::Cast(Box::new(Cast {
28393 this: arg,
28394 to: DataType::Date,
28395 double_colon_syntax: false,
28396 trailing_comments: Vec::new(),
28397 format: None,
28398 default: None,
28399 inferred_type: None,
28400 })))
28401 }
28402 }
28403 } else {
28404 Ok(e)
28405 }
28406 }
28407
28408 Action::TimeStrToTimeConvert => {
28409 // TIME_STR_TO_TIME(x[, zone]) -> dialect-specific CAST to timestamp type
28410 if let Expression::Function(f) = e {
28411 let mut args = f.args;
28412 let this = args.remove(0);
28413 let zone = if !args.is_empty() {
28414 match &args[0] {
28415 Expression::Literal(lit)
28416 if matches!(lit.as_ref(), Literal::String(_)) =>
28417 {
28418 let Literal::String(s) = lit.as_ref() else {
28419 unreachable!()
28420 };
28421 Some(s.clone())
28422 }
28423 _ => None,
28424 }
28425 } else {
28426 None
28427 };
28428 let has_zone = zone.is_some();
28429
28430 match target {
28431 DialectType::SQLite => {
28432 // SQLite: just the bare expression
28433 Ok(this)
28434 }
28435 DialectType::MySQL => {
28436 if has_zone {
28437 // MySQL with zone: TIMESTAMP(x)
28438 Ok(Expression::Function(Box::new(Function::new(
28439 "TIMESTAMP".to_string(),
28440 vec![this],
28441 ))))
28442 } else {
28443 // MySQL: CAST(x AS DATETIME) or with precision
28444 // Use DataType::Custom to avoid MySQL's transform_cast converting
28445 // CAST(x AS TIMESTAMP) -> TIMESTAMP(x)
28446 let precision = if let Expression::Literal(ref lit) = this {
28447 if let Literal::String(ref s) = lit.as_ref() {
28448 if let Some(dot_pos) = s.rfind('.') {
28449 let frac = &s[dot_pos + 1..];
28450 let digit_count = frac
28451 .chars()
28452 .take_while(|c| c.is_ascii_digit())
28453 .count();
28454 if digit_count > 0 {
28455 Some(digit_count)
28456 } else {
28457 None
28458 }
28459 } else {
28460 None
28461 }
28462 } else {
28463 None
28464 }
28465 } else {
28466 None
28467 };
28468 let type_name = match precision {
28469 Some(p) => format!("DATETIME({})", p),
28470 None => "DATETIME".to_string(),
28471 };
28472 Ok(Expression::Cast(Box::new(Cast {
28473 this,
28474 to: DataType::Custom { name: type_name },
28475 double_colon_syntax: false,
28476 trailing_comments: Vec::new(),
28477 format: None,
28478 default: None,
28479 inferred_type: None,
28480 })))
28481 }
28482 }
28483 DialectType::ClickHouse => {
28484 if has_zone {
28485 // ClickHouse with zone: CAST(x AS DateTime64(6, 'zone'))
28486 // We need to strip the timezone offset from the literal if present
28487 let clean_this = if let Expression::Literal(ref lit) = this {
28488 if let Literal::String(ref s) = lit.as_ref() {
28489 // Strip timezone offset like "-08:00" or "+00:00"
28490 let re_offset = s.rfind(|c: char| c == '+' || c == '-');
28491 if let Some(offset_pos) = re_offset {
28492 if offset_pos > 10 {
28493 // After the date part
28494 let trimmed = s[..offset_pos].to_string();
28495 Expression::Literal(Box::new(Literal::String(
28496 trimmed,
28497 )))
28498 } else {
28499 this.clone()
28500 }
28501 } else {
28502 this.clone()
28503 }
28504 } else {
28505 this.clone()
28506 }
28507 } else {
28508 this.clone()
28509 };
28510 let zone_str = zone.unwrap();
28511 // Build: CAST(x AS DateTime64(6, 'zone'))
28512 let type_name = format!("DateTime64(6, '{}')", zone_str);
28513 Ok(Expression::Cast(Box::new(Cast {
28514 this: clean_this,
28515 to: DataType::Custom { name: type_name },
28516 double_colon_syntax: false,
28517 trailing_comments: Vec::new(),
28518 format: None,
28519 default: None,
28520 inferred_type: None,
28521 })))
28522 } else {
28523 Ok(Expression::Cast(Box::new(Cast {
28524 this,
28525 to: DataType::Custom {
28526 name: "DateTime64(6)".to_string(),
28527 },
28528 double_colon_syntax: false,
28529 trailing_comments: Vec::new(),
28530 format: None,
28531 default: None,
28532 inferred_type: None,
28533 })))
28534 }
28535 }
28536 DialectType::BigQuery => {
28537 if has_zone {
28538 // BigQuery with zone: CAST(x AS TIMESTAMP)
28539 Ok(Expression::Cast(Box::new(Cast {
28540 this,
28541 to: DataType::Timestamp {
28542 timezone: false,
28543 precision: None,
28544 },
28545 double_colon_syntax: false,
28546 trailing_comments: Vec::new(),
28547 format: None,
28548 default: None,
28549 inferred_type: None,
28550 })))
28551 } else {
28552 // BigQuery: CAST(x AS DATETIME) - Timestamp{tz:false} renders as DATETIME for BigQuery
28553 Ok(Expression::Cast(Box::new(Cast {
28554 this,
28555 to: DataType::Custom {
28556 name: "DATETIME".to_string(),
28557 },
28558 double_colon_syntax: false,
28559 trailing_comments: Vec::new(),
28560 format: None,
28561 default: None,
28562 inferred_type: None,
28563 })))
28564 }
28565 }
28566 DialectType::Doris => {
28567 // Doris: CAST(x AS DATETIME)
28568 Ok(Expression::Cast(Box::new(Cast {
28569 this,
28570 to: DataType::Custom {
28571 name: "DATETIME".to_string(),
28572 },
28573 double_colon_syntax: false,
28574 trailing_comments: Vec::new(),
28575 format: None,
28576 default: None,
28577 inferred_type: None,
28578 })))
28579 }
28580 DialectType::TSQL | DialectType::Fabric => {
28581 if has_zone {
28582 // TSQL with zone: CAST(x AS DATETIMEOFFSET) AT TIME ZONE 'UTC'
28583 let cast_expr = Expression::Cast(Box::new(Cast {
28584 this,
28585 to: DataType::Custom {
28586 name: "DATETIMEOFFSET".to_string(),
28587 },
28588 double_colon_syntax: false,
28589 trailing_comments: Vec::new(),
28590 format: None,
28591 default: None,
28592 inferred_type: None,
28593 }));
28594 Ok(Expression::AtTimeZone(Box::new(
28595 crate::expressions::AtTimeZone {
28596 this: cast_expr,
28597 zone: Expression::Literal(Box::new(Literal::String(
28598 "UTC".to_string(),
28599 ))),
28600 },
28601 )))
28602 } else {
28603 // TSQL: CAST(x AS DATETIME2)
28604 Ok(Expression::Cast(Box::new(Cast {
28605 this,
28606 to: DataType::Custom {
28607 name: "DATETIME2".to_string(),
28608 },
28609 double_colon_syntax: false,
28610 trailing_comments: Vec::new(),
28611 format: None,
28612 default: None,
28613 inferred_type: None,
28614 })))
28615 }
28616 }
28617 DialectType::DuckDB => {
28618 if has_zone {
28619 // DuckDB with zone: CAST(x AS TIMESTAMPTZ)
28620 Ok(Expression::Cast(Box::new(Cast {
28621 this,
28622 to: DataType::Timestamp {
28623 timezone: true,
28624 precision: None,
28625 },
28626 double_colon_syntax: false,
28627 trailing_comments: Vec::new(),
28628 format: None,
28629 default: None,
28630 inferred_type: None,
28631 })))
28632 } else {
28633 // DuckDB: CAST(x AS TIMESTAMP)
28634 Ok(Expression::Cast(Box::new(Cast {
28635 this,
28636 to: DataType::Timestamp {
28637 timezone: false,
28638 precision: None,
28639 },
28640 double_colon_syntax: false,
28641 trailing_comments: Vec::new(),
28642 format: None,
28643 default: None,
28644 inferred_type: None,
28645 })))
28646 }
28647 }
28648 DialectType::PostgreSQL
28649 | DialectType::Materialize
28650 | DialectType::RisingWave => {
28651 if has_zone {
28652 // PostgreSQL with zone: CAST(x AS TIMESTAMPTZ)
28653 Ok(Expression::Cast(Box::new(Cast {
28654 this,
28655 to: DataType::Timestamp {
28656 timezone: true,
28657 precision: None,
28658 },
28659 double_colon_syntax: false,
28660 trailing_comments: Vec::new(),
28661 format: None,
28662 default: None,
28663 inferred_type: None,
28664 })))
28665 } else {
28666 // PostgreSQL: CAST(x AS TIMESTAMP)
28667 Ok(Expression::Cast(Box::new(Cast {
28668 this,
28669 to: DataType::Timestamp {
28670 timezone: false,
28671 precision: None,
28672 },
28673 double_colon_syntax: false,
28674 trailing_comments: Vec::new(),
28675 format: None,
28676 default: None,
28677 inferred_type: None,
28678 })))
28679 }
28680 }
28681 DialectType::Snowflake => {
28682 if has_zone {
28683 // Snowflake with zone: CAST(x AS TIMESTAMPTZ)
28684 Ok(Expression::Cast(Box::new(Cast {
28685 this,
28686 to: DataType::Timestamp {
28687 timezone: true,
28688 precision: None,
28689 },
28690 double_colon_syntax: false,
28691 trailing_comments: Vec::new(),
28692 format: None,
28693 default: None,
28694 inferred_type: None,
28695 })))
28696 } else {
28697 // Snowflake: CAST(x AS TIMESTAMP)
28698 Ok(Expression::Cast(Box::new(Cast {
28699 this,
28700 to: DataType::Timestamp {
28701 timezone: false,
28702 precision: None,
28703 },
28704 double_colon_syntax: false,
28705 trailing_comments: Vec::new(),
28706 format: None,
28707 default: None,
28708 inferred_type: None,
28709 })))
28710 }
28711 }
28712 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28713 if has_zone {
28714 // Presto/Trino with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28715 // Check for precision from sub-second digits
28716 let precision = if let Expression::Literal(ref lit) = this {
28717 if let Literal::String(ref s) = lit.as_ref() {
28718 if let Some(dot_pos) = s.rfind('.') {
28719 let frac = &s[dot_pos + 1..];
28720 let digit_count = frac
28721 .chars()
28722 .take_while(|c| c.is_ascii_digit())
28723 .count();
28724 if digit_count > 0
28725 && matches!(target, DialectType::Trino)
28726 {
28727 Some(digit_count as u32)
28728 } else {
28729 None
28730 }
28731 } else {
28732 None
28733 }
28734 } else {
28735 None
28736 }
28737 } else {
28738 None
28739 };
28740 let dt = if let Some(prec) = precision {
28741 DataType::Timestamp {
28742 timezone: true,
28743 precision: Some(prec),
28744 }
28745 } else {
28746 DataType::Timestamp {
28747 timezone: true,
28748 precision: None,
28749 }
28750 };
28751 Ok(Expression::Cast(Box::new(Cast {
28752 this,
28753 to: dt,
28754 double_colon_syntax: false,
28755 trailing_comments: Vec::new(),
28756 format: None,
28757 default: None,
28758 inferred_type: None,
28759 })))
28760 } else {
28761 // Check for sub-second precision for Trino
28762 let precision = if let Expression::Literal(ref lit) = this {
28763 if let Literal::String(ref s) = lit.as_ref() {
28764 if let Some(dot_pos) = s.rfind('.') {
28765 let frac = &s[dot_pos + 1..];
28766 let digit_count = frac
28767 .chars()
28768 .take_while(|c| c.is_ascii_digit())
28769 .count();
28770 if digit_count > 0
28771 && matches!(target, DialectType::Trino)
28772 {
28773 Some(digit_count as u32)
28774 } else {
28775 None
28776 }
28777 } else {
28778 None
28779 }
28780 } else {
28781 None
28782 }
28783 } else {
28784 None
28785 };
28786 let dt = DataType::Timestamp {
28787 timezone: false,
28788 precision,
28789 };
28790 Ok(Expression::Cast(Box::new(Cast {
28791 this,
28792 to: dt,
28793 double_colon_syntax: false,
28794 trailing_comments: Vec::new(),
28795 format: None,
28796 default: None,
28797 inferred_type: None,
28798 })))
28799 }
28800 }
28801 DialectType::Redshift => {
28802 if has_zone {
28803 // Redshift with zone: CAST(x AS TIMESTAMP WITH TIME ZONE)
28804 Ok(Expression::Cast(Box::new(Cast {
28805 this,
28806 to: DataType::Timestamp {
28807 timezone: true,
28808 precision: None,
28809 },
28810 double_colon_syntax: false,
28811 trailing_comments: Vec::new(),
28812 format: None,
28813 default: None,
28814 inferred_type: None,
28815 })))
28816 } else {
28817 // Redshift: CAST(x AS TIMESTAMP)
28818 Ok(Expression::Cast(Box::new(Cast {
28819 this,
28820 to: DataType::Timestamp {
28821 timezone: false,
28822 precision: None,
28823 },
28824 double_colon_syntax: false,
28825 trailing_comments: Vec::new(),
28826 format: None,
28827 default: None,
28828 inferred_type: None,
28829 })))
28830 }
28831 }
28832 _ => {
28833 // Default: CAST(x AS TIMESTAMP)
28834 Ok(Expression::Cast(Box::new(Cast {
28835 this,
28836 to: DataType::Timestamp {
28837 timezone: false,
28838 precision: None,
28839 },
28840 double_colon_syntax: false,
28841 trailing_comments: Vec::new(),
28842 format: None,
28843 default: None,
28844 inferred_type: None,
28845 })))
28846 }
28847 }
28848 } else {
28849 Ok(e)
28850 }
28851 }
28852
28853 Action::DateToDateStrConvert => {
28854 // DATE_TO_DATE_STR(x) -> CAST(x AS text_type) per dialect
28855 if let Expression::Function(f) = e {
28856 let arg = f.args.into_iter().next().unwrap();
28857 let str_type = match target {
28858 DialectType::DuckDB => DataType::Text,
28859 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28860 DataType::Custom {
28861 name: "STRING".to_string(),
28862 }
28863 }
28864 DialectType::Presto
28865 | DialectType::Trino
28866 | DialectType::Athena
28867 | DialectType::Drill => DataType::VarChar {
28868 length: None,
28869 parenthesized_length: false,
28870 },
28871 _ => DataType::VarChar {
28872 length: None,
28873 parenthesized_length: false,
28874 },
28875 };
28876 Ok(Expression::Cast(Box::new(Cast {
28877 this: arg,
28878 to: str_type,
28879 double_colon_syntax: false,
28880 trailing_comments: Vec::new(),
28881 format: None,
28882 default: None,
28883 inferred_type: None,
28884 })))
28885 } else {
28886 Ok(e)
28887 }
28888 }
28889
28890 Action::DateToDiConvert => {
28891 // DATE_TO_DI(x) -> CAST(format_func(x, fmt) AS INT)
28892 if let Expression::Function(f) = e {
28893 let arg = f.args.into_iter().next().unwrap();
28894 let inner = match target {
28895 DialectType::DuckDB => {
28896 // STRFTIME(x, '%Y%m%d')
28897 Expression::Function(Box::new(Function::new(
28898 "STRFTIME".to_string(),
28899 vec![arg, Expression::string("%Y%m%d")],
28900 )))
28901 }
28902 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28903 // DATE_FORMAT(x, 'yyyyMMdd')
28904 Expression::Function(Box::new(Function::new(
28905 "DATE_FORMAT".to_string(),
28906 vec![arg, Expression::string("yyyyMMdd")],
28907 )))
28908 }
28909 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
28910 // DATE_FORMAT(x, '%Y%m%d')
28911 Expression::Function(Box::new(Function::new(
28912 "DATE_FORMAT".to_string(),
28913 vec![arg, Expression::string("%Y%m%d")],
28914 )))
28915 }
28916 DialectType::Drill => {
28917 // TO_DATE(x, 'yyyyMMdd')
28918 Expression::Function(Box::new(Function::new(
28919 "TO_DATE".to_string(),
28920 vec![arg, Expression::string("yyyyMMdd")],
28921 )))
28922 }
28923 _ => {
28924 // Default: STRFTIME(x, '%Y%m%d')
28925 Expression::Function(Box::new(Function::new(
28926 "STRFTIME".to_string(),
28927 vec![arg, Expression::string("%Y%m%d")],
28928 )))
28929 }
28930 };
28931 // Use INT (not INTEGER) for Presto/Trino
28932 let int_type = match target {
28933 DialectType::Presto
28934 | DialectType::Trino
28935 | DialectType::Athena
28936 | DialectType::TSQL
28937 | DialectType::Fabric
28938 | DialectType::SQLite
28939 | DialectType::Redshift => DataType::Custom {
28940 name: "INT".to_string(),
28941 },
28942 _ => DataType::Int {
28943 length: None,
28944 integer_spelling: false,
28945 },
28946 };
28947 Ok(Expression::Cast(Box::new(Cast {
28948 this: inner,
28949 to: int_type,
28950 double_colon_syntax: false,
28951 trailing_comments: Vec::new(),
28952 format: None,
28953 default: None,
28954 inferred_type: None,
28955 })))
28956 } else {
28957 Ok(e)
28958 }
28959 }
28960
28961 Action::DiToDateConvert => {
28962 // DI_TO_DATE(x) -> dialect-specific integer-to-date conversion
28963 if let Expression::Function(f) = e {
28964 let arg = f.args.into_iter().next().unwrap();
28965 match target {
28966 DialectType::DuckDB => {
28967 // CAST(STRPTIME(CAST(x AS TEXT), '%Y%m%d') AS DATE)
28968 let cast_text = Expression::Cast(Box::new(Cast {
28969 this: arg,
28970 to: DataType::Text,
28971 double_colon_syntax: false,
28972 trailing_comments: Vec::new(),
28973 format: None,
28974 default: None,
28975 inferred_type: None,
28976 }));
28977 let strptime = Expression::Function(Box::new(Function::new(
28978 "STRPTIME".to_string(),
28979 vec![cast_text, Expression::string("%Y%m%d")],
28980 )));
28981 Ok(Expression::Cast(Box::new(Cast {
28982 this: strptime,
28983 to: DataType::Date,
28984 double_colon_syntax: false,
28985 trailing_comments: Vec::new(),
28986 format: None,
28987 default: None,
28988 inferred_type: None,
28989 })))
28990 }
28991 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
28992 // TO_DATE(CAST(x AS STRING), 'yyyyMMdd')
28993 let cast_str = Expression::Cast(Box::new(Cast {
28994 this: arg,
28995 to: DataType::Custom {
28996 name: "STRING".to_string(),
28997 },
28998 double_colon_syntax: false,
28999 trailing_comments: Vec::new(),
29000 format: None,
29001 default: None,
29002 inferred_type: None,
29003 }));
29004 Ok(Expression::Function(Box::new(Function::new(
29005 "TO_DATE".to_string(),
29006 vec![cast_str, Expression::string("yyyyMMdd")],
29007 ))))
29008 }
29009 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29010 // CAST(DATE_PARSE(CAST(x AS VARCHAR), '%Y%m%d') AS DATE)
29011 let cast_varchar = Expression::Cast(Box::new(Cast {
29012 this: arg,
29013 to: DataType::VarChar {
29014 length: None,
29015 parenthesized_length: false,
29016 },
29017 double_colon_syntax: false,
29018 trailing_comments: Vec::new(),
29019 format: None,
29020 default: None,
29021 inferred_type: None,
29022 }));
29023 let date_parse = Expression::Function(Box::new(Function::new(
29024 "DATE_PARSE".to_string(),
29025 vec![cast_varchar, Expression::string("%Y%m%d")],
29026 )));
29027 Ok(Expression::Cast(Box::new(Cast {
29028 this: date_parse,
29029 to: DataType::Date,
29030 double_colon_syntax: false,
29031 trailing_comments: Vec::new(),
29032 format: None,
29033 default: None,
29034 inferred_type: None,
29035 })))
29036 }
29037 DialectType::Drill => {
29038 // TO_DATE(CAST(x AS VARCHAR), 'yyyyMMdd')
29039 let cast_varchar = Expression::Cast(Box::new(Cast {
29040 this: arg,
29041 to: DataType::VarChar {
29042 length: None,
29043 parenthesized_length: false,
29044 },
29045 double_colon_syntax: false,
29046 trailing_comments: Vec::new(),
29047 format: None,
29048 default: None,
29049 inferred_type: None,
29050 }));
29051 Ok(Expression::Function(Box::new(Function::new(
29052 "TO_DATE".to_string(),
29053 vec![cast_varchar, Expression::string("yyyyMMdd")],
29054 ))))
29055 }
29056 _ => Ok(Expression::Function(Box::new(Function::new(
29057 "DI_TO_DATE".to_string(),
29058 vec![arg],
29059 )))),
29060 }
29061 } else {
29062 Ok(e)
29063 }
29064 }
29065
29066 Action::TsOrDiToDiConvert => {
29067 // TS_OR_DI_TO_DI(x) -> CAST(SUBSTR(REPLACE(CAST(x AS type), '-', ''), 1, 8) AS INT)
29068 if let Expression::Function(f) = e {
29069 let arg = f.args.into_iter().next().unwrap();
29070 let str_type = match target {
29071 DialectType::DuckDB => DataType::Text,
29072 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29073 DataType::Custom {
29074 name: "STRING".to_string(),
29075 }
29076 }
29077 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29078 DataType::VarChar {
29079 length: None,
29080 parenthesized_length: false,
29081 }
29082 }
29083 _ => DataType::VarChar {
29084 length: None,
29085 parenthesized_length: false,
29086 },
29087 };
29088 let cast_str = Expression::Cast(Box::new(Cast {
29089 this: arg,
29090 to: str_type,
29091 double_colon_syntax: false,
29092 trailing_comments: Vec::new(),
29093 format: None,
29094 default: None,
29095 inferred_type: None,
29096 }));
29097 let replace_expr = Expression::Function(Box::new(Function::new(
29098 "REPLACE".to_string(),
29099 vec![cast_str, Expression::string("-"), Expression::string("")],
29100 )));
29101 let substr_name = match target {
29102 DialectType::DuckDB
29103 | DialectType::Hive
29104 | DialectType::Spark
29105 | DialectType::Databricks => "SUBSTR",
29106 _ => "SUBSTR",
29107 };
29108 let substr = Expression::Function(Box::new(Function::new(
29109 substr_name.to_string(),
29110 vec![replace_expr, Expression::number(1), Expression::number(8)],
29111 )));
29112 // Use INT (not INTEGER) for Presto/Trino etc.
29113 let int_type = match target {
29114 DialectType::Presto
29115 | DialectType::Trino
29116 | DialectType::Athena
29117 | DialectType::TSQL
29118 | DialectType::Fabric
29119 | DialectType::SQLite
29120 | DialectType::Redshift => DataType::Custom {
29121 name: "INT".to_string(),
29122 },
29123 _ => DataType::Int {
29124 length: None,
29125 integer_spelling: false,
29126 },
29127 };
29128 Ok(Expression::Cast(Box::new(Cast {
29129 this: substr,
29130 to: int_type,
29131 double_colon_syntax: false,
29132 trailing_comments: Vec::new(),
29133 format: None,
29134 default: None,
29135 inferred_type: None,
29136 })))
29137 } else {
29138 Ok(e)
29139 }
29140 }
29141
29142 Action::UnixToStrConvert => {
29143 // UNIX_TO_STR(x, fmt) -> convert to Expression::UnixToStr for generator
29144 if let Expression::Function(f) = e {
29145 let mut args = f.args;
29146 let this = args.remove(0);
29147 let fmt_expr = if !args.is_empty() {
29148 Some(args.remove(0))
29149 } else {
29150 None
29151 };
29152
29153 // Check if format is a string literal
29154 let fmt_str = fmt_expr.as_ref().and_then(|f| {
29155 if let Expression::Literal(lit) = f {
29156 if let Literal::String(s) = lit.as_ref() {
29157 Some(s.clone())
29158 } else {
29159 None
29160 }
29161 } else {
29162 None
29163 }
29164 });
29165
29166 if let Some(fmt_string) = fmt_str {
29167 // String literal format -> use UnixToStr expression (generator handles it)
29168 Ok(Expression::UnixToStr(Box::new(
29169 crate::expressions::UnixToStr {
29170 this: Box::new(this),
29171 format: Some(fmt_string),
29172 },
29173 )))
29174 } else if let Some(fmt_e) = fmt_expr {
29175 // Non-literal format (e.g., identifier `y`) -> build target expression directly
29176 match target {
29177 DialectType::DuckDB => {
29178 // STRFTIME(TO_TIMESTAMP(x), y)
29179 let to_ts = Expression::Function(Box::new(Function::new(
29180 "TO_TIMESTAMP".to_string(),
29181 vec![this],
29182 )));
29183 Ok(Expression::Function(Box::new(Function::new(
29184 "STRFTIME".to_string(),
29185 vec![to_ts, fmt_e],
29186 ))))
29187 }
29188 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29189 // DATE_FORMAT(FROM_UNIXTIME(x), y)
29190 let from_unix = Expression::Function(Box::new(Function::new(
29191 "FROM_UNIXTIME".to_string(),
29192 vec![this],
29193 )));
29194 Ok(Expression::Function(Box::new(Function::new(
29195 "DATE_FORMAT".to_string(),
29196 vec![from_unix, fmt_e],
29197 ))))
29198 }
29199 DialectType::Hive
29200 | DialectType::Spark
29201 | DialectType::Databricks
29202 | DialectType::Doris
29203 | DialectType::StarRocks => {
29204 // FROM_UNIXTIME(x, y)
29205 Ok(Expression::Function(Box::new(Function::new(
29206 "FROM_UNIXTIME".to_string(),
29207 vec![this, fmt_e],
29208 ))))
29209 }
29210 _ => {
29211 // Default: keep as UNIX_TO_STR(x, y)
29212 Ok(Expression::Function(Box::new(Function::new(
29213 "UNIX_TO_STR".to_string(),
29214 vec![this, fmt_e],
29215 ))))
29216 }
29217 }
29218 } else {
29219 Ok(Expression::UnixToStr(Box::new(
29220 crate::expressions::UnixToStr {
29221 this: Box::new(this),
29222 format: None,
29223 },
29224 )))
29225 }
29226 } else {
29227 Ok(e)
29228 }
29229 }
29230
29231 Action::UnixToTimeConvert => {
29232 // UNIX_TO_TIME(x) -> convert to Expression::UnixToTime for generator
29233 if let Expression::Function(f) = e {
29234 let arg = f.args.into_iter().next().unwrap();
29235 Ok(Expression::UnixToTime(Box::new(
29236 crate::expressions::UnixToTime {
29237 this: Box::new(arg),
29238 scale: None,
29239 zone: None,
29240 hours: None,
29241 minutes: None,
29242 format: None,
29243 target_type: None,
29244 },
29245 )))
29246 } else {
29247 Ok(e)
29248 }
29249 }
29250
29251 Action::UnixToTimeStrConvert => {
29252 // UNIX_TO_TIME_STR(x) -> dialect-specific
29253 if let Expression::Function(f) = e {
29254 let arg = f.args.into_iter().next().unwrap();
29255 match target {
29256 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
29257 // FROM_UNIXTIME(x)
29258 Ok(Expression::Function(Box::new(Function::new(
29259 "FROM_UNIXTIME".to_string(),
29260 vec![arg],
29261 ))))
29262 }
29263 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
29264 // CAST(FROM_UNIXTIME(x) AS VARCHAR)
29265 let from_unix = Expression::Function(Box::new(Function::new(
29266 "FROM_UNIXTIME".to_string(),
29267 vec![arg],
29268 )));
29269 Ok(Expression::Cast(Box::new(Cast {
29270 this: from_unix,
29271 to: DataType::VarChar {
29272 length: None,
29273 parenthesized_length: false,
29274 },
29275 double_colon_syntax: false,
29276 trailing_comments: Vec::new(),
29277 format: None,
29278 default: None,
29279 inferred_type: None,
29280 })))
29281 }
29282 DialectType::DuckDB => {
29283 // CAST(TO_TIMESTAMP(x) AS TEXT)
29284 let to_ts = Expression::Function(Box::new(Function::new(
29285 "TO_TIMESTAMP".to_string(),
29286 vec![arg],
29287 )));
29288 Ok(Expression::Cast(Box::new(Cast {
29289 this: to_ts,
29290 to: DataType::Text,
29291 double_colon_syntax: false,
29292 trailing_comments: Vec::new(),
29293 format: None,
29294 default: None,
29295 inferred_type: None,
29296 })))
29297 }
29298 _ => Ok(Expression::Function(Box::new(Function::new(
29299 "UNIX_TO_TIME_STR".to_string(),
29300 vec![arg],
29301 )))),
29302 }
29303 } else {
29304 Ok(e)
29305 }
29306 }
29307
29308 Action::TimeToUnixConvert => {
29309 // TIME_TO_UNIX(x) -> convert to Expression::TimeToUnix for generator
29310 if let Expression::Function(f) = e {
29311 let arg = f.args.into_iter().next().unwrap();
29312 Ok(Expression::TimeToUnix(Box::new(
29313 crate::expressions::UnaryFunc {
29314 this: arg,
29315 original_name: None,
29316 inferred_type: None,
29317 },
29318 )))
29319 } else {
29320 Ok(e)
29321 }
29322 }
29323
            Action::TimeToStrConvert => {
                // TIME_TO_STR(x, fmt) -> convert to Expression::TimeToStr for generator
                if let Expression::Function(f) = e {
                    // NOTE(review): `remove(0)` twice panics if fewer than two
                    // args were parsed — assumes the registry only routes
                    // 2-arg calls here; confirm.
                    let mut args = f.args;
                    let this = args.remove(0);
                    // Only a string-literal format can be carried on the
                    // TimeToStr node; any other expression keeps the generic
                    // TIME_TO_STR call untouched (early return).
                    let fmt = match args.remove(0) {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            s.clone()
                        }
                        other => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "TIME_TO_STR".to_string(),
                                vec![this, other],
                            ))));
                        }
                    };
                    Ok(Expression::TimeToStr(Box::new(
                        crate::expressions::TimeToStr {
                            this: Box::new(this),
                            format: fmt,
                            culture: None,
                            zone: None,
                        },
                    )))
                } else {
                    // Not a function call: pass through unchanged.
                    Ok(e)
                }
            }
29357
            Action::StrToUnixConvert => {
                // STR_TO_UNIX(x, fmt) -> convert to Expression::StrToUnix for generator
                if let Expression::Function(f) = e {
                    // NOTE(review): `remove(0)` twice panics if fewer than two
                    // args were parsed — assumes the registry only routes
                    // 2-arg calls here; confirm.
                    let mut args = f.args;
                    let this = args.remove(0);
                    // Only a string-literal format is carried on the StrToUnix
                    // node; any other expression keeps the generic STR_TO_UNIX
                    // call untouched (early return).
                    let fmt = match args.remove(0) {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::String(_)) =>
                        {
                            let Literal::String(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            s.clone()
                        }
                        other => {
                            return Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_UNIX".to_string(),
                                vec![this, other],
                            ))));
                        }
                    };
                    Ok(Expression::StrToUnix(Box::new(
                        crate::expressions::StrToUnix {
                            this: Some(Box::new(this)),
                            format: Some(fmt),
                        },
                    )))
                } else {
                    // Not a function call: pass through unchanged.
                    Ok(e)
                }
            }
29389
            Action::TimeStrToUnixConvert => {
                // TIME_STR_TO_UNIX(x) -> dialect-specific
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument; panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    match target {
                        DialectType::DuckDB => {
                            // EPOCH(CAST(x AS TIMESTAMP))
                            let cast_ts = Expression::Cast(Box::new(Cast {
                                this: arg,
                                to: DataType::Timestamp {
                                    timezone: false,
                                    precision: None,
                                },
                                double_colon_syntax: false,
                                trailing_comments: Vec::new(),
                                format: None,
                                default: None,
                                inferred_type: None,
                            }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "EPOCH".to_string(),
                                vec![cast_ts],
                            ))))
                        }
                        DialectType::Hive
                        | DialectType::Doris
                        | DialectType::StarRocks
                        | DialectType::MySQL => {
                            // UNIX_TIMESTAMP(x)
                            Ok(Expression::Function(Box::new(Function::new(
                                "UNIX_TIMESTAMP".to_string(),
                                vec![arg],
                            ))))
                        }
                        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                            // TO_UNIXTIME(DATE_PARSE(x, '%Y-%m-%d %T'))
                            // The hard-coded format assumes the input string is
                            // 'YYYY-MM-DD HH:MM:SS' — TODO confirm against callers.
                            let date_parse = Expression::Function(Box::new(Function::new(
                                "DATE_PARSE".to_string(),
                                vec![arg, Expression::string("%Y-%m-%d %T")],
                            )));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TO_UNIXTIME".to_string(),
                                vec![date_parse],
                            ))))
                        }
                        // Everything else keeps the abstract call for the
                        // generator to handle.
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME_STR_TO_UNIX".to_string(),
                            vec![arg],
                        )))),
                    }
                } else {
                    // Not a function call: pass through unchanged.
                    Ok(e)
                }
            }
29444
            Action::TimeToTimeStrConvert => {
                // TIME_TO_TIME_STR(x) -> CAST(x AS str_type) per dialect
                if let Expression::Function(f) = e {
                    // NOTE(review): assumes at least one argument; panics otherwise.
                    let arg = f.args.into_iter().next().unwrap();
                    // Pick the target dialect's canonical "string" type.
                    let str_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        DialectType::Hive
                        | DialectType::Spark
                        | DialectType::Databricks
                        | DialectType::Doris
                        | DialectType::StarRocks => DataType::Custom {
                            name: "STRING".to_string(),
                        },
                        DialectType::Redshift => DataType::Custom {
                            name: "VARCHAR(MAX)".to_string(),
                        },
                        // Default: bare VARCHAR with no length.
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: str_type,
                        double_colon_syntax: false,
                        trailing_comments: Vec::new(),
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Not a function call: pass through unchanged.
                    Ok(e)
                }
            }
29479
            Action::DateTruncSwapArgs => {
                // DATE_TRUNC('unit', x) from Generic -> target-specific
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let unit_arg = f.args[0].clone();
                        let expr_arg = f.args[1].clone();
                        // Extract unit string from the first arg
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            // Non-literal unit: leave the call untouched.
                            _ => return Ok(Expression::Function(f)),
                        };
                        match target {
                            DialectType::BigQuery => {
                                // BigQuery: DATE_TRUNC(x, UNIT) - unquoted unit
                                // Modeled as a bare column reference so the
                                // generator emits it without quotes.
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, unit_ident],
                                ))))
                            }
                            DialectType::Doris => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::StarRocks => {
                                // StarRocks: DATE_TRUNC('UNIT', x) - keep standard order
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::MySQL => {
                                // MySQL: complex expansion based on unit
                                Self::date_trunc_to_mysql(&unit_str, &expr_arg)
                            }
                            // Other targets already accept the generic order.
                            _ => Ok(Expression::Function(f)),
                        }
                    } else {
                        // Arity other than 2: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
29549
            Action::TimestampTruncConvert => {
                // TIMESTAMP_TRUNC(x, UNIT[, tz]) from Generic -> target-specific
                if let Expression::Function(f) = e {
                    if f.args.len() >= 2 {
                        let expr_arg = f.args[0].clone();
                        let unit_arg = f.args[1].clone();
                        // Optional third argument is a timezone.
                        let tz_arg = if f.args.len() >= 3 {
                            Some(f.args[2].clone())
                        } else {
                            None
                        };
                        // Extract unit string
                        // Accepts either a string literal or a bare identifier
                        // (BigQuery writes units unquoted, parsed as a column).
                        let unit_str = match &unit_arg {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            Expression::Column(c) => c.name.name.to_ascii_uppercase(),
                            _ => {
                                // Unit not statically known: pass through.
                                return Ok(Expression::Function(f));
                            }
                        };
                        match target {
                            DialectType::Spark | DialectType::Databricks => {
                                // Spark: DATE_TRUNC('UNIT', x)
                                // NOTE(review): the timezone argument is dropped
                                // here — confirm that is intended for Spark.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            DialectType::Doris | DialectType::StarRocks => {
                                // Doris: DATE_TRUNC(x, 'UNIT')
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![expr_arg, Expression::string(&unit_str)],
                                ))))
                            }
                            DialectType::BigQuery => {
                                // BigQuery: TIMESTAMP_TRUNC(x, UNIT) - keep but with unquoted unit
                                let unit_ident =
                                    Expression::Column(Box::new(crate::expressions::Column {
                                        name: crate::expressions::Identifier::new(unit_str),
                                        table: None,
                                        join_mark: false,
                                        trailing_comments: Vec::new(),
                                        span: None,
                                        inferred_type: None,
                                    }));
                                let mut args = vec![expr_arg, unit_ident];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TIMESTAMP_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                            DialectType::DuckDB => {
                                // DuckDB with timezone: DATE_TRUNC('UNIT', x AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                if let Some(tz) = tz_arg {
                                    // Non-literal timezones silently fall back
                                    // to 'UTC' — TODO confirm this is intended.
                                    let tz_str = match &tz {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => "UTC".to_string(),
                                    };
                                    // x AT TIME ZONE 'tz'
                                    let at_tz = Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: expr_arg,
                                            zone: Expression::string(&tz_str),
                                        },
                                    ));
                                    // DATE_TRUNC('UNIT', x AT TIME ZONE 'tz')
                                    let trunc = Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), at_tz],
                                    )));
                                    // DATE_TRUNC(...) AT TIME ZONE 'tz'
                                    Ok(Expression::AtTimeZone(Box::new(
                                        crate::expressions::AtTimeZone {
                                            this: trunc,
                                            zone: Expression::string(&tz_str),
                                        },
                                    )))
                                } else {
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "DATE_TRUNC".to_string(),
                                        vec![Expression::string(&unit_str), expr_arg],
                                    ))))
                                }
                            }
                            DialectType::Presto
                            | DialectType::Trino
                            | DialectType::Athena
                            | DialectType::Snowflake => {
                                // Presto/Snowflake: DATE_TRUNC('UNIT', x) - drop timezone
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![Expression::string(&unit_str), expr_arg],
                                ))))
                            }
                            _ => {
                                // For most dialects: DATE_TRUNC('UNIT', x) + tz handling
                                let mut args = vec![Expression::string(&unit_str), expr_arg];
                                if let Some(tz) = tz_arg {
                                    args.push(tz);
                                }
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    args,
                                ))))
                            }
                        }
                    } else {
                        // Fewer than 2 args: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
29680
            Action::StrToDateConvert => {
                // STR_TO_DATE(x, fmt) from Generic -> dialect-specific date parsing
                if let Expression::Function(f) = e {
                    if f.args.len() == 2 {
                        let mut args = f.args;
                        let this = args.remove(0);
                        let fmt_expr = args.remove(0);
                        // Some(..) only when the format is a string literal.
                        let fmt_str = match &fmt_expr {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Some(s.clone())
                            }
                            _ => None,
                        };
                        // The two "default" strftime formats get a simpler
                        // expansion (often a plain CAST) than arbitrary ones.
                        let default_date = "%Y-%m-%d";
                        let default_time = "%Y-%m-%d %H:%M:%S";
                        let is_default = fmt_str
                            .as_ref()
                            .map_or(false, |f| f == default_date || f == default_time);

                        if is_default {
                            // Default format: handle per-dialect
                            match target {
                                DialectType::MySQL
                                | DialectType::Doris
                                | DialectType::StarRocks => {
                                    // Keep STR_TO_DATE(x, fmt) as-is
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, fmt_expr],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(x AS DATE)
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::Presto
                                | DialectType::Trino
                                | DialectType::Athena => {
                                    // Presto: CAST(DATE_PARSE(x, '%Y-%m-%d') AS DATE)
                                    let date_parse =
                                        Expression::Function(Box::new(Function::new(
                                            "DATE_PARSE".to_string(),
                                            vec![this, fmt_expr],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: date_parse,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                _ => {
                                    // Others: TsOrDsToDate (delegates to generator)
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: None,
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else if let Some(fmt) = fmt_str {
                            // Non-default string-literal format.
                            match target {
                                DialectType::Doris
                                | DialectType::StarRocks
                                | DialectType::MySQL => {
                                    // Keep STR_TO_DATE but with normalized format (%H:%M:%S -> %T, %-d -> %e)
                                    let mut normalized = fmt.clone();
                                    normalized = normalized.replace("%-d", "%e");
                                    normalized = normalized.replace("%-m", "%c");
                                    normalized = normalized.replace("%H:%M:%S", "%T");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "STR_TO_DATE".to_string(),
                                        vec![this, Expression::string(&normalized)],
                                    ))))
                                }
                                DialectType::Hive => {
                                    // Hive: CAST(FROM_UNIXTIME(UNIX_TIMESTAMP(x, java_fmt)) AS DATE)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let unix_ts =
                                        Expression::Function(Box::new(Function::new(
                                            "UNIX_TIMESTAMP".to_string(),
                                            vec![this, Expression::string(&java_fmt)],
                                        )));
                                    let from_unix =
                                        Expression::Function(Box::new(Function::new(
                                            "FROM_UNIXTIME".to_string(),
                                            vec![unix_ts],
                                        )));
                                    Ok(Expression::Cast(Box::new(Cast {
                                        this: from_unix,
                                        to: DataType::Date,
                                        double_colon_syntax: false,
                                        trailing_comments: Vec::new(),
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    })))
                                }
                                DialectType::Spark | DialectType::Databricks => {
                                    // Spark: TO_DATE(x, java_fmt)
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                DialectType::Drill => {
                                    // Drill: TO_DATE(x, java_fmt) with T quoted as 'T' in Java format
                                    // The generator's string literal escaping will double the quotes: 'T' -> ''T''
                                    let java_fmt = crate::generator::Generator::strftime_to_java_format_static(&fmt);
                                    let java_fmt = java_fmt.replace('T', "'T'");
                                    Ok(Expression::Function(Box::new(Function::new(
                                        "TO_DATE".to_string(),
                                        vec![this, Expression::string(&java_fmt)],
                                    ))))
                                }
                                _ => {
                                    // For other dialects: use TsOrDsToDate which delegates to generator
                                    Ok(Expression::TsOrDsToDate(Box::new(
                                        crate::expressions::TsOrDsToDate {
                                            this: Box::new(this),
                                            format: Some(fmt),
                                            safe: None,
                                        },
                                    )))
                                }
                            }
                        } else {
                            // Non-string format - keep as-is
                            let mut new_args = Vec::new();
                            new_args.push(this);
                            new_args.push(fmt_expr);
                            Ok(Expression::Function(Box::new(Function::new(
                                "STR_TO_DATE".to_string(),
                                new_args,
                            ))))
                        }
                    } else {
                        // Arity other than 2: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
29843
            Action::TsOrDsAddConvert => {
                // TS_OR_DS_ADD(x, n, 'UNIT') from Generic -> dialect-specific DATE_ADD
                if let Expression::Function(f) = e {
                    if f.args.len() == 3 {
                        let mut args = f.args;
                        let x = args.remove(0);
                        let n = args.remove(0);
                        let unit_expr = args.remove(0);
                        // Unit falls back to DAY when it is not a string literal.
                        let unit_str = match &unit_expr {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::String(_)) =>
                            {
                                let Literal::String(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                s.to_ascii_uppercase()
                            }
                            _ => "DAY".to_string(),
                        };

                        // NOTE(review): the unit-string -> IntervalUnit mapping
                        // below is duplicated for MySQL, DuckDB and Drill —
                        // candidate for extraction into a shared helper.
                        match target {
                            DialectType::Hive
                            | DialectType::Spark
                            | DialectType::Databricks => {
                                // DATE_ADD(x, n) - only supports DAY unit
                                // NOTE(review): non-DAY units are silently
                                // dropped here — confirm that is intended.
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![x, n],
                                ))))
                            }
                            DialectType::MySQL => {
                                // DATE_ADD(x, INTERVAL n UNIT)
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![x, interval],
                                ))))
                            }
                            DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                                // DATE_ADD('UNIT', n, CAST(CAST(x AS TIMESTAMP) AS DATE))
                                // The double CAST tolerates both date-strings
                                // and timestamp-strings in x.
                                let cast_ts = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Timestamp {
                                        precision: None,
                                        timezone: false,
                                    },
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: cast_ts,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![Expression::string(&unit_str), n, cast_date],
                                ))))
                            }
                            DialectType::DuckDB => {
                                // CAST(x AS DATE) + INTERVAL n UNIT
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Add(Box::new(crate::expressions::BinaryOp {
                                    left: cast_date,
                                    right: interval,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                })))
                            }
                            DialectType::Drill => {
                                // DATE_ADD(CAST(x AS DATE), INTERVAL n UNIT)
                                let cast_date = Expression::Cast(Box::new(Cast {
                                    this: x,
                                    to: DataType::Date,
                                    double_colon_syntax: false,
                                    trailing_comments: Vec::new(),
                                    format: None,
                                    default: None,
                                    inferred_type: None,
                                }));
                                let iu = match unit_str.as_str() {
                                    "YEAR" => crate::expressions::IntervalUnit::Year,
                                    "QUARTER" => crate::expressions::IntervalUnit::Quarter,
                                    "MONTH" => crate::expressions::IntervalUnit::Month,
                                    "WEEK" => crate::expressions::IntervalUnit::Week,
                                    "HOUR" => crate::expressions::IntervalUnit::Hour,
                                    "MINUTE" => crate::expressions::IntervalUnit::Minute,
                                    "SECOND" => crate::expressions::IntervalUnit::Second,
                                    _ => crate::expressions::IntervalUnit::Day,
                                };
                                let interval = Expression::Interval(Box::new(
                                    crate::expressions::Interval {
                                        this: Some(n),
                                        unit: Some(
                                            crate::expressions::IntervalUnitSpec::Simple {
                                                unit: iu,
                                                use_plural: false,
                                            },
                                        ),
                                    },
                                ));
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_ADD".to_string(),
                                    vec![cast_date, interval],
                                ))))
                            }
                            _ => {
                                // Default: keep as TS_OR_DS_ADD
                                Ok(Expression::Function(Box::new(Function::new(
                                    "TS_OR_DS_ADD".to_string(),
                                    vec![x, n, unit_expr],
                                ))))
                            }
                        }
                    } else {
                        // Arity other than 3: pass through unchanged.
                        Ok(Expression::Function(f))
                    }
                } else {
                    Ok(e)
                }
            }
30023
30024 Action::DateFromUnixDateConvert => {
30025 // DATE_FROM_UNIX_DATE(n) -> DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30026 if let Expression::Function(f) = e {
30027 // Keep as-is for dialects that support DATE_FROM_UNIX_DATE natively
30028 if matches!(
30029 target,
30030 DialectType::Spark | DialectType::Databricks | DialectType::BigQuery
30031 ) {
30032 return Ok(Expression::Function(Box::new(Function::new(
30033 "DATE_FROM_UNIX_DATE".to_string(),
30034 f.args,
30035 ))));
30036 }
30037 let n = f.args.into_iter().next().unwrap();
30038 let epoch_date = Expression::Cast(Box::new(Cast {
30039 this: Expression::string("1970-01-01"),
30040 to: DataType::Date,
30041 double_colon_syntax: false,
30042 trailing_comments: Vec::new(),
30043 format: None,
30044 default: None,
30045 inferred_type: None,
30046 }));
30047 match target {
30048 DialectType::DuckDB => {
30049 // CAST('1970-01-01' AS DATE) + INTERVAL n DAY
30050 let interval =
30051 Expression::Interval(Box::new(crate::expressions::Interval {
30052 this: Some(n),
30053 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30054 unit: crate::expressions::IntervalUnit::Day,
30055 use_plural: false,
30056 }),
30057 }));
30058 Ok(Expression::Add(Box::new(
30059 crate::expressions::BinaryOp::new(epoch_date, interval),
30060 )))
30061 }
30062 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30063 // DATE_ADD('DAY', n, CAST('1970-01-01' AS DATE))
30064 Ok(Expression::Function(Box::new(Function::new(
30065 "DATE_ADD".to_string(),
30066 vec![Expression::string("DAY"), n, epoch_date],
30067 ))))
30068 }
30069 DialectType::Snowflake | DialectType::Redshift | DialectType::TSQL => {
30070 // DATEADD(DAY, n, CAST('1970-01-01' AS DATE))
30071 Ok(Expression::Function(Box::new(Function::new(
30072 "DATEADD".to_string(),
30073 vec![
30074 Expression::Identifier(Identifier::new("DAY")),
30075 n,
30076 epoch_date,
30077 ],
30078 ))))
30079 }
30080 DialectType::BigQuery => {
30081 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30082 let interval =
30083 Expression::Interval(Box::new(crate::expressions::Interval {
30084 this: Some(n),
30085 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30086 unit: crate::expressions::IntervalUnit::Day,
30087 use_plural: false,
30088 }),
30089 }));
30090 Ok(Expression::Function(Box::new(Function::new(
30091 "DATE_ADD".to_string(),
30092 vec![epoch_date, interval],
30093 ))))
30094 }
30095 DialectType::MySQL
30096 | DialectType::Doris
30097 | DialectType::StarRocks
30098 | DialectType::Drill => {
30099 // DATE_ADD(CAST('1970-01-01' AS DATE), INTERVAL n DAY)
30100 let interval =
30101 Expression::Interval(Box::new(crate::expressions::Interval {
30102 this: Some(n),
30103 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30104 unit: crate::expressions::IntervalUnit::Day,
30105 use_plural: false,
30106 }),
30107 }));
30108 Ok(Expression::Function(Box::new(Function::new(
30109 "DATE_ADD".to_string(),
30110 vec![epoch_date, interval],
30111 ))))
30112 }
30113 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
30114 // DATE_ADD(CAST('1970-01-01' AS DATE), n)
30115 Ok(Expression::Function(Box::new(Function::new(
30116 "DATE_ADD".to_string(),
30117 vec![epoch_date, n],
30118 ))))
30119 }
30120 DialectType::PostgreSQL
30121 | DialectType::Materialize
30122 | DialectType::RisingWave => {
30123 // CAST('1970-01-01' AS DATE) + INTERVAL 'n DAY'
30124 let n_str = match &n {
30125 Expression::Literal(lit)
30126 if matches!(lit.as_ref(), Literal::Number(_)) =>
30127 {
30128 let Literal::Number(s) = lit.as_ref() else {
30129 unreachable!()
30130 };
30131 s.clone()
30132 }
30133 _ => Self::expr_to_string_static(&n),
30134 };
30135 let interval =
30136 Expression::Interval(Box::new(crate::expressions::Interval {
30137 this: Some(Expression::string(&format!("{} DAY", n_str))),
30138 unit: None,
30139 }));
30140 Ok(Expression::Add(Box::new(
30141 crate::expressions::BinaryOp::new(epoch_date, interval),
30142 )))
30143 }
30144 _ => {
30145 // Default: keep as-is
30146 Ok(Expression::Function(Box::new(Function::new(
30147 "DATE_FROM_UNIX_DATE".to_string(),
30148 vec![n],
30149 ))))
30150 }
30151 }
30152 } else {
30153 Ok(e)
30154 }
30155 }
30156
            Action::ArrayRemoveConvert => {
                // ARRAY_REMOVE(arr, target) -> LIST_FILTER/arrayFilter
                if let Expression::ArrayRemove(bf) = e {
                    let arr = bf.this;
                    let target_val = bf.expression;
                    // NOTE(review): the DuckDB and ClickHouse branches build
                    // the same `_u <> target` lambda and differ only in the
                    // function name and argument order — candidate for a helper.
                    match target {
                        DialectType::DuckDB => {
                            // LIST_FILTER(arr, _u -> _u <> target)
                            let u_id = crate::expressions::Identifier::new("_u");
                            let lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![u_id.clone()],
                                    body: Expression::Neq(Box::new(BinaryOp {
                                        left: Expression::Identifier(u_id),
                                        right: target_val,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: Vec::new(),
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "LIST_FILTER".to_string(),
                                vec![arr, lambda],
                            ))))
                        }
                        DialectType::ClickHouse => {
                            // arrayFilter(_u -> _u <> target, arr) — note the
                            // lambda comes first in ClickHouse.
                            let u_id = crate::expressions::Identifier::new("_u");
                            let lambda =
                                Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
                                    parameters: vec![u_id.clone()],
                                    body: Expression::Neq(Box::new(BinaryOp {
                                        left: Expression::Identifier(u_id),
                                        right: target_val,
                                        left_comments: Vec::new(),
                                        operator_comments: Vec::new(),
                                        trailing_comments: Vec::new(),
                                        inferred_type: None,
                                    })),
                                    colon: false,
                                    parameter_types: Vec::new(),
                                }));
                            Ok(Expression::Function(Box::new(Function::new(
                                "arrayFilter".to_string(),
                                vec![lambda, arr],
                            ))))
                        }
                        DialectType::BigQuery => {
                            // ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)
                            let u_id = crate::expressions::Identifier::new("_u");
                            let u_col =
                                Expression::Column(Box::new(crate::expressions::Column {
                                    name: u_id.clone(),
                                    table: None,
                                    join_mark: false,
                                    trailing_comments: Vec::new(),
                                    span: None,
                                    inferred_type: None,
                                }));
                            let unnest_expr =
                                Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
                                    this: arr,
                                    expressions: Vec::new(),
                                    with_ordinality: false,
                                    alias: None,
                                    offset_alias: None,
                                }));
                            let aliased_unnest =
                                Expression::Alias(Box::new(crate::expressions::Alias {
                                    this: unnest_expr,
                                    alias: u_id.clone(),
                                    column_aliases: Vec::new(),
                                    pre_alias_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let where_cond = Expression::Neq(Box::new(BinaryOp {
                                left: u_col.clone(),
                                right: target_val,
                                left_comments: Vec::new(),
                                operator_comments: Vec::new(),
                                trailing_comments: Vec::new(),
                                inferred_type: None,
                            }));
                            let subquery = Expression::Select(Box::new(
                                crate::expressions::Select::new()
                                    .column(u_col)
                                    .from(aliased_unnest)
                                    .where_(where_cond),
                            ));
                            Ok(Expression::ArrayFunc(Box::new(
                                crate::expressions::ArrayConstructor {
                                    expressions: vec![subquery],
                                    bracket_notation: false,
                                    use_list_keyword: false,
                                },
                            )))
                        }
                        // Other targets: rebuild the abstract node unchanged.
                        _ => Ok(Expression::ArrayRemove(Box::new(
                            crate::expressions::BinaryFunc {
                                original_name: None,
                                this: arr,
                                expression: target_val,
                                inferred_type: None,
                            },
                        ))),
                    }
                } else {
                    Ok(e)
                }
            }
30269
30270 Action::ArrayReverseConvert => {
30271 // ARRAY_REVERSE(x) -> arrayReverse(x) for ClickHouse
30272 if let Expression::ArrayReverse(af) = e {
30273 Ok(Expression::Function(Box::new(Function::new(
30274 "arrayReverse".to_string(),
30275 vec![af.this],
30276 ))))
30277 } else {
30278 Ok(e)
30279 }
30280 }
30281
30282 Action::JsonKeysConvert => {
30283 // JSON_KEYS(x) -> JSON_OBJECT_KEYS/OBJECT_KEYS
30284 if let Expression::JsonKeys(uf) = e {
30285 match target {
30286 DialectType::Spark | DialectType::Databricks => {
30287 Ok(Expression::Function(Box::new(Function::new(
30288 "JSON_OBJECT_KEYS".to_string(),
30289 vec![uf.this],
30290 ))))
30291 }
30292 DialectType::Snowflake => Ok(Expression::Function(Box::new(
30293 Function::new("OBJECT_KEYS".to_string(), vec![uf.this]),
30294 ))),
30295 _ => Ok(Expression::JsonKeys(uf)),
30296 }
30297 } else {
30298 Ok(e)
30299 }
30300 }
30301
30302 Action::ParseJsonStrip => {
30303 // PARSE_JSON(x) -> x (strip wrapper for SQLite/Doris)
30304 if let Expression::ParseJson(uf) = e {
30305 Ok(uf.this)
30306 } else {
30307 Ok(e)
30308 }
30309 }
30310
30311 Action::ArraySizeDrill => {
30312 // ARRAY_SIZE(x) -> REPEATED_COUNT(x) for Drill
30313 if let Expression::ArraySize(uf) = e {
30314 Ok(Expression::Function(Box::new(Function::new(
30315 "REPEATED_COUNT".to_string(),
30316 vec![uf.this],
30317 ))))
30318 } else {
30319 Ok(e)
30320 }
30321 }
30322
30323 Action::WeekOfYearToWeekIso => {
30324 // WEEKOFYEAR(x) -> WEEKISO(x) for Snowflake (cross-dialect normalization)
30325 if let Expression::WeekOfYear(uf) = e {
30326 Ok(Expression::Function(Box::new(Function::new(
30327 "WEEKISO".to_string(),
30328 vec![uf.this],
30329 ))))
30330 } else {
30331 Ok(e)
30332 }
30333 }
30334 }
30335 })
30336 }
30337
30338 /// Convert DATE_TRUNC('unit', x) to MySQL-specific expansion
30339 fn date_trunc_to_mysql(unit: &str, expr: &Expression) -> Result<Expression> {
30340 use crate::expressions::Function;
30341 match unit {
30342 "DAY" => {
30343 // DATE(x)
30344 Ok(Expression::Function(Box::new(Function::new(
30345 "DATE".to_string(),
30346 vec![expr.clone()],
30347 ))))
30348 }
30349 "WEEK" => {
30350 // STR_TO_DATE(CONCAT(YEAR(x), ' ', WEEK(x, 1), ' 1'), '%Y %u %w')
30351 let year_x = Expression::Function(Box::new(Function::new(
30352 "YEAR".to_string(),
30353 vec![expr.clone()],
30354 )));
30355 let week_x = Expression::Function(Box::new(Function::new(
30356 "WEEK".to_string(),
30357 vec![expr.clone(), Expression::number(1)],
30358 )));
30359 let concat_args = vec![
30360 year_x,
30361 Expression::string(" "),
30362 week_x,
30363 Expression::string(" 1"),
30364 ];
30365 let concat = Expression::Function(Box::new(Function::new(
30366 "CONCAT".to_string(),
30367 concat_args,
30368 )));
30369 Ok(Expression::Function(Box::new(Function::new(
30370 "STR_TO_DATE".to_string(),
30371 vec![concat, Expression::string("%Y %u %w")],
30372 ))))
30373 }
30374 "MONTH" => {
30375 // STR_TO_DATE(CONCAT(YEAR(x), ' ', MONTH(x), ' 1'), '%Y %c %e')
30376 let year_x = Expression::Function(Box::new(Function::new(
30377 "YEAR".to_string(),
30378 vec![expr.clone()],
30379 )));
30380 let month_x = Expression::Function(Box::new(Function::new(
30381 "MONTH".to_string(),
30382 vec![expr.clone()],
30383 )));
30384 let concat_args = vec![
30385 year_x,
30386 Expression::string(" "),
30387 month_x,
30388 Expression::string(" 1"),
30389 ];
30390 let concat = Expression::Function(Box::new(Function::new(
30391 "CONCAT".to_string(),
30392 concat_args,
30393 )));
30394 Ok(Expression::Function(Box::new(Function::new(
30395 "STR_TO_DATE".to_string(),
30396 vec![concat, Expression::string("%Y %c %e")],
30397 ))))
30398 }
30399 "QUARTER" => {
30400 // STR_TO_DATE(CONCAT(YEAR(x), ' ', QUARTER(x) * 3 - 2, ' 1'), '%Y %c %e')
30401 let year_x = Expression::Function(Box::new(Function::new(
30402 "YEAR".to_string(),
30403 vec![expr.clone()],
30404 )));
30405 let quarter_x = Expression::Function(Box::new(Function::new(
30406 "QUARTER".to_string(),
30407 vec![expr.clone()],
30408 )));
30409 // QUARTER(x) * 3 - 2
30410 let mul = Expression::Mul(Box::new(crate::expressions::BinaryOp {
30411 left: quarter_x,
30412 right: Expression::number(3),
30413 left_comments: Vec::new(),
30414 operator_comments: Vec::new(),
30415 trailing_comments: Vec::new(),
30416 inferred_type: None,
30417 }));
30418 let sub = Expression::Sub(Box::new(crate::expressions::BinaryOp {
30419 left: mul,
30420 right: Expression::number(2),
30421 left_comments: Vec::new(),
30422 operator_comments: Vec::new(),
30423 trailing_comments: Vec::new(),
30424 inferred_type: None,
30425 }));
30426 let concat_args = vec![
30427 year_x,
30428 Expression::string(" "),
30429 sub,
30430 Expression::string(" 1"),
30431 ];
30432 let concat = Expression::Function(Box::new(Function::new(
30433 "CONCAT".to_string(),
30434 concat_args,
30435 )));
30436 Ok(Expression::Function(Box::new(Function::new(
30437 "STR_TO_DATE".to_string(),
30438 vec![concat, Expression::string("%Y %c %e")],
30439 ))))
30440 }
30441 "YEAR" => {
30442 // STR_TO_DATE(CONCAT(YEAR(x), ' 1 1'), '%Y %c %e')
30443 let year_x = Expression::Function(Box::new(Function::new(
30444 "YEAR".to_string(),
30445 vec![expr.clone()],
30446 )));
30447 let concat_args = vec![year_x, Expression::string(" 1 1")];
30448 let concat = Expression::Function(Box::new(Function::new(
30449 "CONCAT".to_string(),
30450 concat_args,
30451 )));
30452 Ok(Expression::Function(Box::new(Function::new(
30453 "STR_TO_DATE".to_string(),
30454 vec![concat, Expression::string("%Y %c %e")],
30455 ))))
30456 }
30457 _ => {
30458 // Unsupported unit -> keep as DATE_TRUNC
30459 Ok(Expression::Function(Box::new(Function::new(
30460 "DATE_TRUNC".to_string(),
30461 vec![Expression::string(unit), expr.clone()],
30462 ))))
30463 }
30464 }
30465 }
30466
30467 /// Check if a DataType is or contains VARCHAR/CHAR (for Spark VARCHAR->STRING normalization)
30468 fn has_varchar_char_type(dt: &crate::expressions::DataType) -> bool {
30469 use crate::expressions::DataType;
30470 match dt {
30471 DataType::VarChar { .. } | DataType::Char { .. } => true,
30472 DataType::Struct { fields, .. } => fields
30473 .iter()
30474 .any(|f| Self::has_varchar_char_type(&f.data_type)),
30475 _ => false,
30476 }
30477 }
30478
30479 /// Recursively normalize VARCHAR/CHAR to STRING in a DataType (for Spark)
30480 fn normalize_varchar_to_string(
30481 dt: crate::expressions::DataType,
30482 ) -> crate::expressions::DataType {
30483 use crate::expressions::DataType;
30484 match dt {
30485 DataType::VarChar { .. } | DataType::Char { .. } => DataType::Custom {
30486 name: "STRING".to_string(),
30487 },
30488 DataType::Struct { fields, nested } => {
30489 let fields = fields
30490 .into_iter()
30491 .map(|mut f| {
30492 f.data_type = Self::normalize_varchar_to_string(f.data_type);
30493 f
30494 })
30495 .collect();
30496 DataType::Struct { fields, nested }
30497 }
30498 other => other,
30499 }
30500 }
30501
30502 /// Normalize an interval string like '1day' or ' 2 days ' to proper INTERVAL expression
30503 fn normalize_interval_string(expr: Expression, target: DialectType) -> Expression {
30504 if let Expression::Literal(ref lit) = expr {
30505 if let crate::expressions::Literal::String(ref s) = lit.as_ref() {
30506 // Try to parse patterns like '1day', '1 day', '2 days', ' 2 days '
30507 let trimmed = s.trim();
30508
30509 // Find where digits end and unit text begins
30510 let digit_end = trimmed
30511 .find(|c: char| !c.is_ascii_digit())
30512 .unwrap_or(trimmed.len());
30513 if digit_end == 0 || digit_end == trimmed.len() {
30514 return expr;
30515 }
30516 let num = &trimmed[..digit_end];
30517 let unit_text = trimmed[digit_end..].trim().to_ascii_uppercase();
30518 if unit_text.is_empty() {
30519 return expr;
30520 }
30521
30522 let known_units = [
30523 "DAY", "DAYS", "HOUR", "HOURS", "MINUTE", "MINUTES", "SECOND", "SECONDS",
30524 "WEEK", "WEEKS", "MONTH", "MONTHS", "YEAR", "YEARS",
30525 ];
30526 if !known_units.contains(&unit_text.as_str()) {
30527 return expr;
30528 }
30529
30530 let unit_str = unit_text.clone();
30531 // Singularize
30532 let unit_singular = if unit_str.ends_with('S') && unit_str.len() > 3 {
30533 &unit_str[..unit_str.len() - 1]
30534 } else {
30535 &unit_str
30536 };
30537 let unit = unit_singular;
30538
30539 match target {
30540 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
30541 // INTERVAL '2' DAY
30542 let iu = match unit {
30543 "DAY" => crate::expressions::IntervalUnit::Day,
30544 "HOUR" => crate::expressions::IntervalUnit::Hour,
30545 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30546 "SECOND" => crate::expressions::IntervalUnit::Second,
30547 "WEEK" => crate::expressions::IntervalUnit::Week,
30548 "MONTH" => crate::expressions::IntervalUnit::Month,
30549 "YEAR" => crate::expressions::IntervalUnit::Year,
30550 _ => return expr,
30551 };
30552 return Expression::Interval(Box::new(crate::expressions::Interval {
30553 this: Some(Expression::string(num)),
30554 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30555 unit: iu,
30556 use_plural: false,
30557 }),
30558 }));
30559 }
30560 DialectType::PostgreSQL | DialectType::Redshift | DialectType::DuckDB => {
30561 // INTERVAL '2 DAYS'
30562 let plural = if num != "1" && !unit_str.ends_with('S') {
30563 format!("{} {}S", num, unit)
30564 } else if unit_str.ends_with('S') {
30565 format!("{} {}", num, unit_str)
30566 } else {
30567 format!("{} {}", num, unit)
30568 };
30569 return Expression::Interval(Box::new(crate::expressions::Interval {
30570 this: Some(Expression::string(&plural)),
30571 unit: None,
30572 }));
30573 }
30574 _ => {
30575 // Spark/Databricks/Hive: INTERVAL '1' DAY
30576 let iu = match unit {
30577 "DAY" => crate::expressions::IntervalUnit::Day,
30578 "HOUR" => crate::expressions::IntervalUnit::Hour,
30579 "MINUTE" => crate::expressions::IntervalUnit::Minute,
30580 "SECOND" => crate::expressions::IntervalUnit::Second,
30581 "WEEK" => crate::expressions::IntervalUnit::Week,
30582 "MONTH" => crate::expressions::IntervalUnit::Month,
30583 "YEAR" => crate::expressions::IntervalUnit::Year,
30584 _ => return expr,
30585 };
30586 return Expression::Interval(Box::new(crate::expressions::Interval {
30587 this: Some(Expression::string(num)),
30588 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
30589 unit: iu,
30590 use_plural: false,
30591 }),
30592 }));
30593 }
30594 }
30595 }
30596 }
30597 // If it's already an INTERVAL expression, pass through
30598 expr
30599 }
30600
    /// Rewrite SELECT expressions containing UNNEST into expanded form with CROSS JOINs.
    /// DuckDB: SELECT UNNEST(arr1), UNNEST(arr2) ->
    /// BigQuery: SELECT IF(pos = pos_2, col, NULL) AS col, ... FROM UNNEST(GENERATE_ARRAY(0, ...)) AS pos CROSS JOIN ...
    /// Presto: SELECT IF(_u.pos = _u_2.pos_2, _u_2.col) AS col, ... FROM UNNEST(SEQUENCE(1, ...)) AS _u(pos) CROSS JOIN ...
    ///
    /// Returns `Some(rewritten_select)` when at least one SELECT expression contains
    /// an UNNEST and the target is BigQuery/Presto/Trino/Snowflake; `None` otherwise
    /// (callers keep the original SELECT in that case).
    fn rewrite_unnest_expansion(
        select: &crate::expressions::Select,
        target: DialectType,
    ) -> Option<crate::expressions::Select> {
        use crate::expressions::{
            Alias, BinaryOp, Column, From, Function, Identifier, Join, JoinKind, Literal,
            UnnestFunc,
        };

        // Position series base: 1 for Presto/Trino (SEQUENCE starts at 1), 0 elsewhere.
        let index_offset: i64 = match target {
            DialectType::Presto | DialectType::Trino => 1,
            _ => 0, // BigQuery, Snowflake
        };

        // Snowflake spells the conditional function IFF rather than IF.
        let if_func_name = match target {
            DialectType::Snowflake => "IFF",
            _ => "IF",
        };

        // Dialect-specific array-length function used in GREATEST and the WHERE bounds.
        let array_length_func = match target {
            DialectType::BigQuery => "ARRAY_LENGTH",
            DialectType::Presto | DialectType::Trino => "CARDINALITY",
            DialectType::Snowflake => "ARRAY_SIZE",
            _ => "ARRAY_LENGTH",
        };

        // Dialects where each UNNEST source gets an explicit table alias (_u, _u_2, ...)
        // and column references are qualified by it.
        let use_table_aliases = matches!(
            target,
            DialectType::Presto | DialectType::Trino | DialectType::Snowflake
        );
        // Dialects that get an explicit NULL third argument to IF/IFF.
        let null_third_arg = matches!(target, DialectType::BigQuery | DialectType::Snowflake);

        // Build a (possibly table-qualified) column reference.
        fn make_col(name: &str, table: Option<&str>) -> Expression {
            if let Some(tbl) = table {
                Expression::boxed_column(Column {
                    name: Identifier::new(name.to_string()),
                    table: Some(Identifier::new(tbl.to_string())),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                })
            } else {
                Expression::Identifier(Identifier::new(name.to_string()))
            }
        }

        // Wrap an expression as a bare CROSS JOIN (no ON/USING condition).
        fn make_join(this: Expression) -> Join {
            Join {
                this,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Cross,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }
        }

        // Collect UNNEST info from SELECT expressions
        struct UnnestInfo {
            // The array argument of UNNEST(...).
            arr_expr: Expression,
            // Generated column name: col, col_2, ...
            col_alias: String,
            // Generated position column name: pos_2, pos_3, ...
            pos_alias: String,
            // Generated source alias: _u_2, _u_3, ...
            source_alias: String,
            // The full SELECT expression the UNNEST was found in.
            original_expr: Expression,
            // Explicit `AS name` on the SELECT expression, if any.
            has_outer_alias: Option<String>,
        }

        let mut unnest_infos: Vec<UnnestInfo> = Vec::new();
        let mut col_counter = 0usize;
        let mut pos_counter = 1usize; // pos aliases start at pos_2 ("pos" is the series)
        let mut source_counter = 1usize; // source aliases start at _u_2 ("_u" is the series)

        // Pull the array argument out of an UNNEST, looking through aliases and
        // binary arithmetic (left operand is preferred when both sides match).
        fn extract_unnest_arg(expr: &Expression) -> Option<Expression> {
            match expr {
                Expression::Unnest(u) => Some(u.this.clone()),
                Expression::Function(f)
                    if f.name.eq_ignore_ascii_case("UNNEST") && !f.args.is_empty() =>
                {
                    Some(f.args[0].clone())
                }
                Expression::Alias(a) => extract_unnest_arg(&a.this),
                Expression::Add(op)
                | Expression::Sub(op)
                | Expression::Mul(op)
                | Expression::Div(op) => {
                    extract_unnest_arg(&op.left).or_else(|| extract_unnest_arg(&op.right))
                }
                _ => None,
            }
        }

        // Top-level `expr AS name` -> Some("name"); anything else -> None.
        fn get_alias_name(expr: &Expression) -> Option<String> {
            if let Expression::Alias(a) = expr {
                Some(a.alias.name.clone())
            } else {
                None
            }
        }

        // Scan the SELECT list, recording every expression that contains an UNNEST.
        for sel_expr in &select.expressions {
            if let Some(arr) = extract_unnest_arg(sel_expr) {
                col_counter += 1;
                pos_counter += 1;
                source_counter += 1;

                // First generated column is plain "col"; later ones are numbered.
                let col_alias = if col_counter == 1 {
                    "col".to_string()
                } else {
                    format!("col_{}", col_counter)
                };
                let pos_alias = format!("pos_{}", pos_counter);
                let source_alias = format!("_u_{}", source_counter);
                let has_outer_alias = get_alias_name(sel_expr);

                unnest_infos.push(UnnestInfo {
                    arr_expr: arr,
                    col_alias,
                    pos_alias,
                    source_alias,
                    original_expr: sel_expr.clone(),
                    has_outer_alias,
                });
            }
        }

        // No UNNEST anywhere in the SELECT list: nothing to rewrite.
        if unnest_infos.is_empty() {
            return None;
        }

        let series_alias = "pos".to_string();
        let series_source_alias = "_u".to_string();
        let tbl_ref = if use_table_aliases {
            Some(series_source_alias.as_str())
        } else {
            None
        };

        // Build new SELECT expressions
        let mut new_select_exprs = Vec::new();
        for info in &unnest_infos {
            // Prefer the user's explicit alias over the generated column name.
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };

            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);
            let col_ref = make_col(actual_col_name, src_ref);

            // IF(pos = pos_N, col[, NULL]) — emit the value only on the matching row.
            let eq_cond = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let mut if_args = vec![eq_cond, col_ref];
            if null_third_arg {
                if_args.push(Expression::Null(crate::expressions::Null));
            }

            let if_expr =
                Expression::Function(Box::new(Function::new(if_func_name.to_string(), if_args)));
            // Splice the IF(...) back in place of the UNNEST inside the original expression,
            // preserving any arithmetic wrapped around it.
            let final_expr = Self::replace_unnest_with_if(&info.original_expr, &if_expr);

            new_select_exprs.push(Expression::Alias(Box::new(Alias::new(
                final_expr,
                Identifier::new(actual_col_name.clone()),
            ))));
        }

        // Build array size expressions for GREATEST
        let size_exprs: Vec<Expression> = unnest_infos
            .iter()
            .map(|info| {
                Expression::Function(Box::new(Function::new(
                    array_length_func.to_string(),
                    vec![info.arr_expr.clone()],
                )))
            })
            .collect();

        // The series must cover the longest of all unnested arrays.
        let greatest =
            Expression::Function(Box::new(Function::new("GREATEST".to_string(), size_exprs)));

        // 0-based dialects end the series at GREATEST(...) - 1; 1-based at GREATEST(...).
        let series_end = if index_offset == 0 {
            Expression::Sub(Box::new(BinaryOp::new(
                greatest,
                Expression::Literal(Box::new(Literal::Number("1".to_string()))),
            )))
        } else {
            greatest
        };

        // Build the position array source
        let series_unnest_expr = match target {
            DialectType::BigQuery => {
                // UNNEST(GENERATE_ARRAY(0, <end>))
                let gen_array = Expression::Function(Box::new(Function::new(
                    "GENERATE_ARRAY".to_string(),
                    vec![
                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: gen_array,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Presto | DialectType::Trino => {
                // UNNEST(SEQUENCE(1, <end>))
                let sequence = Expression::Function(Box::new(Function::new(
                    "SEQUENCE".to_string(),
                    vec![
                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                        series_end,
                    ],
                )));
                Expression::Unnest(Box::new(UnnestFunc {
                    this: sequence,
                    expressions: Vec::new(),
                    with_ordinality: false,
                    alias: None,
                    offset_alias: None,
                }))
            }
            DialectType::Snowflake => {
                // TABLE(FLATTEN(INPUT => ARRAY_GENERATE_RANGE(0, (<end>) + 1)))
                // The series end is parenthesized and incremented because
                // ARRAY_GENERATE_RANGE's upper bound is exclusive.
                let range_end = Expression::Add(Box::new(BinaryOp::new(
                    Expression::Paren(Box::new(crate::expressions::Paren {
                        this: series_end,
                        trailing_comments: Vec::new(),
                    })),
                    Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                )));
                let gen_range = Expression::Function(Box::new(Function::new(
                    "ARRAY_GENERATE_RANGE".to_string(),
                    vec![
                        Expression::Literal(Box::new(Literal::Number("0".to_string()))),
                        range_end,
                    ],
                )));
                let flatten_arg =
                    Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                        name: Identifier::new("INPUT".to_string()),
                        value: gen_range,
                        separator: crate::expressions::NamedArgSeparator::DArrow,
                    }));
                let flatten = Expression::Function(Box::new(Function::new(
                    "FLATTEN".to_string(),
                    vec![flatten_arg],
                )));
                Expression::Function(Box::new(Function::new("TABLE".to_string(), vec![flatten])))
            }
            // Unsupported target dialect: bail out, keeping the original SELECT.
            _ => return None,
        };

        // Build series alias expression
        let series_alias_expr = if use_table_aliases {
            // Snowflake FLATTEN yields six output columns; alias all of them so the
            // generated series name lines up with the value column.
            let col_aliases = if matches!(target, DialectType::Snowflake) {
                vec![
                    Identifier::new("seq".to_string()),
                    Identifier::new("key".to_string()),
                    Identifier::new("path".to_string()),
                    Identifier::new("index".to_string()),
                    Identifier::new(series_alias.clone()),
                    Identifier::new("this".to_string()),
                ]
            } else {
                vec![Identifier::new(series_alias.clone())]
            };
            Expression::Alias(Box::new(Alias {
                this: series_unnest_expr,
                alias: Identifier::new(series_source_alias.clone()),
                column_aliases: col_aliases,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }))
        } else {
            // BigQuery: UNNEST(...) AS pos (no table alias).
            Expression::Alias(Box::new(Alias::new(
                series_unnest_expr,
                Identifier::new(series_alias.clone()),
            )))
        };

        // Build CROSS JOINs for each UNNEST
        let mut joins = Vec::new();
        for info in &unnest_infos {
            let actual_col_name = info.has_outer_alias.as_ref().unwrap_or(&info.col_alias);

            let unnest_join_expr = match target {
                DialectType::BigQuery => {
                    // UNNEST([1,2,3]) AS col WITH OFFSET AS pos_2
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: Some(Identifier::new(actual_col_name.clone())),
                        offset_alias: Some(Identifier::new(info.pos_alias.clone())),
                    };
                    Expression::Unnest(Box::new(unnest))
                }
                DialectType::Presto | DialectType::Trino => {
                    // UNNEST(arr) WITH ORDINALITY AS _u_N(col, pos_N)
                    let unnest = UnnestFunc {
                        this: info.arr_expr.clone(),
                        expressions: Vec::new(),
                        with_ordinality: true,
                        alias: None,
                        offset_alias: None,
                    };
                    Expression::Alias(Box::new(Alias {
                        this: Expression::Unnest(Box::new(unnest)),
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new(info.pos_alias.clone()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }))
                }
                DialectType::Snowflake => {
                    // TABLE(FLATTEN(INPUT => arr)) AS _u_N(seq, key, path, pos_N, col, this)
                    let flatten_arg =
                        Expression::NamedArgument(Box::new(crate::expressions::NamedArgument {
                            name: Identifier::new("INPUT".to_string()),
                            value: info.arr_expr.clone(),
                            separator: crate::expressions::NamedArgSeparator::DArrow,
                        }));
                    let flatten = Expression::Function(Box::new(Function::new(
                        "FLATTEN".to_string(),
                        vec![flatten_arg],
                    )));
                    let table_fn = Expression::Function(Box::new(Function::new(
                        "TABLE".to_string(),
                        vec![flatten],
                    )));
                    Expression::Alias(Box::new(Alias {
                        this: table_fn,
                        alias: Identifier::new(info.source_alias.clone()),
                        column_aliases: vec![
                            Identifier::new("seq".to_string()),
                            Identifier::new("key".to_string()),
                            Identifier::new("path".to_string()),
                            Identifier::new(info.pos_alias.clone()),
                            Identifier::new(actual_col_name.clone()),
                            Identifier::new("this".to_string()),
                        ],
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }))
                }
                _ => return None,
            };

            joins.push(make_join(unnest_join_expr));
        }

        // Build WHERE clause
        // Per UNNEST: pos = pos_N OR (pos > size AND pos_N = size), so shorter
        // arrays keep contributing a row (the IF above emits NULL for them)
        // while the series runs to the longest array.
        let mut where_conditions: Vec<Expression> = Vec::new();
        for info in &unnest_infos {
            let src_ref = if use_table_aliases {
                Some(info.source_alias.as_str())
            } else {
                None
            };
            let pos_col = make_col(&series_alias, tbl_ref);
            let unnest_pos_col = make_col(&info.pos_alias, src_ref);

            let arr_size = Expression::Function(Box::new(Function::new(
                array_length_func.to_string(),
                vec![info.arr_expr.clone()],
            )));

            // 0-based dialects compare against (length - 1); 1-based against length.
            let size_ref = if index_offset == 0 {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: Expression::Sub(Box::new(BinaryOp::new(
                        arr_size,
                        Expression::Literal(Box::new(Literal::Number("1".to_string()))),
                    ))),
                    trailing_comments: Vec::new(),
                }))
            } else {
                arr_size
            };

            let eq = Expression::Eq(Box::new(BinaryOp::new(
                pos_col.clone(),
                unnest_pos_col.clone(),
            )));
            let gt = Expression::Gt(Box::new(BinaryOp::new(pos_col, size_ref.clone())));
            let pos_eq_size = Expression::Eq(Box::new(BinaryOp::new(unnest_pos_col, size_ref)));
            let and_cond = Expression::And(Box::new(BinaryOp::new(gt, pos_eq_size)));
            let paren_and = Expression::Paren(Box::new(crate::expressions::Paren {
                this: and_cond,
                trailing_comments: Vec::new(),
            }));
            let or_cond = Expression::Or(Box::new(BinaryOp::new(eq, paren_and)));

            where_conditions.push(or_cond);
        }

        let where_expr = if where_conditions.len() == 1 {
            // Single condition: no parens needed
            where_conditions.into_iter().next().unwrap()
        } else {
            // Multiple conditions: wrap each OR in parens, then combine with AND
            let wrap = |e: Expression| {
                Expression::Paren(Box::new(crate::expressions::Paren {
                    this: e,
                    trailing_comments: Vec::new(),
                }))
            };
            let mut iter = where_conditions.into_iter();
            // len >= 2 in this branch, so both unwraps are safe.
            let first = wrap(iter.next().unwrap());
            let second = wrap(iter.next().unwrap());
            let mut combined = Expression::Paren(Box::new(crate::expressions::Paren {
                this: Expression::And(Box::new(BinaryOp::new(first, second))),
                trailing_comments: Vec::new(),
            }));
            for cond in iter {
                combined = Expression::And(Box::new(BinaryOp::new(combined, wrap(cond))));
            }
            combined
        };

        // Build the new SELECT
        let mut new_select = select.clone();
        new_select.expressions = new_select_exprs;

        // The position series becomes the FROM source when none exists, otherwise a
        // leading CROSS JOIN; one CROSS JOIN per UNNEST follows either way.
        if new_select.from.is_some() {
            let mut all_joins = vec![make_join(series_alias_expr)];
            all_joins.extend(joins);
            new_select.joins.extend(all_joins);
        } else {
            new_select.from = Some(From {
                expressions: vec![series_alias_expr],
            });
            new_select.joins.extend(joins);
        }

        // AND the row-matching predicate into any pre-existing WHERE clause.
        if let Some(ref existing_where) = new_select.where_clause {
            let combined = Expression::And(Box::new(BinaryOp::new(
                existing_where.this.clone(),
                where_expr,
            )));
            new_select.where_clause = Some(crate::expressions::Where { this: combined });
        } else {
            new_select.where_clause = Some(crate::expressions::Where { this: where_expr });
        }

        Some(new_select)
    }
31068
31069 /// Helper to replace UNNEST(...) inside an expression with a replacement expression.
31070 fn replace_unnest_with_if(original: &Expression, replacement: &Expression) -> Expression {
31071 match original {
31072 Expression::Unnest(_) => replacement.clone(),
31073 Expression::Function(f) if f.name.eq_ignore_ascii_case("UNNEST") => replacement.clone(),
31074 Expression::Alias(a) => Self::replace_unnest_with_if(&a.this, replacement),
31075 Expression::Add(op) => {
31076 let left = Self::replace_unnest_with_if(&op.left, replacement);
31077 let right = Self::replace_unnest_with_if(&op.right, replacement);
31078 Expression::Add(Box::new(crate::expressions::BinaryOp::new(left, right)))
31079 }
31080 Expression::Sub(op) => {
31081 let left = Self::replace_unnest_with_if(&op.left, replacement);
31082 let right = Self::replace_unnest_with_if(&op.right, replacement);
31083 Expression::Sub(Box::new(crate::expressions::BinaryOp::new(left, right)))
31084 }
31085 Expression::Mul(op) => {
31086 let left = Self::replace_unnest_with_if(&op.left, replacement);
31087 let right = Self::replace_unnest_with_if(&op.right, replacement);
31088 Expression::Mul(Box::new(crate::expressions::BinaryOp::new(left, right)))
31089 }
31090 Expression::Div(op) => {
31091 let left = Self::replace_unnest_with_if(&op.left, replacement);
31092 let right = Self::replace_unnest_with_if(&op.right, replacement);
31093 Expression::Div(Box::new(crate::expressions::BinaryOp::new(left, right)))
31094 }
31095 _ => original.clone(),
31096 }
31097 }
31098
31099 /// Decompose a JSON path like `$.y[0].z` into individual parts: `["y", "0", "z"]`.
31100 /// Strips `$` prefix, handles bracket notation, quoted strings, and removes `[*]` wildcards.
31101 fn decompose_json_path(path: &str) -> Vec<String> {
31102 let mut parts = Vec::new();
31103 let path = if path.starts_with("$.") {
31104 &path[2..]
31105 } else if path.starts_with('$') {
31106 &path[1..]
31107 } else {
31108 path
31109 };
31110 if path.is_empty() {
31111 return parts;
31112 }
31113 let mut current = String::new();
31114 let chars: Vec<char> = path.chars().collect();
31115 let mut i = 0;
31116 while i < chars.len() {
31117 match chars[i] {
31118 '.' => {
31119 if !current.is_empty() {
31120 parts.push(current.clone());
31121 current.clear();
31122 }
31123 i += 1;
31124 }
31125 '[' => {
31126 if !current.is_empty() {
31127 parts.push(current.clone());
31128 current.clear();
31129 }
31130 i += 1;
31131 let mut bracket_content = String::new();
31132 while i < chars.len() && chars[i] != ']' {
31133 if chars[i] == '"' || chars[i] == '\'' {
31134 let quote = chars[i];
31135 i += 1;
31136 while i < chars.len() && chars[i] != quote {
31137 bracket_content.push(chars[i]);
31138 i += 1;
31139 }
31140 if i < chars.len() {
31141 i += 1;
31142 }
31143 } else {
31144 bracket_content.push(chars[i]);
31145 i += 1;
31146 }
31147 }
31148 if i < chars.len() {
31149 i += 1;
31150 }
31151 if bracket_content != "*" {
31152 parts.push(bracket_content);
31153 }
31154 }
31155 _ => {
31156 current.push(chars[i]);
31157 i += 1;
31158 }
31159 }
31160 }
31161 if !current.is_empty() {
31162 parts.push(current);
31163 }
31164 parts
31165 }
31166
31167 /// Strip `$` prefix from a JSON path, keeping the rest.
31168 /// `$.y[0].z` -> `y[0].z`, `$["a b"]` -> `["a b"]`
31169 fn strip_json_dollar_prefix(path: &str) -> String {
31170 if path.starts_with("$.") {
31171 path[2..].to_string()
31172 } else if path.starts_with('$') {
31173 path[1..].to_string()
31174 } else {
31175 path.to_string()
31176 }
31177 }
31178
31179 /// Strip `[*]` wildcards from a JSON path.
31180 /// `$.y[*]` -> `$.y`, `$.y[*].z` -> `$.y.z`
31181 fn strip_json_wildcards(path: &str) -> String {
31182 path.replace("[*]", "")
31183 .replace("..", ".") // Clean double dots from `$.y[*].z` -> `$.y..z`
31184 .trim_end_matches('.')
31185 .to_string()
31186 }
31187
31188 /// Convert bracket notation to dot notation for JSON paths.
31189 /// `$["a b"]` -> `$."a b"`, `$["key"]` -> `$.key`
31190 fn bracket_to_dot_notation(path: &str) -> String {
31191 let mut result = String::new();
31192 let chars: Vec<char> = path.chars().collect();
31193 let mut i = 0;
31194 while i < chars.len() {
31195 if chars[i] == '[' {
31196 // Read bracket content
31197 i += 1;
31198 let mut bracket_content = String::new();
31199 let mut is_quoted = false;
31200 let mut _quote_char = '"';
31201 while i < chars.len() && chars[i] != ']' {
31202 if chars[i] == '"' || chars[i] == '\'' {
31203 is_quoted = true;
31204 _quote_char = chars[i];
31205 i += 1;
31206 while i < chars.len() && chars[i] != _quote_char {
31207 bracket_content.push(chars[i]);
31208 i += 1;
31209 }
31210 if i < chars.len() {
31211 i += 1;
31212 }
31213 } else {
31214 bracket_content.push(chars[i]);
31215 i += 1;
31216 }
31217 }
31218 if i < chars.len() {
31219 i += 1;
31220 } // skip ]
31221 if bracket_content == "*" {
31222 // Keep wildcard as-is
31223 result.push_str("[*]");
31224 } else if is_quoted {
31225 // Quoted bracket -> dot notation with quotes
31226 result.push('.');
31227 result.push('"');
31228 result.push_str(&bracket_content);
31229 result.push('"');
31230 } else {
31231 // Numeric index -> keep as bracket
31232 result.push('[');
31233 result.push_str(&bracket_content);
31234 result.push(']');
31235 }
31236 } else {
31237 result.push(chars[i]);
31238 i += 1;
31239 }
31240 }
31241 result
31242 }
31243
31244 /// Convert JSON path bracket quoted strings to use single quotes instead of double quotes.
31245 /// `$["a b"]` -> `$['a b']`
31246 fn bracket_to_single_quotes(path: &str) -> String {
31247 let mut result = String::new();
31248 let chars: Vec<char> = path.chars().collect();
31249 let mut i = 0;
31250 while i < chars.len() {
31251 if chars[i] == '[' && i + 1 < chars.len() && chars[i + 1] == '"' {
31252 result.push('[');
31253 result.push('\'');
31254 i += 2; // skip [ and "
31255 while i < chars.len() && chars[i] != '"' {
31256 result.push(chars[i]);
31257 i += 1;
31258 }
31259 if i < chars.len() {
31260 i += 1;
31261 } // skip closing "
31262 result.push('\'');
31263 } else {
31264 result.push(chars[i]);
31265 i += 1;
31266 }
31267 }
31268 result
31269 }
31270
    /// Transform TSQL SELECT INTO -> CREATE TABLE AS for DuckDB/Snowflake
    /// or PostgreSQL #temp -> TEMPORARY.
    /// Also strips # from INSERT INTO #table for non-TSQL targets.
    ///
    /// Expressions that are neither SELECT ... INTO nor INSERT, and targets not
    /// handled by the match below, pass through unchanged.
    fn transform_select_into(
        expr: Expression,
        _source: DialectType,
        target: DialectType,
    ) -> Expression {
        use crate::expressions::{CreateTable, Expression, TableRef};

        // Handle INSERT INTO #temp -> INSERT INTO temp for non-TSQL targets
        // (only TSQL/Fabric understand the `#` temp-table prefix).
        if let Expression::Insert(ref insert) = expr {
            if insert.table.name.name.starts_with('#')
                && !matches!(target, DialectType::TSQL | DialectType::Fabric)
            {
                let mut new_insert = insert.clone();
                new_insert.table.name.name =
                    insert.table.name.name.trim_start_matches('#').to_string();
                return Expression::Insert(new_insert);
            }
            return expr;
        }

        if let Expression::Select(ref select) = expr {
            if let Some(ref into) = select.into {
                // Resolve the INTO target name; unrecognized shapes yield "".
                let table_name_raw = match &into.this {
                    Expression::Table(tr) => tr.name.name.clone(),
                    Expression::Identifier(id) => id.name.clone(),
                    _ => String::new(),
                };
                // Either a `#name` (TSQL convention) or an explicit INTO TEMPORARY
                // marks the target as a temp table.
                let is_temp = table_name_raw.starts_with('#') || into.temporary;
                let clean_name = table_name_raw.trim_start_matches('#').to_string();

                match target {
                    DialectType::DuckDB | DialectType::Snowflake => {
                        // SELECT INTO -> CREATE TABLE AS SELECT
                        let mut new_select = select.clone();
                        new_select.into = None;
                        // Minimal CREATE TABLE: only name, TEMPORARY flag, and the
                        // AS SELECT body are populated; everything else is defaulted.
                        let ct = CreateTable {
                            name: TableRef::new(clean_name),
                            on_cluster: None,
                            columns: Vec::new(),
                            constraints: Vec::new(),
                            if_not_exists: false,
                            temporary: is_temp,
                            or_replace: false,
                            table_modifier: None,
                            as_select: Some(Expression::Select(new_select)),
                            as_select_parenthesized: false,
                            on_commit: None,
                            clone_source: None,
                            clone_at_clause: None,
                            shallow_clone: false,
                            is_copy: false,
                            leading_comments: Vec::new(),
                            with_properties: Vec::new(),
                            teradata_post_name_options: Vec::new(),
                            with_data: None,
                            with_statistics: None,
                            teradata_indexes: Vec::new(),
                            with_cte: None,
                            properties: Vec::new(),
                            partition_of: None,
                            post_table_properties: Vec::new(),
                            mysql_table_options: Vec::new(),
                            inherits: Vec::new(),
                            on_property: None,
                            copy_grants: false,
                            using_template: None,
                            rollup: None,
                            uuid: None,
                            with_partition_columns: Vec::new(),
                            with_connection: None,
                        };
                        return Expression::CreateTable(Box::new(ct));
                    }
                    DialectType::PostgreSQL | DialectType::Redshift => {
                        // PostgreSQL: #foo -> INTO TEMPORARY foo
                        // (only rewrite when TEMPORARY isn't already set).
                        if is_temp && !into.temporary {
                            let mut new_select = select.clone();
                            let mut new_into = into.clone();
                            new_into.temporary = true;
                            new_into.unlogged = false;
                            // Swap in the `#`-stripped table name.
                            new_into.this = Expression::Table(Box::new(TableRef::new(clean_name)));
                            new_select.into = Some(new_into);
                            Expression::Select(new_select)
                        } else {
                            expr
                        }
                    }
                    _ => expr,
                }
            } else {
                expr
            }
        } else {
            expr
        }
    }
31370
    /// Transform CREATE TABLE WITH properties for cross-dialect transpilation.
    /// Handles FORMAT, PARTITIONED_BY, and other Presto WITH properties.
    ///
    /// Mutates `ct` in place in two passes:
    ///
    /// 1. Presto-style `WITH (key = value, ...)` pairs (`ct.with_properties`) are
    ///    drained and re-emitted in the target's shape: the Presto family keeps them
    ///    (with a lowercase `format` key and `PARTITIONED_BY` normalized to
    ///    `ARRAY['col', ...]`), Hive turns FORMAT into `STORED AS` and the rest into
    ///    `TBLPROPERTIES`, Spark/Databricks turn FORMAT into `USING`, DuckDB drops
    ///    everything, and any other target keeps the pairs as-is.
    /// 2. Structured `ct.properties` (STORED AS / TBLPROPERTIES / PARTITIONED BY
    ///    expressions) are converted back into `WITH` pairs for Presto-family
    ///    targets, stripped for DuckDB, or — for all other targets — left in place
    ///    with quoted format names unquoted (STORED AS 'PARQUET' -> STORED AS PARQUET).
    ///
    /// `_source` is currently unused; the rewrite is driven entirely by `target`.
    fn transform_create_table_properties(
        ct: &mut crate::expressions::CreateTable,
        _source: DialectType,
        target: DialectType,
    ) {
        use crate::expressions::{
            BinaryOp, BooleanLiteral, Expression, FileFormatProperty, Identifier, Literal,
            Properties,
        };

        // Helper to convert a raw property value string to the correct Expression.
        // Recognizes single-quoted strings, i64/f64 numbers, and ARRAY[...]/ARRAY(...)
        // literals; anything else is emitted as a bare (unquoted) identifier.
        let value_to_expr = |v: &str| -> Expression {
            let trimmed = v.trim();
            // Check if it's a quoted string (starts and ends with ')
            if trimmed.starts_with('\'') && trimmed.ends_with('\'') {
                Expression::Literal(Box::new(Literal::String(
                    trimmed[1..trimmed.len() - 1].to_string(),
                )))
            }
            // Check if it's a number
            else if trimmed.parse::<i64>().is_ok() || trimmed.parse::<f64>().is_ok() {
                Expression::Literal(Box::new(Literal::Number(trimmed.to_string())))
            }
            // Check if it's ARRAY[...] or ARRAY(...)
            else if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                // Convert ARRAY['y'] to ARRAY('y') for Hive/Spark
                let inner = trimmed
                    .trim_start_matches(|c: char| c.is_alphabetic()) // Remove ARRAY
                    .trim_start_matches('[')
                    .trim_start_matches('(')
                    .trim_end_matches(']')
                    .trim_end_matches(')');
                // Each comma-separated element is re-emitted as a string literal.
                let elements: Vec<Expression> = inner
                    .split(',')
                    .map(|e| {
                        let elem = e.trim().trim_matches('\'');
                        Expression::Literal(Box::new(Literal::String(elem.to_string())))
                    })
                    .collect();
                Expression::Function(Box::new(crate::expressions::Function::new(
                    "ARRAY".to_string(),
                    elements,
                )))
            }
            // Otherwise, just output as identifier (unquoted)
            else {
                Expression::Identifier(Identifier::new(trimmed.to_string()))
            }
        };

        // Nothing to rewrite: bail out before doing any work.
        if ct.with_properties.is_empty() && ct.properties.is_empty() {
            return;
        }

        // ---- Pass 1: Presto-style WITH properties --------------------------------
        if !ct.with_properties.is_empty() {
            // Extract FORMAT property and remaining properties
            let mut format_value: Option<String> = None;
            let mut partitioned_by: Option<String> = None;
            let mut other_props: Vec<(String, String)> = Vec::new();

            // Drain so the list can be rebuilt in target-specific shape below.
            for (key, value) in ct.with_properties.drain(..) {
                if key.eq_ignore_ascii_case("FORMAT") {
                    // Strip surrounding quotes from value if present
                    format_value = Some(value.trim_matches('\'').to_string());
                } else if key.eq_ignore_ascii_case("PARTITIONED_BY") {
                    partitioned_by = Some(value);
                } else {
                    other_props.push((key, value));
                }
            }

            match target {
                DialectType::Presto | DialectType::Trino | DialectType::Athena => {
                    // Presto: keep WITH properties but lowercase 'format' key
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("format".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        // Convert (col1, col2) to ARRAY['col1', 'col2'] format
                        let trimmed = part.trim();
                        let inner = trimmed.trim_start_matches('(').trim_end_matches(')');
                        // Also handle ARRAY['...'] format - keep as-is
                        if trimmed.len() >= 5 && trimmed[..5].eq_ignore_ascii_case("ARRAY") {
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), part));
                        } else {
                            // Parse column names from the parenthesized list
                            let cols: Vec<&str> = inner
                                .split(',')
                                .map(|c| c.trim().trim_matches('"').trim_matches('\''))
                                .collect();
                            let array_val = format!(
                                "ARRAY[{}]",
                                cols.iter()
                                    .map(|c| format!("'{}'", c))
                                    .collect::<Vec<_>>()
                                    .join(", ")
                            );
                            ct.with_properties
                                .push(("PARTITIONED_BY".to_string(), array_val));
                        }
                    }
                    ct.with_properties.extend(other_props);
                }
                DialectType::Hive => {
                    // Hive: FORMAT -> STORED AS, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        // hive_format = true selects STORED AS rendering in the generator.
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                    value: true,
                                }))),
                            },
                        )));
                    }
                    // NOTE(review): `_part` is used despite the leading underscore;
                    // consider renaming to `part` for clarity.
                    if let Some(_part) = partitioned_by {
                        // PARTITIONED_BY handling is complex - move columns to partitioned by
                        // For now, the partition columns are extracted from the column list
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Remaining pairs become TBLPROPERTIES ('k' = v, ...).
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::Spark | DialectType::Databricks => {
                    // Spark: FORMAT -> USING, other props -> TBLPROPERTIES
                    if let Some(fmt) = format_value {
                        ct.properties.push(Expression::FileFormatProperty(Box::new(
                            FileFormatProperty {
                                this: Some(Box::new(Expression::Identifier(Identifier::new(fmt)))),
                                expressions: vec![],
                                hive_format: None, // None means USING syntax
                            },
                        )));
                    }
                    if let Some(_part) = partitioned_by {
                        Self::apply_partitioned_by(ct, &_part, target);
                    }
                    if !other_props.is_empty() {
                        // Same TBLPROPERTIES conversion as the Hive branch above.
                        let eq_exprs: Vec<Expression> = other_props
                            .into_iter()
                            .map(|(k, v)| {
                                Expression::Eq(Box::new(BinaryOp::new(
                                    Expression::Literal(Box::new(Literal::String(k))),
                                    value_to_expr(&v),
                                )))
                            })
                            .collect();
                        ct.properties
                            .push(Expression::Properties(Box::new(Properties {
                                expressions: eq_exprs,
                            })));
                    }
                }
                DialectType::DuckDB => {
                    // DuckDB: strip all WITH properties (FORMAT, PARTITIONED_BY, etc.)
                    // Keep nothing
                }
                _ => {
                    // For other dialects, keep WITH properties as-is
                    if let Some(fmt) = format_value {
                        ct.with_properties
                            .push(("FORMAT".to_string(), format!("'{}'", fmt)));
                    }
                    if let Some(part) = partitioned_by {
                        ct.with_properties
                            .push(("PARTITIONED_BY".to_string(), part));
                    }
                    ct.with_properties.extend(other_props);
                }
            }
        }

        // ---- Pass 2: structured properties ---------------------------------------
        // Handle STORED AS 'PARQUET' (quoted format name) -> STORED AS PARQUET (unquoted)
        // and Hive STORED AS -> Presto WITH (format=...) conversion
        if !ct.properties.is_empty() {
            let is_presto_target = matches!(
                target,
                DialectType::Presto | DialectType::Trino | DialectType::Athena
            );
            let is_duckdb_target = matches!(target, DialectType::DuckDB);

            if is_presto_target || is_duckdb_target {
                // Rebuild ct.properties, translating (Presto) or dropping (DuckDB)
                // the entries that have a WITH-property equivalent.
                let mut new_properties = Vec::new();
                for prop in ct.properties.drain(..) {
                    match &prop {
                        Expression::FileFormatProperty(ffp) => {
                            if is_presto_target {
                                // Convert STORED AS/USING to WITH (format=...)
                                if let Some(ref fmt_expr) = ffp.this {
                                    let fmt_str = match fmt_expr.as_ref() {
                                        Expression::Identifier(id) => id.name.clone(),
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        _ => {
                                            // Unrecognized format expression: keep the
                                            // property untouched rather than guessing.
                                            new_properties.push(prop);
                                            continue;
                                        }
                                    };
                                    ct.with_properties
                                        .push(("format".to_string(), format!("'{}'", fmt_str)));
                                }
                            }
                            // DuckDB: just strip file format properties
                        }
                        // Convert TBLPROPERTIES to WITH properties for Presto target
                        Expression::Properties(props) if is_presto_target => {
                            for expr in &props.expressions {
                                if let Expression::Eq(eq) = expr {
                                    // Extract key and value from the Eq expression
                                    let key = match &eq.left {
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            s.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    let value = match &eq.right {
                                        // String values stay quoted in the WITH clause...
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::String(_)) =>
                                        {
                                            let Literal::String(s) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            format!("'{}'", s)
                                        }
                                        // ...numbers and identifiers are emitted bare.
                                        Expression::Literal(lit)
                                            if matches!(lit.as_ref(), Literal::Number(_)) =>
                                        {
                                            let Literal::Number(n) = lit.as_ref() else {
                                                unreachable!()
                                            };
                                            n.clone()
                                        }
                                        Expression::Identifier(id) => id.name.clone(),
                                        _ => continue,
                                    };
                                    ct.with_properties.push((key, value));
                                }
                            }
                        }
                        // Convert PartitionedByProperty for Presto target
                        Expression::PartitionedByProperty(ref pbp) if is_presto_target => {
                            // Check if it contains ColumnDef expressions (Hive-style with types)
                            if let Expression::Tuple(ref tuple) = *pbp.this {
                                let mut col_names: Vec<String> = Vec::new();
                                let mut col_defs: Vec<crate::expressions::ColumnDef> = Vec::new();
                                let mut has_col_defs = false;
                                for expr in &tuple.expressions {
                                    if let Expression::ColumnDef(ref cd) = expr {
                                        has_col_defs = true;
                                        col_names.push(cd.name.name.clone());
                                        col_defs.push(*cd.clone());
                                    } else if let Expression::Column(ref col) = expr {
                                        col_names.push(col.name.name.clone());
                                    } else if let Expression::Identifier(ref id) = expr {
                                        col_names.push(id.name.clone());
                                    } else {
                                        // For function expressions like MONTHS(y), serialize to SQL
                                        let generic = Dialect::get(DialectType::Generic);
                                        if let Ok(sql) = generic.generate(expr) {
                                            col_names.push(sql);
                                        }
                                    }
                                }
                                if has_col_defs {
                                    // Merge partition column defs into the main column list
                                    // (Presto declares partition columns like ordinary columns).
                                    for cd in col_defs {
                                        ct.columns.push(cd);
                                    }
                                }
                                if !col_names.is_empty() {
                                    // Add PARTITIONED_BY property
                                    let array_val = format!(
                                        "ARRAY[{}]",
                                        col_names
                                            .iter()
                                            .map(|n| format!("'{}'", n))
                                            .collect::<Vec<_>>()
                                            .join(", ")
                                    );
                                    ct.with_properties
                                        .push(("PARTITIONED_BY".to_string(), array_val));
                                }
                            }
                            // Skip - don't keep in properties
                        }
                        _ => {
                            // DuckDB strips everything; other targets keep unmatched props.
                            if !is_duckdb_target {
                                new_properties.push(prop);
                            }
                        }
                    }
                }
                ct.properties = new_properties;
            } else {
                // For Hive/Spark targets, unquote format names in STORED AS
                for prop in &mut ct.properties {
                    if let Expression::FileFormatProperty(ref mut ffp) = prop {
                        if let Some(ref mut fmt_expr) = ffp.this {
                            if let Expression::Literal(lit) = fmt_expr.as_ref() {
                                if let Literal::String(s) = lit.as_ref() {
                                    // Convert STORED AS 'PARQUET' to STORED AS PARQUET (unquote)
                                    let unquoted = s.clone();
                                    *fmt_expr =
                                        Box::new(Expression::Identifier(Identifier::new(unquoted)));
                                }
                            }
                        }
                    }
                }
            }
        }
    }
31714
31715 /// Apply PARTITIONED_BY conversion: move partition columns from column list to PARTITIONED BY
31716 fn apply_partitioned_by(
31717 ct: &mut crate::expressions::CreateTable,
31718 partitioned_by_value: &str,
31719 target: DialectType,
31720 ) {
31721 use crate::expressions::{Column, Expression, Identifier, PartitionedByProperty, Tuple};
31722
31723 // Parse the ARRAY['col1', 'col2'] value to extract column names
31724 let mut col_names: Vec<String> = Vec::new();
31725 // The value looks like ARRAY['y', 'z'] or ARRAY('y', 'z')
31726 let inner = partitioned_by_value
31727 .trim()
31728 .trim_start_matches("ARRAY")
31729 .trim_start_matches('[')
31730 .trim_start_matches('(')
31731 .trim_end_matches(']')
31732 .trim_end_matches(')');
31733 for part in inner.split(',') {
31734 let col = part.trim().trim_matches('\'').trim_matches('"');
31735 if !col.is_empty() {
31736 col_names.push(col.to_string());
31737 }
31738 }
31739
31740 if col_names.is_empty() {
31741 return;
31742 }
31743
31744 if matches!(target, DialectType::Hive) {
31745 // Hive: PARTITIONED BY (col_name type, ...) - move columns out of column list
31746 let mut partition_col_defs = Vec::new();
31747 for col_name in &col_names {
31748 // Find and remove from columns
31749 if let Some(pos) = ct
31750 .columns
31751 .iter()
31752 .position(|c| c.name.name.eq_ignore_ascii_case(col_name))
31753 {
31754 let col_def = ct.columns.remove(pos);
31755 partition_col_defs.push(Expression::ColumnDef(Box::new(col_def)));
31756 }
31757 }
31758 if !partition_col_defs.is_empty() {
31759 ct.properties
31760 .push(Expression::PartitionedByProperty(Box::new(
31761 PartitionedByProperty {
31762 this: Box::new(Expression::Tuple(Box::new(Tuple {
31763 expressions: partition_col_defs,
31764 }))),
31765 },
31766 )));
31767 }
31768 } else if matches!(target, DialectType::Spark | DialectType::Databricks) {
31769 // Spark: PARTITIONED BY (col1, col2) - just column names, keep in column list
31770 // Use quoted identifiers to match the quoting style of the original column definitions
31771 let partition_exprs: Vec<Expression> = col_names
31772 .iter()
31773 .map(|name| {
31774 // Check if the column exists in the column list and use its quoting
31775 let is_quoted = ct
31776 .columns
31777 .iter()
31778 .any(|c| c.name.name.eq_ignore_ascii_case(name) && c.name.quoted);
31779 let ident = if is_quoted {
31780 Identifier::quoted(name.clone())
31781 } else {
31782 Identifier::new(name.clone())
31783 };
31784 Expression::boxed_column(Column {
31785 name: ident,
31786 table: None,
31787 join_mark: false,
31788 trailing_comments: Vec::new(),
31789 span: None,
31790 inferred_type: None,
31791 })
31792 })
31793 .collect();
31794 ct.properties
31795 .push(Expression::PartitionedByProperty(Box::new(
31796 PartitionedByProperty {
31797 this: Box::new(Expression::Tuple(Box::new(Tuple {
31798 expressions: partition_exprs,
31799 }))),
31800 },
31801 )));
31802 }
31803 // DuckDB: strip partitioned_by entirely (already handled)
31804 }
31805
31806 /// Convert a DataType to Spark's type string format (using angle brackets)
31807 fn data_type_to_spark_string(dt: &crate::expressions::DataType) -> String {
31808 use crate::expressions::DataType;
31809 match dt {
31810 DataType::Int { .. } => "INT".to_string(),
31811 DataType::BigInt { .. } => "BIGINT".to_string(),
31812 DataType::SmallInt { .. } => "SMALLINT".to_string(),
31813 DataType::TinyInt { .. } => "TINYINT".to_string(),
31814 DataType::Float { .. } => "FLOAT".to_string(),
31815 DataType::Double { .. } => "DOUBLE".to_string(),
31816 DataType::Decimal {
31817 precision: Some(p),
31818 scale: Some(s),
31819 } => format!("DECIMAL({}, {})", p, s),
31820 DataType::Decimal {
31821 precision: Some(p), ..
31822 } => format!("DECIMAL({})", p),
31823 DataType::Decimal { .. } => "DECIMAL".to_string(),
31824 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
31825 "STRING".to_string()
31826 }
31827 DataType::Char { .. } => "STRING".to_string(),
31828 DataType::Boolean => "BOOLEAN".to_string(),
31829 DataType::Date => "DATE".to_string(),
31830 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
31831 DataType::Json | DataType::JsonB => "STRING".to_string(),
31832 DataType::Binary { .. } => "BINARY".to_string(),
31833 DataType::Array { element_type, .. } => {
31834 format!("ARRAY<{}>", Self::data_type_to_spark_string(element_type))
31835 }
31836 DataType::Map {
31837 key_type,
31838 value_type,
31839 } => format!(
31840 "MAP<{}, {}>",
31841 Self::data_type_to_spark_string(key_type),
31842 Self::data_type_to_spark_string(value_type)
31843 ),
31844 DataType::Struct { fields, .. } => {
31845 let field_strs: Vec<String> = fields
31846 .iter()
31847 .map(|f| {
31848 if f.name.is_empty() {
31849 Self::data_type_to_spark_string(&f.data_type)
31850 } else {
31851 format!(
31852 "{}: {}",
31853 f.name,
31854 Self::data_type_to_spark_string(&f.data_type)
31855 )
31856 }
31857 })
31858 .collect();
31859 format!("STRUCT<{}>", field_strs.join(", "))
31860 }
31861 DataType::Custom { name } => name.clone(),
31862 _ => format!("{:?}", dt),
31863 }
31864 }
31865
31866 /// Extract value and unit from an Interval expression
31867 /// Returns (value_expression, IntervalUnit)
31868 fn extract_interval_parts(
31869 interval_expr: &Expression,
31870 ) -> (Expression, crate::expressions::IntervalUnit) {
31871 use crate::expressions::{IntervalUnit, IntervalUnitSpec};
31872
31873 if let Expression::Interval(iv) = interval_expr {
31874 let val = iv.this.clone().unwrap_or(Expression::number(0));
31875 let unit = match &iv.unit {
31876 Some(IntervalUnitSpec::Simple { unit, .. }) => *unit,
31877 None => {
31878 // Unit might be embedded in the string value (Snowflake format: '5 DAY')
31879 if let Expression::Literal(lit) = &val {
31880 if let crate::expressions::Literal::String(s) = lit.as_ref() {
31881 let parts: Vec<&str> = s.trim().splitn(2, ' ').collect();
31882 if parts.len() == 2 {
31883 let unit_str = parts[1].trim().to_ascii_uppercase();
31884 let parsed_unit = match unit_str.as_str() {
31885 "YEAR" | "YEARS" => IntervalUnit::Year,
31886 "QUARTER" | "QUARTERS" => IntervalUnit::Quarter,
31887 "MONTH" | "MONTHS" => IntervalUnit::Month,
31888 "WEEK" | "WEEKS" | "ISOWEEK" => IntervalUnit::Week,
31889 "DAY" | "DAYS" => IntervalUnit::Day,
31890 "HOUR" | "HOURS" => IntervalUnit::Hour,
31891 "MINUTE" | "MINUTES" => IntervalUnit::Minute,
31892 "SECOND" | "SECONDS" => IntervalUnit::Second,
31893 "MILLISECOND" | "MILLISECONDS" => IntervalUnit::Millisecond,
31894 "MICROSECOND" | "MICROSECONDS" => IntervalUnit::Microsecond,
31895 _ => IntervalUnit::Day,
31896 };
31897 // Return just the numeric part as value and parsed unit
31898 return (
31899 Expression::Literal(Box::new(
31900 crate::expressions::Literal::String(parts[0].to_string()),
31901 )),
31902 parsed_unit,
31903 );
31904 }
31905 IntervalUnit::Day
31906 } else {
31907 IntervalUnit::Day
31908 }
31909 } else {
31910 IntervalUnit::Day
31911 }
31912 }
31913 _ => IntervalUnit::Day,
31914 };
31915 (val, unit)
31916 } else {
31917 // Not an interval - pass through
31918 (interval_expr.clone(), crate::expressions::IntervalUnit::Day)
31919 }
31920 }
31921
31922 /// Normalize BigQuery-specific functions to standard forms that target dialects can handle
31923 fn normalize_bigquery_function(
31924 e: Expression,
31925 source: DialectType,
31926 target: DialectType,
31927 ) -> Result<Expression> {
31928 use crate::expressions::{BinaryOp, Cast, DataType, Function, Identifier, Literal, Paren};
31929
31930 let f = if let Expression::Function(f) = e {
31931 *f
31932 } else {
31933 return Ok(e);
31934 };
31935 let name = f.name.to_ascii_uppercase();
31936 let mut args = f.args;
31937
31938 /// Helper to extract unit string from an identifier, column, or literal expression
31939 fn get_unit_str(expr: &Expression) -> String {
31940 match expr {
31941 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
31942 Expression::Var(v) => v.this.to_ascii_uppercase(),
31943 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
31944 let Literal::String(s) = lit.as_ref() else {
31945 unreachable!()
31946 };
31947 s.to_ascii_uppercase()
31948 }
31949 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
31950 // Handle WEEK(MONDAY), WEEK(SUNDAY) etc. which are parsed as Function("WEEK", [Column("MONDAY")])
31951 Expression::Function(f) => {
31952 let base = f.name.to_ascii_uppercase();
31953 if !f.args.is_empty() {
31954 // e.g., WEEK(MONDAY) -> "WEEK(MONDAY)"
31955 let inner = get_unit_str(&f.args[0]);
31956 format!("{}({})", base, inner)
31957 } else {
31958 base
31959 }
31960 }
31961 _ => "DAY".to_string(),
31962 }
31963 }
31964
31965 /// Parse unit string to IntervalUnit
31966 fn parse_interval_unit(s: &str) -> crate::expressions::IntervalUnit {
31967 match s {
31968 "YEAR" => crate::expressions::IntervalUnit::Year,
31969 "QUARTER" => crate::expressions::IntervalUnit::Quarter,
31970 "MONTH" => crate::expressions::IntervalUnit::Month,
31971 "WEEK" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
31972 "DAY" => crate::expressions::IntervalUnit::Day,
31973 "HOUR" => crate::expressions::IntervalUnit::Hour,
31974 "MINUTE" => crate::expressions::IntervalUnit::Minute,
31975 "SECOND" => crate::expressions::IntervalUnit::Second,
31976 "MILLISECOND" => crate::expressions::IntervalUnit::Millisecond,
31977 "MICROSECOND" => crate::expressions::IntervalUnit::Microsecond,
31978 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
31979 _ => crate::expressions::IntervalUnit::Day,
31980 }
31981 }
31982
31983 match name.as_str() {
31984 // TIMESTAMP_DIFF(date1, date2, unit) -> TIMESTAMPDIFF(unit, date2, date1)
31985 // (BigQuery: result = date1 - date2, Standard: result = end - start)
31986 "TIMESTAMP_DIFF" | "DATETIME_DIFF" | "TIME_DIFF" if args.len() == 3 => {
31987 let date1 = args.remove(0);
31988 let date2 = args.remove(0);
31989 let unit_expr = args.remove(0);
31990 let unit_str = get_unit_str(&unit_expr);
31991
31992 if matches!(target, DialectType::BigQuery) {
31993 // BigQuery -> BigQuery: just uppercase the unit
31994 let unit = Expression::Identifier(Identifier::new(unit_str.clone()));
31995 return Ok(Expression::Function(Box::new(Function::new(
31996 f.name,
31997 vec![date1, date2, unit],
31998 ))));
31999 }
32000
32001 // For Snowflake: use TimestampDiff expression so it generates TIMESTAMPDIFF
32002 // (Function("TIMESTAMPDIFF") would be converted to DATEDIFF by Snowflake's function normalization)
32003 if matches!(target, DialectType::Snowflake) {
32004 return Ok(Expression::TimestampDiff(Box::new(
32005 crate::expressions::TimestampDiff {
32006 this: Box::new(date2),
32007 expression: Box::new(date1),
32008 unit: Some(unit_str),
32009 },
32010 )));
32011 }
32012
32013 // For DuckDB: DATE_DIFF('UNIT', start, end) with proper CAST
32014 if matches!(target, DialectType::DuckDB) {
32015 let (cast_d1, cast_d2) = if name == "TIME_DIFF" {
32016 // CAST to TIME
32017 let cast_fn = |e: Expression| -> Expression {
32018 match e {
32019 Expression::Literal(lit)
32020 if matches!(lit.as_ref(), Literal::String(_)) =>
32021 {
32022 let Literal::String(s) = lit.as_ref() else {
32023 unreachable!()
32024 };
32025 Expression::Cast(Box::new(Cast {
32026 this: Expression::Literal(Box::new(Literal::String(
32027 s.clone(),
32028 ))),
32029 to: DataType::Custom {
32030 name: "TIME".to_string(),
32031 },
32032 trailing_comments: vec![],
32033 double_colon_syntax: false,
32034 format: None,
32035 default: None,
32036 inferred_type: None,
32037 }))
32038 }
32039 other => other,
32040 }
32041 };
32042 (cast_fn(date1), cast_fn(date2))
32043 } else if name == "DATETIME_DIFF" {
32044 // CAST to TIMESTAMP
32045 (
32046 Self::ensure_cast_timestamp(date1),
32047 Self::ensure_cast_timestamp(date2),
32048 )
32049 } else {
32050 // TIMESTAMP_DIFF: CAST to TIMESTAMPTZ
32051 (
32052 Self::ensure_cast_timestamptz(date1),
32053 Self::ensure_cast_timestamptz(date2),
32054 )
32055 };
32056 return Ok(Expression::Function(Box::new(Function::new(
32057 "DATE_DIFF".to_string(),
32058 vec![
32059 Expression::Literal(Box::new(Literal::String(unit_str))),
32060 cast_d2,
32061 cast_d1,
32062 ],
32063 ))));
32064 }
32065
32066 // Convert to standard TIMESTAMPDIFF(unit, start, end)
32067 let unit = Expression::Identifier(Identifier::new(unit_str));
32068 Ok(Expression::Function(Box::new(Function::new(
32069 "TIMESTAMPDIFF".to_string(),
32070 vec![unit, date2, date1],
32071 ))))
32072 }
32073
32074 // DATEDIFF(unit, start, end) -> target-specific form
32075 // Used by: Redshift, Snowflake, TSQL, Databricks, Spark
32076 "DATEDIFF" if args.len() == 3 => {
32077 let arg0 = args.remove(0);
32078 let arg1 = args.remove(0);
32079 let arg2 = args.remove(0);
32080 let unit_str = get_unit_str(&arg0);
32081
32082 // Redshift DATEDIFF(unit, start, end) order: result = end - start
32083 // Snowflake DATEDIFF(unit, start, end) order: result = end - start
32084 // TSQL DATEDIFF(unit, start, end) order: result = end - start
32085
32086 if matches!(target, DialectType::Snowflake) {
32087 // Snowflake: DATEDIFF(UNIT, start, end) - uppercase unit
32088 let unit = Expression::Identifier(Identifier::new(unit_str));
32089 return Ok(Expression::Function(Box::new(Function::new(
32090 "DATEDIFF".to_string(),
32091 vec![unit, arg1, arg2],
32092 ))));
32093 }
32094
32095 if matches!(target, DialectType::DuckDB) {
32096 // DuckDB: DATE_DIFF('UNIT', start, end) with CAST
32097 let cast_d1 = Self::ensure_cast_timestamp(arg1);
32098 let cast_d2 = Self::ensure_cast_timestamp(arg2);
32099 return Ok(Expression::Function(Box::new(Function::new(
32100 "DATE_DIFF".to_string(),
32101 vec![
32102 Expression::Literal(Box::new(Literal::String(unit_str))),
32103 cast_d1,
32104 cast_d2,
32105 ],
32106 ))));
32107 }
32108
32109 if matches!(target, DialectType::BigQuery) {
32110 // BigQuery: DATE_DIFF(end_date, start_date, UNIT) - reversed args, CAST to DATETIME
32111 let cast_d1 = Self::ensure_cast_datetime(arg1);
32112 let cast_d2 = Self::ensure_cast_datetime(arg2);
32113 let unit = Expression::Identifier(Identifier::new(unit_str));
32114 return Ok(Expression::Function(Box::new(Function::new(
32115 "DATE_DIFF".to_string(),
32116 vec![cast_d2, cast_d1, unit],
32117 ))));
32118 }
32119
32120 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32121 // Spark/Databricks: DATEDIFF(UNIT, start, end) - uppercase unit
32122 let unit = Expression::Identifier(Identifier::new(unit_str));
32123 return Ok(Expression::Function(Box::new(Function::new(
32124 "DATEDIFF".to_string(),
32125 vec![unit, arg1, arg2],
32126 ))));
32127 }
32128
32129 if matches!(target, DialectType::Hive) {
32130 // Hive: DATEDIFF(end, start) for DAY only, use MONTHS_BETWEEN for MONTH
32131 match unit_str.as_str() {
32132 "MONTH" => {
32133 return Ok(Expression::Function(Box::new(Function::new(
32134 "CAST".to_string(),
32135 vec![Expression::Function(Box::new(Function::new(
32136 "MONTHS_BETWEEN".to_string(),
32137 vec![arg2, arg1],
32138 )))],
32139 ))));
32140 }
32141 "WEEK" => {
32142 return Ok(Expression::Cast(Box::new(Cast {
32143 this: Expression::Div(Box::new(crate::expressions::BinaryOp::new(
32144 Expression::Function(Box::new(Function::new(
32145 "DATEDIFF".to_string(),
32146 vec![arg2, arg1],
32147 ))),
32148 Expression::Literal(Box::new(Literal::Number("7".to_string()))),
32149 ))),
32150 to: DataType::Int {
32151 length: None,
32152 integer_spelling: false,
32153 },
32154 trailing_comments: vec![],
32155 double_colon_syntax: false,
32156 format: None,
32157 default: None,
32158 inferred_type: None,
32159 })));
32160 }
32161 _ => {
32162 // Default: DATEDIFF(end, start) for DAY
32163 return Ok(Expression::Function(Box::new(Function::new(
32164 "DATEDIFF".to_string(),
32165 vec![arg2, arg1],
32166 ))));
32167 }
32168 }
32169 }
32170
32171 if matches!(
32172 target,
32173 DialectType::Presto | DialectType::Trino | DialectType::Athena
32174 ) {
32175 // Presto/Trino: DATE_DIFF('UNIT', start, end)
32176 return Ok(Expression::Function(Box::new(Function::new(
32177 "DATE_DIFF".to_string(),
32178 vec![
32179 Expression::Literal(Box::new(Literal::String(unit_str))),
32180 arg1,
32181 arg2,
32182 ],
32183 ))));
32184 }
32185
32186 if matches!(target, DialectType::TSQL) {
32187 // TSQL: DATEDIFF(UNIT, start, CAST(end AS DATETIME2))
32188 let cast_d2 = Self::ensure_cast_datetime2(arg2);
32189 let unit = Expression::Identifier(Identifier::new(unit_str));
32190 return Ok(Expression::Function(Box::new(Function::new(
32191 "DATEDIFF".to_string(),
32192 vec![unit, arg1, cast_d2],
32193 ))));
32194 }
32195
32196 if matches!(target, DialectType::PostgreSQL) {
32197 // PostgreSQL doesn't have DATEDIFF - use date subtraction or EXTRACT
32198 // For now, use DATEDIFF (passthrough) with uppercased unit
32199 let unit = Expression::Identifier(Identifier::new(unit_str));
32200 return Ok(Expression::Function(Box::new(Function::new(
32201 "DATEDIFF".to_string(),
32202 vec![unit, arg1, arg2],
32203 ))));
32204 }
32205
32206 // Default: DATEDIFF(UNIT, start, end) with uppercase unit
32207 let unit = Expression::Identifier(Identifier::new(unit_str));
32208 Ok(Expression::Function(Box::new(Function::new(
32209 "DATEDIFF".to_string(),
32210 vec![unit, arg1, arg2],
32211 ))))
32212 }
32213
32214 // DATE_DIFF(date1, date2, unit) -> standard form
32215 "DATE_DIFF" if args.len() == 3 => {
32216 let date1 = args.remove(0);
32217 let date2 = args.remove(0);
32218 let unit_expr = args.remove(0);
32219 let unit_str = get_unit_str(&unit_expr);
32220
32221 if matches!(target, DialectType::BigQuery) {
32222 // BigQuery -> BigQuery: just uppercase the unit, normalize WEEK(SUNDAY) -> WEEK
32223 let norm_unit = if unit_str == "WEEK(SUNDAY)" {
32224 "WEEK".to_string()
32225 } else {
32226 unit_str
32227 };
32228 let norm_d1 = Self::date_literal_to_cast(date1);
32229 let norm_d2 = Self::date_literal_to_cast(date2);
32230 let unit = Expression::Identifier(Identifier::new(norm_unit));
32231 return Ok(Expression::Function(Box::new(Function::new(
32232 f.name,
32233 vec![norm_d1, norm_d2, unit],
32234 ))));
32235 }
32236
32237 if matches!(target, DialectType::MySQL) {
32238 // MySQL DATEDIFF only takes 2 args (date1, date2), returns day difference
32239 let norm_d1 = Self::date_literal_to_cast(date1);
32240 let norm_d2 = Self::date_literal_to_cast(date2);
32241 return Ok(Expression::Function(Box::new(Function::new(
32242 "DATEDIFF".to_string(),
32243 vec![norm_d1, norm_d2],
32244 ))));
32245 }
32246
32247 if matches!(target, DialectType::StarRocks) {
32248 // StarRocks: DATE_DIFF('UNIT', date1, date2) - unit as string, args NOT swapped
32249 let norm_d1 = Self::date_literal_to_cast(date1);
32250 let norm_d2 = Self::date_literal_to_cast(date2);
32251 return Ok(Expression::Function(Box::new(Function::new(
32252 "DATE_DIFF".to_string(),
32253 vec![
32254 Expression::Literal(Box::new(Literal::String(unit_str))),
32255 norm_d1,
32256 norm_d2,
32257 ],
32258 ))));
32259 }
32260
32261 if matches!(target, DialectType::DuckDB) {
32262 // DuckDB: DATE_DIFF('UNIT', date2, date1) with proper CAST for dates
32263 let norm_d1 = Self::ensure_cast_date(date1);
32264 let norm_d2 = Self::ensure_cast_date(date2);
32265
32266 // Handle WEEK variants: WEEK(MONDAY)/WEEK(SUNDAY)/ISOWEEK/WEEK
32267 let is_week_variant = unit_str == "WEEK"
32268 || unit_str.starts_with("WEEK(")
32269 || unit_str == "ISOWEEK";
32270 if is_week_variant {
32271 // For DuckDB, WEEK-based diffs use DATE_TRUNC approach
32272 // WEEK(MONDAY) / ISOWEEK: DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2), DATE_TRUNC('WEEK', d1))
32273 // WEEK / WEEK(SUNDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '1' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '1' DAY))
32274 // WEEK(SATURDAY): DATE_DIFF('WEEK', DATE_TRUNC('WEEK', d2 + INTERVAL '-5' DAY), DATE_TRUNC('WEEK', d1 + INTERVAL '-5' DAY))
32275 let day_offset = if unit_str == "WEEK(MONDAY)" || unit_str == "ISOWEEK" {
32276 None // ISO weeks start on Monday, aligned with DATE_TRUNC('WEEK')
32277 } else if unit_str == "WEEK" || unit_str == "WEEK(SUNDAY)" {
32278 Some("1") // Shift Sunday to Monday alignment
32279 } else if unit_str == "WEEK(SATURDAY)" {
32280 Some("-5")
32281 } else if unit_str == "WEEK(TUESDAY)" {
32282 Some("-1")
32283 } else if unit_str == "WEEK(WEDNESDAY)" {
32284 Some("-2")
32285 } else if unit_str == "WEEK(THURSDAY)" {
32286 Some("-3")
32287 } else if unit_str == "WEEK(FRIDAY)" {
32288 Some("-4")
32289 } else {
32290 Some("1") // default to Sunday
32291 };
32292
32293 let make_trunc = |date: Expression, offset: Option<&str>| -> Expression {
32294 let shifted = if let Some(off) = offset {
32295 let interval =
32296 Expression::Interval(Box::new(crate::expressions::Interval {
32297 this: Some(Expression::Literal(Box::new(Literal::String(
32298 off.to_string(),
32299 )))),
32300 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32301 unit: crate::expressions::IntervalUnit::Day,
32302 use_plural: false,
32303 }),
32304 }));
32305 Expression::Add(Box::new(crate::expressions::BinaryOp::new(
32306 date, interval,
32307 )))
32308 } else {
32309 date
32310 };
32311 Expression::Function(Box::new(Function::new(
32312 "DATE_TRUNC".to_string(),
32313 vec![
32314 Expression::Literal(Box::new(Literal::String(
32315 "WEEK".to_string(),
32316 ))),
32317 shifted,
32318 ],
32319 )))
32320 };
32321
32322 let trunc_d2 = make_trunc(norm_d2, day_offset);
32323 let trunc_d1 = make_trunc(norm_d1, day_offset);
32324 return Ok(Expression::Function(Box::new(Function::new(
32325 "DATE_DIFF".to_string(),
32326 vec![
32327 Expression::Literal(Box::new(Literal::String("WEEK".to_string()))),
32328 trunc_d2,
32329 trunc_d1,
32330 ],
32331 ))));
32332 }
32333
32334 return Ok(Expression::Function(Box::new(Function::new(
32335 "DATE_DIFF".to_string(),
32336 vec![
32337 Expression::Literal(Box::new(Literal::String(unit_str))),
32338 norm_d2,
32339 norm_d1,
32340 ],
32341 ))));
32342 }
32343
32344 // Default: DATEDIFF(unit, date2, date1)
32345 let unit = Expression::Identifier(Identifier::new(unit_str));
32346 Ok(Expression::Function(Box::new(Function::new(
32347 "DATEDIFF".to_string(),
32348 vec![unit, date2, date1],
32349 ))))
32350 }
32351
// TIMESTAMP_ADD(ts, INTERVAL n UNIT) -> target-specific
// BigQuery-style datetime addition. The interval argument is decomposed into a
// (value, unit) pair and re-assembled into whatever surface syntax the target
// dialect expects. `name` distinguishes the TIMESTAMP_/DATETIME_/TIME_ variants,
// which differ in how the first operand must be cast.
"TIMESTAMP_ADD" | "DATETIME_ADD" | "TIME_ADD" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Split `INTERVAL n UNIT` into the numeric amount and the unit enum.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val, CAST(ts AS TIMESTAMPTZ))
            // Use TimestampAdd expression so Snowflake generates TIMESTAMPADD
            // (Function("TIMESTAMPADD") would be converted to DATEADD by Snowflake's function normalization)
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str.to_string()),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if name == "DATETIME_ADD" && matches!(target, DialectType::Spark) {
                // Spark DATETIME_ADD: ts + INTERVAL val UNIT
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(ts, interval),
                )))
            } else if name == "DATETIME_ADD"
                && matches!(target, DialectType::Databricks)
            {
                // Databricks DATETIME_ADD: TIMESTAMPADD(UNIT, val, ts)
                let unit_str = Self::interval_unit_to_string(&unit);
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![Expression::Identifier(Identifier::new(unit_str)), val, ts],
                ))))
            } else {
                // TIMESTAMP_ADD / TIME_ADD for both targets.
                // Presto-style: DATE_ADD('unit', val, CAST(ts AS TIMESTAMP))
                let unit_str = Self::interval_unit_to_string(&unit);
                // Only timestamp/datetime variants get the CAST; TIME_ADD
                // passes its operand through untouched.
                let cast_ts =
                    if name.starts_with("TIMESTAMP") || name.starts_with("DATETIME") {
                        Self::maybe_cast_ts(ts)
                    } else {
                        ts
                    };
                Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        val,
                        cast_ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_ADD(TIMESTAMP(ts), INTERVAL val UNIT) for MySQL
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' for TIMESTAMP() wrapper
                        // (avoids emitting TIMESTAMP(TIMESTAMP '...')).
                        let unwrapped = match ts {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                            {
                                let Literal::Timestamp(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Expression::Literal(Box::new(Literal::String(
                                    s.clone(),
                                )))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others use DateAdd expression (DuckDB converts to + INTERVAL)
            // For DuckDB: DATETIME_ADD forces a plain TIMESTAMP cast, the
            // TIMESTAMP_* variants go through maybe_cast_ts_to_tz (presumably a
            // timezone-aware cast — see that helper), and TIME_ADD is left alone.
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_ADD" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
32481
// TIMESTAMP_SUB(ts, INTERVAL n UNIT) -> target-specific
// Mirror of the *_ADD arm above: subtraction is expressed either as a native
// subtract/DateSub node or as an add with the amount negated (val * -1).
"TIMESTAMP_SUB" | "DATETIME_SUB" | "TIME_SUB" if args.len() == 2 => {
    let ts = args.remove(0);
    let interval_expr = args.remove(0);
    // Split `INTERVAL n UNIT` into the numeric amount and the unit enum.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);

    match target {
        DialectType::Snowflake => {
            // TIMESTAMPADD(UNIT, val * -1, CAST(ts AS TIMESTAMPTZ))
            let unit_str = Self::interval_unit_to_string(&unit);
            let cast_ts = Self::maybe_cast_ts_to_tz(ts, &name);
            // Negate by multiplying with -1 so non-literal amounts work too.
            let neg_val = Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                val,
                Expression::Neg(Box::new(crate::expressions::UnaryOp {
                    this: Expression::number(1),
                    inferred_type: None,
                })),
            )));
            Ok(Expression::TimestampAdd(Box::new(
                crate::expressions::TimestampAdd {
                    this: Box::new(neg_val),
                    expression: Box::new(cast_ts),
                    unit: Some(unit_str.to_string()),
                },
            )))
        }
        DialectType::Spark | DialectType::Databricks => {
            if (name == "DATETIME_SUB" && matches!(target, DialectType::Spark))
                || (name == "TIMESTAMP_SUB" && matches!(target, DialectType::Spark))
            {
                // Spark: ts - INTERVAL val UNIT
                let cast_ts = if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts(ts)
                } else {
                    ts
                };
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(val),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit,
                            use_plural: false,
                        }),
                    }));
                Ok(Expression::Sub(Box::new(
                    crate::expressions::BinaryOp::new(cast_ts, interval),
                )))
            } else {
                // Databricks: TIMESTAMPADD(UNIT, val * -1, ts)
                // NOTE(review): this branch also catches TIME_SUB when targeting
                // Spark — confirm that emitting TIMESTAMPADD is intended there.
                let unit_str = Self::interval_unit_to_string(&unit);
                let neg_val =
                    Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                        val,
                        Expression::Neg(Box::new(crate::expressions::UnaryOp {
                            this: Expression::number(1),
                            inferred_type: None,
                        })),
                    )));
                Ok(Expression::Function(Box::new(Function::new(
                    "TIMESTAMPADD".to_string(),
                    vec![
                        Expression::Identifier(Identifier::new(unit_str)),
                        neg_val,
                        ts,
                    ],
                ))))
            }
        }
        DialectType::MySQL => {
            // DATE_SUB(TIMESTAMP(ts), INTERVAL val UNIT) via the DateSub node.
            let mysql_ts = if name.starts_with("TIMESTAMP") {
                // Check if already wrapped in TIMESTAMP() function (from cross-dialect normalization)
                match &ts {
                    Expression::Function(ref inner_f)
                        if inner_f.name.eq_ignore_ascii_case("TIMESTAMP") =>
                    {
                        // Already wrapped, keep as-is
                        ts
                    }
                    _ => {
                        // Unwrap typed literals: TIMESTAMP '...' -> '...' before
                        // re-wrapping in a TIMESTAMP() call.
                        let unwrapped = match ts {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                            {
                                let Literal::Timestamp(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Expression::Literal(Box::new(Literal::String(
                                    s.clone(),
                                )))
                            }
                            other => other,
                        };
                        Expression::Function(Box::new(Function::new(
                            "TIMESTAMP".to_string(),
                            vec![unwrapped],
                        )))
                    }
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: mysql_ts,
                    interval: val,
                    unit,
                },
            )))
        }
        _ => {
            // DuckDB and others: emit a DateSub node; DuckDB additionally needs
            // the operand cast (plain TIMESTAMP for DATETIME_SUB, the tz-aware
            // helper for TIMESTAMP_* names; TIME_SUB passes through).
            let cast_ts = if matches!(target, DialectType::DuckDB) {
                if name == "DATETIME_SUB" {
                    Self::ensure_cast_timestamp(ts)
                } else if name.starts_with("TIMESTAMP") {
                    Self::maybe_cast_ts_to_tz(ts, &name)
                } else {
                    ts
                }
            } else {
                ts
            };
            Ok(Expression::DateSub(Box::new(
                crate::expressions::DateAddFunc {
                    this: cast_ts,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
32613
32614 // DATE_SUB(date, INTERVAL n UNIT) -> target-specific
32615 "DATE_SUB" if args.len() == 2 => {
32616 let date = args.remove(0);
32617 let interval_expr = args.remove(0);
32618 let (val, unit) = Self::extract_interval_parts(&interval_expr);
32619
32620 match target {
32621 DialectType::Databricks | DialectType::Spark => {
32622 // Databricks/Spark: DATE_ADD(date, -val)
32623 // Use DateAdd expression with negative val so it generates correctly
32624 // The generator will output DATE_ADD(date, INTERVAL -val DAY)
32625 // Then Databricks transform converts 2-arg DATE_ADD(date, interval) to DATEADD(DAY, interval, date)
32626 // Instead, we directly output as a simple negated DateSub
32627 Ok(Expression::DateSub(Box::new(
32628 crate::expressions::DateAddFunc {
32629 this: date,
32630 interval: val,
32631 unit,
32632 },
32633 )))
32634 }
32635 DialectType::DuckDB => {
32636 // DuckDB: CAST(date AS DATE) - INTERVAL 'val' UNIT
32637 let cast_date = Self::ensure_cast_date(date);
32638 let interval =
32639 Expression::Interval(Box::new(crate::expressions::Interval {
32640 this: Some(val),
32641 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32642 unit,
32643 use_plural: false,
32644 }),
32645 }));
32646 Ok(Expression::Sub(Box::new(
32647 crate::expressions::BinaryOp::new(cast_date, interval),
32648 )))
32649 }
32650 DialectType::Snowflake => {
32651 // Snowflake: Let Snowflake's own DateSub -> DATEADD(UNIT, val * -1, date) handler work
32652 // Just ensure the date is cast properly
32653 let cast_date = Self::ensure_cast_date(date);
32654 Ok(Expression::DateSub(Box::new(
32655 crate::expressions::DateAddFunc {
32656 this: cast_date,
32657 interval: val,
32658 unit,
32659 },
32660 )))
32661 }
32662 DialectType::PostgreSQL => {
32663 // PostgreSQL: date - INTERVAL 'val UNIT'
32664 let unit_str = Self::interval_unit_to_string(&unit);
32665 let interval =
32666 Expression::Interval(Box::new(crate::expressions::Interval {
32667 this: Some(Expression::Literal(Box::new(Literal::String(
32668 format!("{} {}", Self::expr_to_string(&val), unit_str),
32669 )))),
32670 unit: None,
32671 }));
32672 Ok(Expression::Sub(Box::new(
32673 crate::expressions::BinaryOp::new(date, interval),
32674 )))
32675 }
32676 _ => Ok(Expression::DateSub(Box::new(
32677 crate::expressions::DateAddFunc {
32678 this: date,
32679 interval: val,
32680 unit,
32681 },
32682 ))),
32683 }
32684 }
32685
// DATEADD(unit, val, date) -> target-specific form
// Used by: Redshift, Snowflake, TSQL, ClickHouse
"DATEADD" if args.len() == 3 => {
    // arg0 = unit, arg1 = amount, arg2 = date/timestamp operand.
    let arg0 = args.remove(0);
    let arg1 = args.remove(0);
    let arg2 = args.remove(0);
    let unit_str = get_unit_str(&arg0);

    if matches!(target, DialectType::Snowflake | DialectType::TSQL) {
        // Keep DATEADD(UNIT, val, date) with uppercased unit
        let unit = Expression::Identifier(Identifier::new(unit_str));
        // Only CAST to DATETIME2 for TSQL target when source is NOT Spark/Databricks family
        let date = if matches!(target, DialectType::TSQL)
            && !matches!(
                source,
                DialectType::Spark | DialectType::Databricks | DialectType::Hive
            ) {
            Self::ensure_cast_datetime2(arg2)
        } else {
            arg2
        };
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, date],
        ))));
    }

    if matches!(target, DialectType::DuckDB) {
        // DuckDB: date + INTERVAL 'val' UNIT (operand cast to TIMESTAMP first)
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        let cast_date = Self::ensure_cast_timestamp(arg2);
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(cast_date, interval),
        )));
    }

    if matches!(target, DialectType::BigQuery) {
        // BigQuery: DATE_ADD(date, INTERVAL val UNIT) or TIMESTAMP_ADD(ts, INTERVAL val UNIT)
        // NOTE(review): only DATE_ADD is emitted below; confirm whether
        // timestamp-typed operands should produce TIMESTAMP_ADD instead.
        let iu = parse_interval_unit(&unit_str);
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(arg1),
            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: iu,
                use_plural: false,
            }),
        }));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![arg2, interval],
        ))));
    }

    if matches!(target, DialectType::Databricks) {
        // Databricks: keep DATEADD(UNIT, val, date) format
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    if matches!(target, DialectType::Spark) {
        // Spark: convert month-based units to ADD_MONTHS, rest to DATE_ADD
        // Multiply the amount by a constant factor, folding integer literals at
        // transpile time and emitting `expr * factor` for anything else.
        fn multiply_expr_dateadd(expr: Expression, factor: i64) -> Expression {
            if let Expression::Literal(lit) = &expr {
                if let crate::expressions::Literal::Number(n) = lit.as_ref() {
                    if let Ok(val) = n.parse::<i64>() {
                        return Expression::Literal(Box::new(
                            crate::expressions::Literal::Number(
                                (val * factor).to_string(),
                            ),
                        ));
                    }
                }
            }
            Expression::Mul(Box::new(crate::expressions::BinaryOp::new(
                expr,
                Expression::Literal(Box::new(crate::expressions::Literal::Number(
                    factor.to_string(),
                ))),
            )))
        }
        // YEAR/QUARTER/WEEK are scaled down to months or days, since Spark's
        // ADD_MONTHS/DATE_ADD only take month and day counts respectively.
        match unit_str.as_str() {
            "YEAR" => {
                let months = multiply_expr_dateadd(arg1, 12);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "QUARTER" => {
                let months = multiply_expr_dateadd(arg1, 3);
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, months],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "WEEK" => {
                let days = multiply_expr_dateadd(arg1, 7);
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, days],
                ))));
            }
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Sub-day units: fall back to 3-arg DATE_ADD(UNIT, val, date).
                let unit = Expression::Identifier(Identifier::new(unit_str));
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![unit, arg1, arg2],
                ))));
            }
        }
    }

    if matches!(target, DialectType::Hive) {
        // Hive: DATE_ADD(date, val) for DAY, or date + INTERVAL for others
        match unit_str.as_str() {
            "DAY" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            "MONTH" => {
                return Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![arg2, arg1],
                ))));
            }
            _ => {
                // Everything else: date + INTERVAL val UNIT operator form.
                let iu = parse_interval_unit(&unit_str);
                let interval =
                    Expression::Interval(Box::new(crate::expressions::Interval {
                        this: Some(arg1),
                        unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                            unit: iu,
                            use_plural: false,
                        }),
                    }));
                return Ok(Expression::Add(Box::new(
                    crate::expressions::BinaryOp::new(arg2, interval),
                )));
            }
        }
    }

    if matches!(target, DialectType::PostgreSQL) {
        // PostgreSQL: date + INTERVAL 'val UNIT'
        // (amount and unit folded into one string literal)
        let interval = Expression::Interval(Box::new(crate::expressions::Interval {
            this: Some(Expression::Literal(Box::new(Literal::String(format!(
                "{} {}",
                Self::expr_to_string(&arg1),
                unit_str
            ))))),
            unit: None,
        }));
        return Ok(Expression::Add(Box::new(
            crate::expressions::BinaryOp::new(arg2, interval),
        )));
    }

    if matches!(
        target,
        DialectType::Presto | DialectType::Trino | DialectType::Athena
    ) {
        // Presto/Trino: DATE_ADD('UNIT', val, date) — unit is a string literal
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![
                Expression::Literal(Box::new(Literal::String(unit_str))),
                arg1,
                arg2,
            ],
        ))));
    }

    if matches!(target, DialectType::ClickHouse) {
        // ClickHouse: DATE_ADD(UNIT, val, date) — unit is a bare identifier
        let unit = Expression::Identifier(Identifier::new(unit_str));
        return Ok(Expression::Function(Box::new(Function::new(
            "DATE_ADD".to_string(),
            vec![unit, arg1, arg2],
        ))));
    }

    // Default: keep DATEADD with uppercased unit
    let unit = Expression::Identifier(Identifier::new(unit_str));
    Ok(Expression::Function(Box::new(Function::new(
        "DATEADD".to_string(),
        vec![unit, arg1, arg2],
    ))))
}
32897
32898 // DATE_ADD(unit, val, date) - 3 arg form from ClickHouse/Presto
32899 "DATE_ADD" if args.len() == 3 => {
32900 let arg0 = args.remove(0);
32901 let arg1 = args.remove(0);
32902 let arg2 = args.remove(0);
32903 let unit_str = get_unit_str(&arg0);
32904
32905 if matches!(
32906 target,
32907 DialectType::Presto | DialectType::Trino | DialectType::Athena
32908 ) {
32909 // Presto/Trino: DATE_ADD('UNIT', val, date)
32910 return Ok(Expression::Function(Box::new(Function::new(
32911 "DATE_ADD".to_string(),
32912 vec![
32913 Expression::Literal(Box::new(Literal::String(unit_str))),
32914 arg1,
32915 arg2,
32916 ],
32917 ))));
32918 }
32919
32920 if matches!(
32921 target,
32922 DialectType::Snowflake | DialectType::TSQL | DialectType::Redshift
32923 ) {
32924 // DATEADD(UNIT, val, date)
32925 let unit = Expression::Identifier(Identifier::new(unit_str));
32926 let date = if matches!(target, DialectType::TSQL) {
32927 Self::ensure_cast_datetime2(arg2)
32928 } else {
32929 arg2
32930 };
32931 return Ok(Expression::Function(Box::new(Function::new(
32932 "DATEADD".to_string(),
32933 vec![unit, arg1, date],
32934 ))));
32935 }
32936
32937 if matches!(target, DialectType::DuckDB) {
32938 // DuckDB: date + INTERVAL val UNIT
32939 let iu = parse_interval_unit(&unit_str);
32940 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
32941 this: Some(arg1),
32942 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
32943 unit: iu,
32944 use_plural: false,
32945 }),
32946 }));
32947 return Ok(Expression::Add(Box::new(
32948 crate::expressions::BinaryOp::new(arg2, interval),
32949 )));
32950 }
32951
32952 if matches!(target, DialectType::Spark | DialectType::Databricks) {
32953 // Spark: DATE_ADD(UNIT, val, date) with uppercased unit
32954 let unit = Expression::Identifier(Identifier::new(unit_str));
32955 return Ok(Expression::Function(Box::new(Function::new(
32956 "DATE_ADD".to_string(),
32957 vec![unit, arg1, arg2],
32958 ))));
32959 }
32960
32961 // Default: DATE_ADD(UNIT, val, date)
32962 let unit = Expression::Identifier(Identifier::new(unit_str));
32963 Ok(Expression::Function(Box::new(Function::new(
32964 "DATE_ADD".to_string(),
32965 vec![unit, arg1, arg2],
32966 ))))
32967 }
32968
// DATE_ADD(date, INTERVAL val UNIT) - 2 arg BigQuery form
"DATE_ADD" if args.len() == 2 => {
    let date = args.remove(0);
    let interval_expr = args.remove(0);
    // Decompose the INTERVAL argument into amount + unit once; each branch
    // below re-assembles them in target syntax.
    let (val, unit) = Self::extract_interval_parts(&interval_expr);
    let unit_str = Self::interval_unit_to_string(&unit);

    match target {
        DialectType::DuckDB => {
            // DuckDB: CAST(date AS DATE) + INTERVAL 'val' UNIT
            let cast_date = Self::ensure_cast_date(date);
            let quoted_val = Self::quote_interval_val(&val);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(cast_date, interval),
            )))
        }
        DialectType::PostgreSQL => {
            // PostgreSQL: date + INTERVAL 'val UNIT'
            // (amount and unit folded into one string literal)
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(Expression::Literal(Box::new(Literal::String(
                        format!("{} {}", Self::expr_to_string(&val), unit_str),
                    )))),
                    unit: None,
                }));
            Ok(Expression::Add(Box::new(
                crate::expressions::BinaryOp::new(date, interval),
            )))
        }
        DialectType::Presto | DialectType::Trino | DialectType::Athena => {
            // Presto: DATE_ADD('UNIT', CAST('val' AS BIGINT), date)
            // NOTE(review): the amount is stringified via expr_to_string before
            // being CAST back to BIGINT — presumably val is always a literal
            // here; a non-literal amount expression would be lost. Confirm.
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![
                    Expression::Literal(Box::new(Literal::String(
                        unit_str.to_string(),
                    ))),
                    Expression::Cast(Box::new(Cast {
                        this: Expression::Literal(Box::new(Literal::String(val_str))),
                        to: DataType::BigInt { length: None },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })),
                    date,
                ],
            ))))
        }
        DialectType::Spark | DialectType::Hive => {
            // Spark/Hive: DATE_ADD(date, val) for DAY, ADD_MONTHS for MONTH,
            // otherwise operator-less DATE_ADD(date, INTERVAL val UNIT).
            match unit_str {
                "DAY" => Ok(Expression::Function(Box::new(Function::new(
                    "DATE_ADD".to_string(),
                    vec![date, val],
                )))),
                "MONTH" => Ok(Expression::Function(Box::new(Function::new(
                    "ADD_MONTHS".to_string(),
                    vec![date, val],
                )))),
                _ => {
                    let iu = parse_interval_unit(&unit_str);
                    let interval =
                        Expression::Interval(Box::new(crate::expressions::Interval {
                            this: Some(val),
                            unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                unit: iu,
                                use_plural: false,
                            }),
                        }));
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATE_ADD".to_string(),
                        vec![date, interval],
                    ))))
                }
            }
        }
        DialectType::Snowflake => {
            // Snowflake: DATEADD(UNIT, 'val', CAST(date AS DATE))
            let cast_date = Self::ensure_cast_date(date);
            let val_str = Self::expr_to_string(&val);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    Expression::Literal(Box::new(Literal::String(val_str))),
                    cast_date,
                ],
            ))))
        }
        DialectType::TSQL | DialectType::Fabric => {
            // TSQL/Fabric: DATEADD(UNIT, val, CAST(date AS DATETIME2))
            let cast_date = Self::ensure_cast_datetime2(date);
            Ok(Expression::Function(Box::new(Function::new(
                "DATEADD".to_string(),
                vec![
                    Expression::Identifier(Identifier::new(unit_str)),
                    val,
                    cast_date,
                ],
            ))))
        }
        // Redshift: DATEADD(UNIT, val, date) — no cast needed.
        DialectType::Redshift => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        DialectType::MySQL => {
            // MySQL: DATE_ADD(date, INTERVAL 'val' UNIT)
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        DialectType::BigQuery => {
            // BigQuery: DATE_ADD(date, INTERVAL 'val' UNIT) — round-trip form
            let quoted_val = Self::quote_interval_val(&val);
            let iu = parse_interval_unit(&unit_str);
            let interval =
                Expression::Interval(Box::new(crate::expressions::Interval {
                    this: Some(quoted_val),
                    unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                        unit: iu,
                        use_plural: false,
                    }),
                }));
            Ok(Expression::Function(Box::new(Function::new(
                "DATE_ADD".to_string(),
                vec![date, interval],
            ))))
        }
        // Databricks: DATEADD(UNIT, val, date)
        DialectType::Databricks => Ok(Expression::Function(Box::new(Function::new(
            "DATEADD".to_string(),
            vec![Expression::Identifier(Identifier::new(unit_str)), val, date],
        )))),
        _ => {
            // Default: keep as DATE_ADD with decomposed interval
            Ok(Expression::DateAdd(Box::new(
                crate::expressions::DateAddFunc {
                    this: date,
                    interval: val,
                    unit,
                },
            )))
        }
    }
}
33134
33135 // ADD_MONTHS(date, val) -> target-specific form
33136 "ADD_MONTHS" if args.len() == 2 => {
33137 let date = args.remove(0);
33138 let val = args.remove(0);
33139
33140 if matches!(target, DialectType::TSQL) {
33141 // TSQL: DATEADD(MONTH, val, CAST(date AS DATETIME2))
33142 let cast_date = Self::ensure_cast_datetime2(date);
33143 return Ok(Expression::Function(Box::new(Function::new(
33144 "DATEADD".to_string(),
33145 vec![
33146 Expression::Identifier(Identifier::new("MONTH")),
33147 val,
33148 cast_date,
33149 ],
33150 ))));
33151 }
33152
33153 if matches!(target, DialectType::DuckDB) {
33154 // DuckDB: date + INTERVAL val MONTH
33155 let interval = Expression::Interval(Box::new(crate::expressions::Interval {
33156 this: Some(val),
33157 unit: Some(crate::expressions::IntervalUnitSpec::Simple {
33158 unit: crate::expressions::IntervalUnit::Month,
33159 use_plural: false,
33160 }),
33161 }));
33162 return Ok(Expression::Add(Box::new(
33163 crate::expressions::BinaryOp::new(date, interval),
33164 )));
33165 }
33166
33167 if matches!(target, DialectType::Snowflake) {
33168 // Snowflake: keep ADD_MONTHS when source is also Snowflake, else DATEADD
33169 if matches!(source, DialectType::Snowflake) {
33170 return Ok(Expression::Function(Box::new(Function::new(
33171 "ADD_MONTHS".to_string(),
33172 vec![date, val],
33173 ))));
33174 }
33175 return Ok(Expression::Function(Box::new(Function::new(
33176 "DATEADD".to_string(),
33177 vec![Expression::Identifier(Identifier::new("MONTH")), val, date],
33178 ))));
33179 }
33180
33181 if matches!(target, DialectType::Spark | DialectType::Databricks) {
33182 // Spark: ADD_MONTHS(date, val) - keep as is
33183 return Ok(Expression::Function(Box::new(Function::new(
33184 "ADD_MONTHS".to_string(),
33185 vec![date, val],
33186 ))));
33187 }
33188
33189 if matches!(target, DialectType::Hive) {
33190 return Ok(Expression::Function(Box::new(Function::new(
33191 "ADD_MONTHS".to_string(),
33192 vec![date, val],
33193 ))));
33194 }
33195
33196 if matches!(
33197 target,
33198 DialectType::Presto | DialectType::Trino | DialectType::Athena
33199 ) {
33200 // Presto: DATE_ADD('MONTH', val, date)
33201 return Ok(Expression::Function(Box::new(Function::new(
33202 "DATE_ADD".to_string(),
33203 vec![
33204 Expression::Literal(Box::new(Literal::String("MONTH".to_string()))),
33205 val,
33206 date,
33207 ],
33208 ))));
33209 }
33210
33211 // Default: keep ADD_MONTHS
33212 Ok(Expression::Function(Box::new(Function::new(
33213 "ADD_MONTHS".to_string(),
33214 vec![date, val],
33215 ))))
33216 }
33217
// SAFE_DIVIDE(x, y) -> target-specific form directly
// Emulates BigQuery's SAFE_DIVIDE, which yields NULL instead of a
// division-by-zero error when the divisor is 0.
"SAFE_DIVIDE" if args.len() == 2 => {
    let x = args.remove(0);
    let y = args.remove(0);
    // Wrap x and y in parens if they're complex expressions, so the operands
    // keep their precedence when spliced into `<>` and `/` operators below.
    let y_ref = match &y {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            y.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: y.clone(),
            trailing_comments: vec![],
        })),
    };
    let x_ref = match &x {
        Expression::Column(_) | Expression::Literal(_) | Expression::Identifier(_) => {
            x.clone()
        }
        _ => Expression::Paren(Box::new(Paren {
            this: x.clone(),
            trailing_comments: vec![],
        })),
    };
    // Shared building blocks: `y <> 0` guard and the plain quotient `x / y`.
    let condition = Expression::Neq(Box::new(crate::expressions::BinaryOp::new(
        y_ref.clone(),
        Expression::number(0),
    )));
    let div_expr = Expression::Div(Box::new(crate::expressions::BinaryOp::new(
        x_ref.clone(),
        y_ref.clone(),
    )));

    match target {
        // Spark/Databricks have a native equivalent: TRY_DIVIDE(x, y).
        // Uses the unwrapped originals — function args need no parens.
        DialectType::Spark | DialectType::Databricks => Ok(Expression::Function(
            Box::new(Function::new("TRY_DIVIDE".to_string(), vec![x, y])),
        )),
        DialectType::DuckDB | DialectType::PostgreSQL => {
            // CASE WHEN y <> 0 THEN x / y ELSE NULL END
            let result_div = if matches!(target, DialectType::PostgreSQL) {
                // Cast the numerator so the division is floating-point
                // (PostgreSQL integer / integer truncates).
                let cast_x = Expression::Cast(Box::new(Cast {
                    this: x_ref,
                    to: DataType::Custom {
                        name: "DOUBLE PRECISION".to_string(),
                    },
                    trailing_comments: vec![],
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                }));
                Expression::Div(Box::new(crate::expressions::BinaryOp::new(
                    cast_x, y_ref,
                )))
            } else {
                div_expr
            };
            Ok(Expression::Case(Box::new(crate::expressions::Case {
                operand: None,
                whens: vec![(condition, result_div)],
                else_: Some(Expression::Null(crate::expressions::Null)),
                comments: Vec::new(),
                inferred_type: None,
            })))
        }
        DialectType::Snowflake => {
            // IFF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: Some("IFF".to_string()),
                inferred_type: None,
            })))
        }
        DialectType::Presto | DialectType::Trino => {
            // IF(y <> 0, CAST(x AS DOUBLE) / y, NULL)
            // Numerator cast to DOUBLE so the quotient is not integer division.
            let cast_x = Expression::Cast(Box::new(Cast {
                this: x_ref,
                to: DataType::Double {
                    precision: None,
                    scale: None,
                },
                trailing_comments: vec![],
                double_colon_syntax: false,
                format: None,
                default: None,
                inferred_type: None,
            }));
            let cast_div = Expression::Div(Box::new(
                crate::expressions::BinaryOp::new(cast_x, y_ref),
            ));
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: cast_div,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
                inferred_type: None,
            })))
        }
        _ => {
            // IF(y <> 0, x / y, NULL)
            Ok(Expression::IfFunc(Box::new(crate::expressions::IfFunc {
                condition,
                true_value: div_expr,
                false_value: Some(Expression::Null(crate::expressions::Null)),
                original_name: None,
                inferred_type: None,
            })))
        }
    }
}
33329
33330 // GENERATE_UUID() -> UUID() with CAST to string
33331 "GENERATE_UUID" => {
33332 let uuid_expr = Expression::Uuid(Box::new(crate::expressions::Uuid {
33333 this: None,
33334 name: None,
33335 is_string: None,
33336 }));
33337 // Most targets need CAST(UUID() AS TEXT/VARCHAR/STRING)
33338 let cast_type = match target {
33339 DialectType::DuckDB => Some(DataType::Text),
33340 DialectType::Presto | DialectType::Trino => Some(DataType::VarChar {
33341 length: None,
33342 parenthesized_length: false,
33343 }),
33344 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
33345 Some(DataType::String { length: None })
33346 }
33347 _ => None,
33348 };
33349 if let Some(dt) = cast_type {
33350 Ok(Expression::Cast(Box::new(Cast {
33351 this: uuid_expr,
33352 to: dt,
33353 trailing_comments: vec![],
33354 double_colon_syntax: false,
33355 format: None,
33356 default: None,
33357 inferred_type: None,
33358 })))
33359 } else {
33360 Ok(uuid_expr)
33361 }
33362 }
33363
33364 // COUNTIF(x) -> CountIf expression
33365 "COUNTIF" if args.len() == 1 => {
33366 let arg = args.remove(0);
33367 Ok(Expression::CountIf(Box::new(crate::expressions::AggFunc {
33368 this: arg,
33369 distinct: false,
33370 filter: None,
33371 order_by: vec![],
33372 name: None,
33373 ignore_nulls: None,
33374 having_max: None,
33375 limit: None,
33376 inferred_type: None,
33377 })))
33378 }
33379
            // EDIT_DISTANCE(col1, col2, ...) -> Levenshtein expression.
            // With only two args this becomes a plain Levenshtein node; a third
            // (max_distance) argument triggers target-specific rewrites below.
            "EDIT_DISTANCE" => {
                // Strip named arguments (max_distance => N) and pass as positional
                let mut positional_args: Vec<Expression> = vec![];
                for arg in args {
                    match arg {
                        Expression::NamedArgument(na) => {
                            positional_args.push(na.value);
                        }
                        other => positional_args.push(other),
                    }
                }
                if positional_args.len() >= 2 {
                    let col1 = positional_args.remove(0);
                    let col2 = positional_args.remove(0);
                    let levenshtein = crate::expressions::BinaryFunc {
                        this: col1,
                        expression: col2,
                        original_name: None,
                        inferred_type: None,
                    };
                    // Pass extra args through a function wrapper with all args
                    if !positional_args.is_empty() {
                        let max_dist = positional_args.remove(0);
                        // DuckDB: CASE WHEN LEVENSHTEIN(a, b) IS NULL OR max IS NULL THEN NULL ELSE LEAST(LEVENSHTEIN(a, b), max) END
                        if matches!(target, DialectType::DuckDB) {
                            // DuckDB's LEVENSHTEIN has no max_distance parameter, so the
                            // cap is emulated with LEAST plus explicit NULL propagation.
                            let lev = Expression::Function(Box::new(Function::new(
                                "LEVENSHTEIN".to_string(),
                                vec![levenshtein.this, levenshtein.expression],
                            )));
                            let lev_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: lev.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let max_is_null =
                                Expression::IsNull(Box::new(crate::expressions::IsNull {
                                    this: max_dist.clone(),
                                    not: false,
                                    postfix_form: false,
                                }));
                            let null_check =
                                Expression::Or(Box::new(crate::expressions::BinaryOp {
                                    left: lev_is_null,
                                    right: max_is_null,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                            let least =
                                Expression::Least(Box::new(crate::expressions::VarArgFunc {
                                    expressions: vec![lev, max_dist],
                                    original_name: None,
                                    inferred_type: None,
                                }));
                            return Ok(Expression::Case(Box::new(crate::expressions::Case {
                                operand: None,
                                whens: vec![(
                                    null_check,
                                    Expression::Null(crate::expressions::Null),
                                )],
                                else_: Some(least),
                                comments: Vec::new(),
                                inferred_type: None,
                            })));
                        }
                        // Other targets: emit a plain function call with every remaining
                        // positional argument appended after (a, b, max_dist).
                        let mut all_args = vec![levenshtein.this, levenshtein.expression, max_dist];
                        all_args.extend(positional_args);
                        // PostgreSQL: use LEVENSHTEIN_LESS_EQUAL when max_distance is provided
                        let func_name = if matches!(target, DialectType::PostgreSQL) {
                            "LEVENSHTEIN_LESS_EQUAL"
                        } else {
                            "LEVENSHTEIN"
                        };
                        return Ok(Expression::Function(Box::new(Function::new(
                            func_name.to_string(),
                            all_args,
                        ))));
                    }
                    Ok(Expression::Levenshtein(Box::new(levenshtein)))
                } else {
                    // Fewer than two usable args: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "EDIT_DISTANCE".to_string(),
                        positional_args,
                    ))))
                }
            }
33469
33470 // TIMESTAMP_SECONDS(x) -> UnixToTime with scale 0
33471 "TIMESTAMP_SECONDS" if args.len() == 1 => {
33472 let arg = args.remove(0);
33473 Ok(Expression::UnixToTime(Box::new(
33474 crate::expressions::UnixToTime {
33475 this: Box::new(arg),
33476 scale: Some(0),
33477 zone: None,
33478 hours: None,
33479 minutes: None,
33480 format: None,
33481 target_type: None,
33482 },
33483 )))
33484 }
33485
33486 // TIMESTAMP_MILLIS(x) -> UnixToTime with scale 3
33487 "TIMESTAMP_MILLIS" if args.len() == 1 => {
33488 let arg = args.remove(0);
33489 Ok(Expression::UnixToTime(Box::new(
33490 crate::expressions::UnixToTime {
33491 this: Box::new(arg),
33492 scale: Some(3),
33493 zone: None,
33494 hours: None,
33495 minutes: None,
33496 format: None,
33497 target_type: None,
33498 },
33499 )))
33500 }
33501
33502 // TIMESTAMP_MICROS(x) -> UnixToTime with scale 6
33503 "TIMESTAMP_MICROS" if args.len() == 1 => {
33504 let arg = args.remove(0);
33505 Ok(Expression::UnixToTime(Box::new(
33506 crate::expressions::UnixToTime {
33507 this: Box::new(arg),
33508 scale: Some(6),
33509 zone: None,
33510 hours: None,
33511 minutes: None,
33512 format: None,
33513 target_type: None,
33514 },
33515 )))
33516 }
33517
33518 // DIV(x, y) -> IntDiv expression
33519 "DIV" if args.len() == 2 => {
33520 let x = args.remove(0);
33521 let y = args.remove(0);
33522 Ok(Expression::IntDiv(Box::new(
33523 crate::expressions::BinaryFunc {
33524 this: x,
33525 expression: y,
33526 original_name: None,
33527 inferred_type: None,
33528 },
33529 )))
33530 }
33531
            // TO_HEX(x) -> target-specific form.
            // When the argument is itself a hash call (MD5/SHA*), several targets
            // either already return hex or need a binary variant of the hash.
            "TO_HEX" if args.len() == 1 => {
                let arg = args.remove(0);
                // Check if inner function already returns hex string in certain targets
                // NOTE(review): this match on f.name is case-sensitive, while the
                // Snowflake branch below uppercases before matching — confirm whether
                // lowercase inner names (e.g. md5) should also be recognized here.
                let inner_returns_hex = matches!(&arg, Expression::Function(f) if matches!(f.name.as_str(), "MD5" | "SHA1" | "SHA256" | "SHA512"));
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: keep as TO_HEX
                    Ok(Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    ))))
                } else if matches!(target, DialectType::DuckDB) && inner_returns_hex {
                    // DuckDB: MD5/SHA already return hex strings, so TO_HEX is redundant
                    Ok(arg)
                } else if matches!(target, DialectType::Snowflake) && inner_returns_hex {
                    // Snowflake: TO_HEX(SHA1(x)) -> TO_CHAR(SHA1_BINARY(x))
                    // TO_HEX(MD5(x)) -> TO_CHAR(MD5_BINARY(x))
                    // TO_HEX(SHA256(x)) -> TO_CHAR(SHA2_BINARY(x, 256))
                    // TO_HEX(SHA512(x)) -> TO_CHAR(SHA2_BINARY(x, 512))
                    if let Expression::Function(ref inner_f) = arg {
                        let inner_args = inner_f.args.clone();
                        let binary_func = match inner_f.name.to_ascii_uppercase().as_str() {
                            "SHA1" => Expression::Function(Box::new(Function::new(
                                "SHA1_BINARY".to_string(),
                                inner_args,
                            ))),
                            "MD5" => Expression::Function(Box::new(Function::new(
                                "MD5_BINARY".to_string(),
                                inner_args,
                            ))),
                            "SHA256" => {
                                // SHA2_BINARY takes the digest width as a second argument.
                                let mut a = inner_args;
                                a.push(Expression::number(256));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            "SHA512" => {
                                let mut a = inner_args;
                                a.push(Expression::number(512));
                                Expression::Function(Box::new(Function::new(
                                    "SHA2_BINARY".to_string(),
                                    a,
                                )))
                            }
                            _ => arg.clone(),
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![binary_func],
                        ))))
                    } else {
                        // Unreachable in practice (inner_returns_hex implies Function),
                        // but fall back to LOWER(HEX(x)) defensively.
                        let inner = Expression::Function(Box::new(Function::new(
                            "HEX".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Lower(Box::new(
                            crate::expressions::UnaryFunc::new(inner),
                        )))
                    }
                } else if matches!(target, DialectType::Presto | DialectType::Trino) {
                    // Presto/Trino keep the TO_HEX name; LOWER matches BigQuery's
                    // lowercase hex output.
                    let inner = Expression::Function(Box::new(Function::new(
                        "TO_HEX".to_string(),
                        vec![arg],
                    )));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                } else {
                    // Generic fallback: LOWER(HEX(x)).
                    let inner =
                        Expression::Function(Box::new(Function::new("HEX".to_string(), vec![arg])));
                    Ok(Expression::Lower(Box::new(
                        crate::expressions::UnaryFunc::new(inner),
                    )))
                }
            }
33609
            // LAST_DAY(date, unit) -> LAST_DAY(date): the unit argument is dropped
            // because MONTH is the default. NOTE(review): an earlier comment mentioned
            // a PostgreSQL-specific transform, but no such branch exists in this arm —
            // confirm whether PostgreSQL handling happens elsewhere.
            "LAST_DAY" if args.len() == 2 => {
                let date = args.remove(0);
                let _unit = args.remove(0); // Strip the unit (MONTH is default)
                Ok(Expression::Function(Box::new(Function::new(
                    "LAST_DAY".to_string(),
                    vec![date],
                ))))
            }
33619
33620 // GENERATE_ARRAY(start, end, step?) -> GenerateSeries expression
33621 "GENERATE_ARRAY" => {
33622 let start = args.get(0).cloned();
33623 let end = args.get(1).cloned();
33624 let step = args.get(2).cloned();
33625 Ok(Expression::GenerateSeries(Box::new(
33626 crate::expressions::GenerateSeries {
33627 start: start.map(Box::new),
33628 end: end.map(Box::new),
33629 step: step.map(Box::new),
33630 is_end_exclusive: None,
33631 },
33632 )))
33633 }
33634
33635 // GENERATE_TIMESTAMP_ARRAY(start, end, step) -> GenerateSeries expression
33636 "GENERATE_TIMESTAMP_ARRAY" => {
33637 let start = args.get(0).cloned();
33638 let end = args.get(1).cloned();
33639 let step = args.get(2).cloned();
33640
33641 if matches!(target, DialectType::DuckDB) {
33642 // DuckDB: GENERATE_SERIES(CAST(start AS TIMESTAMP), CAST(end AS TIMESTAMP), step)
33643 // Only cast string literals - leave columns/expressions as-is
33644 let maybe_cast_ts = |expr: Expression| -> Expression {
33645 if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
33646 {
33647 Expression::Cast(Box::new(Cast {
33648 this: expr,
33649 to: DataType::Timestamp {
33650 precision: None,
33651 timezone: false,
33652 },
33653 trailing_comments: vec![],
33654 double_colon_syntax: false,
33655 format: None,
33656 default: None,
33657 inferred_type: None,
33658 }))
33659 } else {
33660 expr
33661 }
33662 };
33663 let cast_start = start.map(maybe_cast_ts);
33664 let cast_end = end.map(maybe_cast_ts);
33665 Ok(Expression::GenerateSeries(Box::new(
33666 crate::expressions::GenerateSeries {
33667 start: cast_start.map(Box::new),
33668 end: cast_end.map(Box::new),
33669 step: step.map(Box::new),
33670 is_end_exclusive: None,
33671 },
33672 )))
33673 } else {
33674 Ok(Expression::GenerateSeries(Box::new(
33675 crate::expressions::GenerateSeries {
33676 start: start.map(Box::new),
33677 end: end.map(Box::new),
33678 step: step.map(Box::new),
33679 is_end_exclusive: None,
33680 },
33681 )))
33682 }
33683 }
33684
            // TO_JSON(x) -> target-specific (from Spark/Hive).
            // NOTE(review): the Presto/Trino and DuckDB branches only use the first
            // argument; any extra arguments are silently dropped — confirm intended.
            "TO_JSON" => {
                match target {
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        // JSON is not a first-class DataType here, so use Custom.
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::BigQuery => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    // All other targets keep TO_JSON unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON".to_string(),
                        args,
                    )))),
                }
            }
33740
            // TO_JSON_STRING(x) -> target-specific.
            // Mirrors the TO_JSON arm above for Presto/Trino and DuckDB; Spark-family
            // and Snowflake targets rename the call to TO_JSON.
            "TO_JSON_STRING" => {
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => Ok(
                        Expression::Function(Box::new(Function::new("TO_JSON".to_string(), args))),
                    ),
                    DialectType::Presto | DialectType::Trino => {
                        // JSON_FORMAT(CAST(x AS JSON))
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let cast_json = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Custom {
                                name: "JSON".to_string(),
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "JSON_FORMAT".to_string(),
                            vec![cast_json],
                        ))))
                    }
                    DialectType::DuckDB => {
                        // CAST(TO_JSON(x) AS TEXT)
                        let arg = args
                            .into_iter()
                            .next()
                            .unwrap_or(Expression::Null(crate::expressions::Null));
                        let to_json = Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: to_json,
                            to: DataType::Text,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TO_JSON(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_JSON".to_string(),
                            args,
                        ))))
                    }
                    // All other targets keep TO_JSON_STRING unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "TO_JSON_STRING".to_string(),
                        args,
                    )))),
                }
            }
33802
33803 // SAFE_ADD(x, y) -> SafeAdd expression
33804 "SAFE_ADD" if args.len() == 2 => {
33805 let x = args.remove(0);
33806 let y = args.remove(0);
33807 Ok(Expression::SafeAdd(Box::new(crate::expressions::SafeAdd {
33808 this: Box::new(x),
33809 expression: Box::new(y),
33810 })))
33811 }
33812
33813 // SAFE_SUBTRACT(x, y) -> SafeSubtract expression
33814 "SAFE_SUBTRACT" if args.len() == 2 => {
33815 let x = args.remove(0);
33816 let y = args.remove(0);
33817 Ok(Expression::SafeSubtract(Box::new(
33818 crate::expressions::SafeSubtract {
33819 this: Box::new(x),
33820 expression: Box::new(y),
33821 },
33822 )))
33823 }
33824
33825 // SAFE_MULTIPLY(x, y) -> SafeMultiply expression
33826 "SAFE_MULTIPLY" if args.len() == 2 => {
33827 let x = args.remove(0);
33828 let y = args.remove(0);
33829 Ok(Expression::SafeMultiply(Box::new(
33830 crate::expressions::SafeMultiply {
33831 this: Box::new(x),
33832 expression: Box::new(y),
33833 },
33834 )))
33835 }
33836
33837 // REGEXP_CONTAINS(str, pattern) -> RegexpLike expression
33838 "REGEXP_CONTAINS" if args.len() == 2 => {
33839 let str_expr = args.remove(0);
33840 let pattern = args.remove(0);
33841 Ok(Expression::RegexpLike(Box::new(
33842 crate::expressions::RegexpFunc {
33843 this: str_expr,
33844 pattern,
33845 flags: None,
33846 },
33847 )))
33848 }
33849
33850 // CONTAINS_SUBSTR(a, b) -> CONTAINS(LOWER(a), LOWER(b))
33851 "CONTAINS_SUBSTR" if args.len() == 2 => {
33852 let a = args.remove(0);
33853 let b = args.remove(0);
33854 let lower_a = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(a)));
33855 let lower_b = Expression::Lower(Box::new(crate::expressions::UnaryFunc::new(b)));
33856 Ok(Expression::Function(Box::new(Function::new(
33857 "CONTAINS".to_string(),
33858 vec![lower_a, lower_b],
33859 ))))
33860 }
33861
33862 // INT64(x) -> CAST(x AS BIGINT)
33863 "INT64" if args.len() == 1 => {
33864 let arg = args.remove(0);
33865 Ok(Expression::Cast(Box::new(Cast {
33866 this: arg,
33867 to: DataType::BigInt { length: None },
33868 trailing_comments: vec![],
33869 double_colon_syntax: false,
33870 format: None,
33871 default: None,
33872 inferred_type: None,
33873 })))
33874 }
33875
33876 // INSTR(str, substr) -> target-specific
33877 "INSTR" if args.len() >= 2 => {
33878 let str_expr = args.remove(0);
33879 let substr = args.remove(0);
33880 if matches!(target, DialectType::Snowflake) {
33881 // CHARINDEX(substr, str)
33882 Ok(Expression::Function(Box::new(Function::new(
33883 "CHARINDEX".to_string(),
33884 vec![substr, str_expr],
33885 ))))
33886 } else if matches!(target, DialectType::BigQuery) {
33887 // Keep as INSTR
33888 Ok(Expression::Function(Box::new(Function::new(
33889 "INSTR".to_string(),
33890 vec![str_expr, substr],
33891 ))))
33892 } else {
33893 // Default: keep as INSTR
33894 Ok(Expression::Function(Box::new(Function::new(
33895 "INSTR".to_string(),
33896 vec![str_expr, substr],
33897 ))))
33898 }
33899 }
33900
            // BigQuery DATE_TRUNC(expr, unit) -> DATE_TRUNC('unit', expr) for standard SQL.
            // BigQuery puts the unit second as a bare identifier; most other engines
            // take it first as a string literal.
            "DATE_TRUNC" if args.len() == 2 => {
                let expr = args.remove(0);
                let unit_expr = args.remove(0);
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB
                    | DialectType::Snowflake
                    | DialectType::PostgreSQL
                    | DialectType::Presto
                    | DialectType::Trino
                    | DialectType::Databricks
                    | DialectType::Spark
                    | DialectType::Redshift
                    | DialectType::ClickHouse
                    | DialectType::TSQL => {
                        // Standard: DATE_TRUNC('UNIT', expr)
                        // NOTE(review): TSQL's DATETRUNC takes a bare keyword unit,
                        // not a string literal — presumably the TSQL generator
                        // rewrites this; confirm.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String(unit_str))),
                                expr,
                            ],
                        ))))
                    }
                    _ => {
                        // Keep BigQuery arg order: DATE_TRUNC(expr, unit)
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![expr, unit_expr],
                        ))))
                    }
                }
            }
33936
            // TIMESTAMP_TRUNC / DATETIME_TRUNC -> target-specific
            "TIMESTAMP_TRUNC" | "DATETIME_TRUNC" if args.len() >= 2 => {
                // TIMESTAMP_TRUNC(ts, unit) or TIMESTAMP_TRUNC(ts, unit, timezone)
                let ts = args.remove(0);
                let unit_expr = args.remove(0);
                // Optional third argument is the timezone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                let unit_str = get_unit_str(&unit_expr);

                match target {
                    DialectType::DuckDB => {
                        // DuckDB: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                        // With timezone: DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz' (for DAY granularity)
                        // Without timezone for MINUTE+ granularity: just DATE_TRUNC
                        let is_coarse = matches!(
                            unit_str.as_str(),
                            "DAY" | "WEEK" | "MONTH" | "QUARTER" | "YEAR"
                        );
                        // For DATETIME_TRUNC, cast string args to TIMESTAMP
                        let cast_ts = if name == "DATETIME_TRUNC" {
                            match ts {
                                // String literal: wrap in an explicit CAST(.. AS TIMESTAMP).
                                Expression::Literal(ref lit)
                                    if matches!(lit.as_ref(), Literal::String(ref _s)) =>
                                {
                                    Expression::Cast(Box::new(Cast {
                                        this: ts,
                                        to: DataType::Timestamp {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                // Anything else goes through the shared helper.
                                _ => Self::maybe_cast_ts_to_tz(ts, &name),
                            }
                        } else {
                            Self::maybe_cast_ts_to_tz(ts, &name)
                        };

                        if let Some(tz_arg) = tz {
                            if is_coarse {
                                // DATE_TRUNC('UNIT', ts AT TIME ZONE 'tz') AT TIME ZONE 'tz'
                                let at_tz = Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: cast_ts,
                                        zone: tz_arg.clone(),
                                    },
                                ));
                                let date_trunc = Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        at_tz,
                                    ],
                                )));
                                Ok(Expression::AtTimeZone(Box::new(
                                    crate::expressions::AtTimeZone {
                                        this: date_trunc,
                                        zone: tz_arg,
                                    },
                                )))
                            } else {
                                // For MINUTE/HOUR: no AT TIME ZONE wrapper, just DATE_TRUNC('UNIT', ts)
                                Ok(Expression::Function(Box::new(Function::new(
                                    "DATE_TRUNC".to_string(),
                                    vec![
                                        Expression::Literal(Box::new(Literal::String(unit_str))),
                                        cast_ts,
                                    ],
                                ))))
                            }
                        } else {
                            // No timezone: DATE_TRUNC('UNIT', CAST(ts AS TIMESTAMPTZ))
                            Ok(Expression::Function(Box::new(Function::new(
                                "DATE_TRUNC".to_string(),
                                vec![
                                    Expression::Literal(Box::new(Literal::String(unit_str))),
                                    cast_ts,
                                ],
                            ))))
                        }
                    }
                    DialectType::Databricks | DialectType::Spark => {
                        // Databricks/Spark: DATE_TRUNC('UNIT', ts)
                        // NOTE(review): a timezone argument, if present, is dropped here.
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_TRUNC".to_string(),
                            vec![Expression::Literal(Box::new(Literal::String(unit_str))), ts],
                        ))))
                    }
                    _ => {
                        // Default: keep as TIMESTAMP_TRUNC('UNIT', ts, [tz])
                        let unit = Expression::Literal(Box::new(Literal::String(unit_str)));
                        let mut date_trunc_args = vec![unit, ts];
                        if let Some(tz_arg) = tz {
                            date_trunc_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_TRUNC".to_string(),
                            date_trunc_args,
                        ))))
                    }
                }
            }
34047
            // TIME(h, m, s) -> target-specific, TIME('string') -> CAST('string' AS TIME)
            // Arity decides the meaning: 3 args = constructor, 1 arg = parse/cast,
            // 2 args = value + timezone conversion; other arities pass through.
            "TIME" => {
                if args.len() == 3 {
                    // TIME(h, m, s) constructor
                    match target {
                        DialectType::TSQL => {
                            // TIMEFROMPARTS(h, m, s, 0, 0)
                            // TIMEFROMPARTS requires fractions and precision; pad with zeros.
                            args.push(Expression::number(0));
                            args.push(Expression::number(0));
                            Ok(Expression::Function(Box::new(Function::new(
                                "TIMEFROMPARTS".to_string(),
                                args,
                            ))))
                        }
                        DialectType::MySQL => Ok(Expression::Function(Box::new(Function::new(
                            "MAKETIME".to_string(),
                            args,
                        )))),
                        DialectType::PostgreSQL => Ok(Expression::Function(Box::new(
                            Function::new("MAKE_TIME".to_string(), args),
                        ))),
                        _ => Ok(Expression::Function(Box::new(Function::new(
                            "TIME".to_string(),
                            args,
                        )))),
                    }
                } else if args.len() == 1 {
                    let arg = args.remove(0);
                    if matches!(target, DialectType::Spark) {
                        // Spark: CAST(x AS TIMESTAMP) (yes, TIMESTAMP not TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // Most targets: CAST(x AS TIME)
                        Ok(Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() == 2 {
                    // TIME(expr, timezone) -> CAST(CAST(expr AS TIMESTAMPTZ) AT TIME ZONE tz AS TIME)
                    let expr = args.remove(0);
                    let tz = args.remove(0);
                    let cast_tstz = Expression::Cast(Box::new(Cast {
                        this: expr,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    let at_tz = Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                        this: cast_tstz,
                        zone: tz,
                    }));
                    Ok(Expression::Cast(Box::new(Cast {
                        this: at_tz,
                        to: DataType::Time {
                            precision: None,
                            timezone: false,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else {
                    // Unsupported arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIME".to_string(),
                        args,
                    ))))
                }
            }
34144
            // DATETIME('string') -> CAST('string' AS TIMESTAMP)
            // DATETIME('date', TIME 'time') -> CAST(CAST('date' AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
            // DATETIME('string', 'timezone') -> CAST(CAST('string' AS TIMESTAMPTZ) AT TIME ZONE tz AS TIMESTAMP)
            // DATETIME(y, m, d, h, min, s) -> target-specific
            "DATETIME" => {
                // For BigQuery target: keep DATETIME function but convert TIME literal to CAST
                if matches!(target, DialectType::BigQuery) {
                    if args.len() == 2 {
                        let has_time_literal = matches!(&args[1], Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                        if has_time_literal {
                            let first = args.remove(0);
                            let second = args.remove(0);
                            // Rewrite TIME 'x' as CAST('x' AS TIME) so the generator
                            // does not emit a bare TIME literal.
                            let time_as_cast = match second {
                                Expression::Literal(lit)
                                    if matches!(lit.as_ref(), Literal::Time(_)) =>
                                {
                                    // Guarded by the matches! above, so this always binds.
                                    let Literal::Time(s) = lit.as_ref() else {
                                        unreachable!()
                                    };
                                    Expression::Cast(Box::new(Cast {
                                        this: Expression::Literal(Box::new(Literal::String(
                                            s.clone(),
                                        ))),
                                        to: DataType::Time {
                                            precision: None,
                                            timezone: false,
                                        },
                                        trailing_comments: vec![],
                                        double_colon_syntax: false,
                                        format: None,
                                        default: None,
                                        inferred_type: None,
                                    }))
                                }
                                other => other,
                            };
                            return Ok(Expression::Function(Box::new(Function::new(
                                "DATETIME".to_string(),
                                vec![first, time_as_cast],
                            ))));
                        }
                    }
                    // BQ->BQ with any other shape: keep the call as-is.
                    return Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))));
                }

                if args.len() == 1 {
                    // Single arg: plain parse/cast to TIMESTAMP.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let first = args.remove(0);
                    let second = args.remove(0);
                    // Check if second arg is a TIME literal
                    let is_time_literal = matches!(&second, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)));
                    if is_time_literal {
                        // DATETIME('date', TIME 'time') -> CAST(CAST(date AS DATE) + CAST('time' AS TIME) AS TIMESTAMP)
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // Convert TIME 'x' literal to string 'x' so CAST produces CAST('x' AS TIME) not CAST(TIME 'x' AS TIME)
                        let time_as_string = match second {
                            Expression::Literal(lit)
                                if matches!(lit.as_ref(), Literal::Time(_)) =>
                            {
                                let Literal::Time(s) = lit.as_ref() else {
                                    unreachable!()
                                };
                                Expression::Literal(Box::new(Literal::String(s.clone())))
                            }
                            other => other,
                        };
                        let cast_time = Expression::Cast(Box::new(Cast {
                            this: time_as_string,
                            to: DataType::Time {
                                precision: None,
                                timezone: false,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // date + time, then cast the sum to TIMESTAMP.
                        let add_expr =
                            Expression::Add(Box::new(BinaryOp::new(cast_date, cast_time)));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: add_expr,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // DATETIME('string', 'timezone')
                        let cast_tstz = Expression::Cast(Box::new(Cast {
                            this: first,
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_tstz,
                                zone: second,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else if args.len() >= 3 {
                    // DATETIME(y, m, d, h, min, s) -> TIMESTAMP_FROM_PARTS for Snowflake
                    // For other targets, use MAKE_TIMESTAMP or similar
                    // NOTE(review): only Snowflake is actually rewritten here; the
                    // MAKE_TIMESTAMP path mentioned above is not implemented — confirm.
                    if matches!(target, DialectType::Snowflake) {
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMP_FROM_PARTS".to_string(),
                            args,
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATETIME".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Zero args: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "DATETIME".to_string(),
                        args,
                    ))))
                }
            }
34314
            // TIMESTAMP(x) -> CAST(x AS TIMESTAMP WITH TIME ZONE) for Presto
            // TIMESTAMP(x, tz) -> CAST(x AS TIMESTAMP) AT TIME ZONE tz for DuckDB
            "TIMESTAMP" => {
                if args.len() == 1 {
                    // Single arg: cast to timezone-aware TIMESTAMP.
                    let arg = args.remove(0);
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: true,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    // Cast the value to a plain TIMESTAMP before attaching the zone.
                    let cast_ts = Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: DataType::Timestamp {
                            timezone: false,
                            precision: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    }));
                    if matches!(target, DialectType::Snowflake) {
                        // CONVERT_TIMEZONE('tz', CAST(x AS TIMESTAMP))
                        Ok(Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![tz, cast_ts],
                        ))))
                    } else {
                        // Other targets use the AT TIME ZONE operator.
                        Ok(Expression::AtTimeZone(Box::new(
                            crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: tz,
                            },
                        )))
                    }
                } else {
                    // Unsupported arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "TIMESTAMP".to_string(),
                        args,
                    ))))
                }
            }
34368
            // STRING(x) -> CAST(x AS VARCHAR/TEXT)
            // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS VARCHAR/TEXT)
            "STRING" => {
                if args.len() == 1 {
                    let arg = args.remove(0);
                    // DuckDB prefers TEXT; everything else gets VARCHAR.
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    Ok(Expression::Cast(Box::new(Cast {
                        this: arg,
                        to: cast_type,
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if args.len() == 2 {
                    let arg = args.remove(0);
                    let tz = args.remove(0);
                    let cast_type = match target {
                        DialectType::DuckDB => DataType::Text,
                        _ => DataType::VarChar {
                            length: None,
                            parenthesized_length: false,
                        },
                    };
                    if matches!(target, DialectType::Snowflake) {
                        // STRING(x, tz) -> CAST(CONVERT_TIMEZONE('UTC', tz, x) AS VARCHAR)
                        let convert_tz = Expression::Function(Box::new(Function::new(
                            "CONVERT_TIMEZONE".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String("UTC".to_string()))),
                                tz,
                                arg,
                            ],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: convert_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    } else {
                        // STRING(x, tz) -> CAST(CAST(x AS TIMESTAMP) AT TIME ZONE 'UTC' AT TIME ZONE tz AS TEXT/VARCHAR)
                        // The double AT TIME ZONE first pins the value to UTC, then
                        // shifts it into the requested zone.
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: arg,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let at_utc =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: cast_ts,
                                zone: Expression::Literal(Box::new(Literal::String(
                                    "UTC".to_string(),
                                ))),
                            }));
                        let at_tz =
                            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                                this: at_utc,
                                zone: tz,
                            }));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: at_tz,
                            to: cast_type,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                } else {
                    // Unsupported arity: leave the call untouched.
                    Ok(Expression::Function(Box::new(Function::new(
                        "STRING".to_string(),
                        args,
                    ))))
                }
            }
34462
            // UNIX_SECONDS, UNIX_MILLIS, UNIX_MICROS as functions (not expressions)
            //
            // NOTE(review): a second `"UNIX_SECONDS" if args.len() == 1` arm with
            // the identical pattern and guard appears later in this match (it uses
            // ts_literal_to_cast_tz and a plain TIMESTAMPDIFF function call). That
            // later arm is unreachable because this arm matches first — confirm
            // which variant is intended and remove the other.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![cast_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::TimestampDiff(Box::new(
                            crate::expressions::TimestampDiff {
                                this: Box::new(epoch),
                                expression: Box::new(ts),
                                unit: Some("SECONDS".to_string()),
                            },
                        )))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
34514
            // UNIX_MILLIS(ts) -> DuckDB: EPOCH_MS(CAST(ts AS TIMESTAMPTZ)).
            // NOTE(review): a duplicate, unreachable "UNIX_MILLIS" arm exists
            // further down this match — reconcile and delete one.
            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }
34532
            // UNIX_MICROS(ts) -> DuckDB: EPOCH_US(CAST(ts AS TIMESTAMPTZ)).
            // NOTE(review): a duplicate, unreachable "UNIX_MICROS" arm exists
            // further down this match — reconcile and delete one.
            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let cast_ts = Self::ensure_cast_timestamptz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![cast_ts],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
34550
34551 // ARRAY_CONCAT / LIST_CONCAT -> target-specific
34552 "ARRAY_CONCAT" | "LIST_CONCAT" => {
34553 match target {
34554 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
34555 // CONCAT(arr1, arr2, ...)
34556 Ok(Expression::Function(Box::new(Function::new(
34557 "CONCAT".to_string(),
34558 args,
34559 ))))
34560 }
34561 DialectType::Presto | DialectType::Trino => {
34562 // CONCAT(arr1, arr2, ...)
34563 Ok(Expression::Function(Box::new(Function::new(
34564 "CONCAT".to_string(),
34565 args,
34566 ))))
34567 }
34568 DialectType::Snowflake => {
34569 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34570 if args.len() == 1 {
34571 // ARRAY_CAT requires 2 args, add empty array as []
34572 let empty_arr = Expression::ArrayFunc(Box::new(
34573 crate::expressions::ArrayConstructor {
34574 expressions: vec![],
34575 bracket_notation: true,
34576 use_list_keyword: false,
34577 },
34578 ));
34579 let mut new_args = args;
34580 new_args.push(empty_arr);
34581 Ok(Expression::Function(Box::new(Function::new(
34582 "ARRAY_CAT".to_string(),
34583 new_args,
34584 ))))
34585 } else if args.is_empty() {
34586 Ok(Expression::Function(Box::new(Function::new(
34587 "ARRAY_CAT".to_string(),
34588 args,
34589 ))))
34590 } else {
34591 let mut it = args.into_iter().rev();
34592 let mut result = it.next().unwrap();
34593 for arr in it {
34594 result = Expression::Function(Box::new(Function::new(
34595 "ARRAY_CAT".to_string(),
34596 vec![arr, result],
34597 )));
34598 }
34599 Ok(result)
34600 }
34601 }
34602 DialectType::PostgreSQL => {
34603 // ARRAY_CAT(arr1, ARRAY_CAT(arr2, arr3))
34604 if args.len() <= 1 {
34605 Ok(Expression::Function(Box::new(Function::new(
34606 "ARRAY_CAT".to_string(),
34607 args,
34608 ))))
34609 } else {
34610 let mut it = args.into_iter().rev();
34611 let mut result = it.next().unwrap();
34612 for arr in it {
34613 result = Expression::Function(Box::new(Function::new(
34614 "ARRAY_CAT".to_string(),
34615 vec![arr, result],
34616 )));
34617 }
34618 Ok(result)
34619 }
34620 }
34621 DialectType::Redshift => {
34622 // ARRAY_CONCAT(arr1, ARRAY_CONCAT(arr2, arr3))
34623 if args.len() <= 2 {
34624 Ok(Expression::Function(Box::new(Function::new(
34625 "ARRAY_CONCAT".to_string(),
34626 args,
34627 ))))
34628 } else {
34629 let mut it = args.into_iter().rev();
34630 let mut result = it.next().unwrap();
34631 for arr in it {
34632 result = Expression::Function(Box::new(Function::new(
34633 "ARRAY_CONCAT".to_string(),
34634 vec![arr, result],
34635 )));
34636 }
34637 Ok(result)
34638 }
34639 }
34640 DialectType::DuckDB => {
34641 // LIST_CONCAT supports multiple args natively in DuckDB
34642 Ok(Expression::Function(Box::new(Function::new(
34643 "LIST_CONCAT".to_string(),
34644 args,
34645 ))))
34646 }
34647 _ => Ok(Expression::Function(Box::new(Function::new(
34648 "ARRAY_CONCAT".to_string(),
34649 args,
34650 )))),
34651 }
34652 }
34653
            // ARRAY_CONCAT_AGG -> Snowflake: ARRAY_FLATTEN(ARRAY_AGG(x))
            // (aggregate rows into an array of arrays, then flatten one level).
            // Other targets keep the function name unchanged.
            "ARRAY_CONCAT_AGG" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Snowflake => {
                        // Plain ARRAY_AGG: no DISTINCT/FILTER/ORDER BY/LIMIT.
                        let array_agg =
                            Expression::ArrayAgg(Box::new(crate::expressions::AggFunc {
                                this: arg,
                                distinct: false,
                                filter: None,
                                order_by: vec![],
                                name: None,
                                ignore_nulls: None,
                                having_max: None,
                                limit: None,
                                inferred_type: None,
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "ARRAY_FLATTEN".to_string(),
                            vec![array_agg],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "ARRAY_CONCAT_AGG".to_string(),
                        vec![arg],
                    )))),
                }
            }
34682
            // MD5/SHA1/SHA256/SHA512 -> target-specific hash functions
            //
            // MD5(x): the Spark family's MD5 returns a hex string, so it is wrapped
            // in UNHEX to recover the binary digest (presumably to match BigQuery's
            // bytes-returning MD5 — confirm). Snowflake has a dedicated MD5_BINARY.
            "MD5" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
                        // UNHEX(MD5(x))
                        let md5 = Expression::Function(Box::new(Function::new(
                            "MD5".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![md5],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // MD5_BINARY(x)
                        Ok(Expression::Function(Box::new(Function::new(
                            "MD5_BINARY".to_string(),
                            vec![arg],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "MD5".to_string(),
                        vec![arg],
                    )))),
                }
            }
34711
34712 "SHA1" if args.len() == 1 => {
34713 let arg = args.remove(0);
34714 match target {
34715 DialectType::DuckDB => {
34716 // UNHEX(SHA1(x))
34717 let sha1 = Expression::Function(Box::new(Function::new(
34718 "SHA1".to_string(),
34719 vec![arg],
34720 )));
34721 Ok(Expression::Function(Box::new(Function::new(
34722 "UNHEX".to_string(),
34723 vec![sha1],
34724 ))))
34725 }
34726 _ => Ok(Expression::Function(Box::new(Function::new(
34727 "SHA1".to_string(),
34728 vec![arg],
34729 )))),
34730 }
34731 }
34732
            // SHA256(x) -> target-specific:
            // DuckDB wraps the hex result in UNHEX for a binary digest; Snowflake
            // uses SHA2_BINARY(x, 256); Redshift/Spark use the generic SHA2(x, 256).
            "SHA256" if args.len() == 1 => {
                let arg = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // UNHEX(SHA256(x))
                        let sha = Expression::Function(Box::new(Function::new(
                            "SHA256".to_string(),
                            vec![arg],
                        )));
                        Ok(Expression::Function(Box::new(Function::new(
                            "UNHEX".to_string(),
                            vec![sha],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // SHA2_BINARY(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2_BINARY".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    DialectType::Redshift | DialectType::Spark => {
                        // SHA2(x, 256)
                        Ok(Expression::Function(Box::new(Function::new(
                            "SHA2".to_string(),
                            vec![arg, Expression::number(256)],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "SHA256".to_string(),
                        vec![arg],
                    )))),
                }
            }
34767
34768 "SHA512" if args.len() == 1 => {
34769 let arg = args.remove(0);
34770 match target {
34771 DialectType::Snowflake => {
34772 // SHA2_BINARY(x, 512)
34773 Ok(Expression::Function(Box::new(Function::new(
34774 "SHA2_BINARY".to_string(),
34775 vec![arg, Expression::number(512)],
34776 ))))
34777 }
34778 DialectType::Redshift | DialectType::Spark => {
34779 // SHA2(x, 512)
34780 Ok(Expression::Function(Box::new(Function::new(
34781 "SHA2".to_string(),
34782 vec![arg, Expression::number(512)],
34783 ))))
34784 }
34785 _ => Ok(Expression::Function(Box::new(Function::new(
34786 "SHA512".to_string(),
34787 vec![arg],
34788 )))),
34789 }
34790 }
34791
            // REGEXP_EXTRACT_ALL(str, pattern) -> add a target-appropriate default
            // group argument: BigQuery returns the capture group when the pattern
            // has one, otherwise the full match, and several targets need that
            // group index spelled out explicitly.
            "REGEXP_EXTRACT_ALL" if args.len() == 2 => {
                let str_expr = args.remove(0);
                let pattern = args.remove(0);

                // Check if pattern contains capturing groups (parentheses)
                // NOTE(review): this is a purely textual check — escaped parens
                // `\(`, parens inside character classes, and non-capturing groups
                // `(?:...)` all count as "capturing", and any non-literal pattern
                // is assumed to have no groups. Confirm that is acceptable.
                let has_groups = match &pattern {
                    Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
                        let Literal::String(s) = lit.as_ref() else {
                            unreachable!()
                        };
                        s.contains('(') && s.contains(')')
                    }
                    _ => false,
                };

                match target {
                    DialectType::DuckDB => {
                        // DuckDB takes an explicit group index: 1 = the capture
                        // group, 0 = the full match.
                        let group = if has_groups {
                            Expression::number(1)
                        } else {
                            Expression::number(0)
                        };
                        Ok(Expression::Function(Box::new(Function::new(
                            "REGEXP_EXTRACT_ALL".to_string(),
                            vec![str_expr, pattern, group],
                        ))))
                    }
                    DialectType::Spark | DialectType::Databricks => {
                        // Spark's default group_index is 1 (same as BigQuery), so omit for capturing groups
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        } else {
                            // No groups: force group 0 (the full match).
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(0)],
                            ))))
                        }
                    }
                    DialectType::Presto | DialectType::Trino => {
                        // Presto/Trino default to the full match, so only spell the
                        // group index out when a capture group is present.
                        if has_groups {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern, Expression::number(1)],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    DialectType::Snowflake => {
                        if has_groups {
                            // REGEXP_EXTRACT_ALL(str, pattern, 1, 1, 'c', 1)
                            // Extra args: position, occurrence, regex parameters
                            // ('c' = case-sensitive), group_num — presumably the
                            // REGEXP_SUBSTR_ALL signature; verify against Snowflake docs.
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![
                                    str_expr,
                                    pattern,
                                    Expression::number(1),
                                    Expression::number(1),
                                    Expression::Literal(Box::new(Literal::String("c".to_string()))),
                                    Expression::number(1),
                                ],
                            ))))
                        } else {
                            Ok(Expression::Function(Box::new(Function::new(
                                "REGEXP_EXTRACT_ALL".to_string(),
                                vec![str_expr, pattern],
                            ))))
                        }
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "REGEXP_EXTRACT_ALL".to_string(),
                        vec![str_expr, pattern],
                    )))),
                }
            }
34874
34875 // MOD(x, y) -> x % y for PostgreSQL/DuckDB
34876 "MOD" if args.len() == 2 => {
34877 match target {
34878 DialectType::PostgreSQL
34879 | DialectType::DuckDB
34880 | DialectType::Presto
34881 | DialectType::Trino
34882 | DialectType::Athena
34883 | DialectType::Snowflake => {
34884 let x = args.remove(0);
34885 let y = args.remove(0);
34886 // Wrap complex expressions in parens to preserve precedence
34887 let needs_paren = |e: &Expression| {
34888 matches!(
34889 e,
34890 Expression::Add(_)
34891 | Expression::Sub(_)
34892 | Expression::Mul(_)
34893 | Expression::Div(_)
34894 )
34895 };
34896 let x = if needs_paren(&x) {
34897 Expression::Paren(Box::new(crate::expressions::Paren {
34898 this: x,
34899 trailing_comments: vec![],
34900 }))
34901 } else {
34902 x
34903 };
34904 let y = if needs_paren(&y) {
34905 Expression::Paren(Box::new(crate::expressions::Paren {
34906 this: y,
34907 trailing_comments: vec![],
34908 }))
34909 } else {
34910 y
34911 };
34912 Ok(Expression::Mod(Box::new(
34913 crate::expressions::BinaryOp::new(x, y),
34914 )))
34915 }
34916 DialectType::Hive | DialectType::Spark | DialectType::Databricks => {
34917 // Hive/Spark: a % b
34918 let x = args.remove(0);
34919 let y = args.remove(0);
34920 let needs_paren = |e: &Expression| {
34921 matches!(
34922 e,
34923 Expression::Add(_)
34924 | Expression::Sub(_)
34925 | Expression::Mul(_)
34926 | Expression::Div(_)
34927 )
34928 };
34929 let x = if needs_paren(&x) {
34930 Expression::Paren(Box::new(crate::expressions::Paren {
34931 this: x,
34932 trailing_comments: vec![],
34933 }))
34934 } else {
34935 x
34936 };
34937 let y = if needs_paren(&y) {
34938 Expression::Paren(Box::new(crate::expressions::Paren {
34939 this: y,
34940 trailing_comments: vec![],
34941 }))
34942 } else {
34943 y
34944 };
34945 Ok(Expression::Mod(Box::new(
34946 crate::expressions::BinaryOp::new(x, y),
34947 )))
34948 }
34949 _ => Ok(Expression::Function(Box::new(Function::new(
34950 "MOD".to_string(),
34951 args,
34952 )))),
34953 }
34954 }
34955
34956 // ARRAY_FILTER(arr, lambda) -> FILTER for Hive/Spark/Presto, ARRAY_FILTER for StarRocks
34957 "ARRAY_FILTER" if args.len() == 2 => {
34958 let name = match target {
34959 DialectType::DuckDB => "LIST_FILTER",
34960 DialectType::StarRocks => "ARRAY_FILTER",
34961 _ => "FILTER",
34962 };
34963 Ok(Expression::Function(Box::new(Function::new(
34964 name.to_string(),
34965 args,
34966 ))))
34967 }
34968 // FILTER(arr, lambda) -> ARRAY_FILTER for StarRocks, LIST_FILTER for DuckDB
34969 "FILTER" if args.len() == 2 => {
34970 let name = match target {
34971 DialectType::DuckDB => "LIST_FILTER",
34972 DialectType::StarRocks => "ARRAY_FILTER",
34973 _ => "FILTER",
34974 };
34975 Ok(Expression::Function(Box::new(Function::new(
34976 name.to_string(),
34977 args,
34978 ))))
34979 }
34980 // REDUCE(arr, init, lambda1, lambda2) -> AGGREGATE for Spark
34981 "REDUCE" if args.len() >= 3 => {
34982 let name = match target {
34983 DialectType::Spark | DialectType::Databricks => "AGGREGATE",
34984 _ => "REDUCE",
34985 };
34986 Ok(Expression::Function(Box::new(Function::new(
34987 name.to_string(),
34988 args,
34989 ))))
34990 }
34991 // ARRAY_REVERSE(x) -> arrayReverse for ClickHouse (handled by generator)
34992 "ARRAY_REVERSE" if args.len() == 1 => Ok(Expression::Function(Box::new(
34993 Function::new("ARRAY_REVERSE".to_string(), args),
34994 ))),
34995
34996 // CONCAT(a, b, ...) -> a || b || ... for DuckDB with 3+ args
34997 "CONCAT" if args.len() > 2 => match target {
34998 DialectType::DuckDB => {
34999 let mut it = args.into_iter();
35000 let mut result = it.next().unwrap();
35001 for arg in it {
35002 result = Expression::DPipe(Box::new(crate::expressions::DPipe {
35003 this: Box::new(result),
35004 expression: Box::new(arg),
35005 safe: None,
35006 }));
35007 }
35008 Ok(result)
35009 }
35010 _ => Ok(Expression::Function(Box::new(Function::new(
35011 "CONCAT".to_string(),
35012 args,
35013 )))),
35014 },
35015
            // GENERATE_DATE_ARRAY(start, end[, step]) -> target-specific
            //
            // - BigQuery: normalized to always carry an explicit step
            //   (default INTERVAL '1' DAY).
            // - DuckDB: CAST(GENERATE_SERIES(...) AS DATE[]), casting string
            //   literal bounds to DATE.
            // - Snowflake: kept as GENERATE_DATE_ARRAY (with explicit step) for a
            //   later Snowflake-specific transform.
            // - Everything else: converted to a GenerateSeries AST node.
            "GENERATE_DATE_ARRAY" => {
                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: add default interval if not present
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // INTERVAL '1' DAY
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else if matches!(target, DialectType::DuckDB) {
                    // DuckDB: CAST(GENERATE_SERIES(CAST(start AS DATE), CAST(end AS DATE), step) AS DATE[])
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    // Missing step defaults to INTERVAL '1' DAY.
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });

                    // Wrap start/end in CAST(... AS DATE) only for string literals
                    let maybe_cast_date = |expr: Expression| -> Expression {
                        if matches!(&expr, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)))
                        {
                            Expression::Cast(Box::new(Cast {
                                this: expr,
                                to: DataType::Date,
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        } else {
                            expr
                        }
                    };
                    let cast_start = start.map(maybe_cast_date);
                    let cast_end = end.map(maybe_cast_date);

                    let gen_series =
                        Expression::GenerateSeries(Box::new(crate::expressions::GenerateSeries {
                            start: cast_start.map(Box::new),
                            end: cast_end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        }));

                    // Wrap in CAST(... AS DATE[])
                    Ok(Expression::Cast(Box::new(Cast {
                        this: gen_series,
                        to: DataType::Array {
                            element_type: Box::new(DataType::Date),
                            dimension: None,
                        },
                        trailing_comments: vec![],
                        double_colon_syntax: false,
                        format: None,
                        default: None,
                        inferred_type: None,
                    })))
                } else if matches!(target, DialectType::Snowflake) {
                    // Snowflake: keep as GENERATE_DATE_ARRAY function for later transform
                    // (transform_generate_date_array_snowflake will convert to ARRAY_GENERATE_RANGE + DATEADD)
                    if args.len() == 2 {
                        let start = args.remove(0);
                        let end = args.remove(0);
                        // Same INTERVAL '1' DAY default as the BigQuery branch.
                        let default_interval =
                            Expression::Interval(Box::new(crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            vec![start, end, default_interval],
                        ))))
                    } else {
                        Ok(Expression::Function(Box::new(Function::new(
                            "GENERATE_DATE_ARRAY".to_string(),
                            args,
                        ))))
                    }
                } else {
                    // Convert to GenerateSeries for other targets
                    let start = args.get(0).cloned();
                    let end = args.get(1).cloned();
                    let step = args.get(2).cloned().or_else(|| {
                        Some(Expression::Interval(Box::new(
                            crate::expressions::Interval {
                                this: Some(Expression::Literal(Box::new(Literal::String(
                                    "1".to_string(),
                                )))),
                                unit: Some(crate::expressions::IntervalUnitSpec::Simple {
                                    unit: crate::expressions::IntervalUnit::Day,
                                    use_plural: false,
                                }),
                            },
                        )))
                    });
                    Ok(Expression::GenerateSeries(Box::new(
                        crate::expressions::GenerateSeries {
                            start: start.map(Box::new),
                            end: end.map(Box::new),
                            step: step.map(Box::new),
                            is_end_exclusive: None,
                        },
                    )))
                }
            }
35155
            // PARSE_DATE(format, str) -> target-specific
            // (BigQuery argument order is (format, string).)
            "PARSE_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(STRPTIME(str, duck_format) AS DATE)
                        // STRPTIME takes (string, format) — reversed from BigQuery.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: strptime,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // _POLYGLOT_DATE(str, snowflake_format)
                        // Use marker so Snowflake target transform keeps it as DATE() instead of TO_DATE()
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "_POLYGLOT_DATE".to_string(),
                            vec![str_expr, sf_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "PARSE_DATE".to_string(),
                        vec![format, str_expr],
                    )))),
                }
            }
35193
            // PARSE_TIMESTAMP(format, str[, tz]) -> target-specific
            "PARSE_TIMESTAMP" if args.len() >= 2 => {
                let format = args.remove(0);
                let str_expr = args.remove(0);
                // Optional third argument: time zone.
                let tz = if !args.is_empty() {
                    Some(args.remove(0))
                } else {
                    None
                };
                match target {
                    DialectType::DuckDB => {
                        // STRPTIME(str, duck_format)
                        // NOTE(review): the optional tz argument is silently dropped
                        // here, so PARSE_TIMESTAMP(fmt, s, tz) loses its time zone
                        // when targeting DuckDB — confirm this is intentional.
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        let strptime = Expression::Function(Box::new(Function::new(
                            "STRPTIME".to_string(),
                            vec![str_expr, duck_format],
                        )));
                        Ok(strptime)
                    }
                    _ => {
                        // Reassemble the original argument list unchanged.
                        let mut result_args = vec![format, str_expr];
                        if let Some(tz_arg) = tz {
                            result_args.push(tz_arg);
                        }
                        Ok(Expression::Function(Box::new(Function::new(
                            "PARSE_TIMESTAMP".to_string(),
                            result_args,
                        ))))
                    }
                }
            }
35224
            // FORMAT_DATE(format, date) -> target-specific
            "FORMAT_DATE" if args.len() == 2 => {
                let format = args.remove(0);
                let date_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(date AS DATE), format)
                        // NOTE(review): unlike the FORMAT_DATETIME / PARSE_DATE
                        // DuckDB branches, the format string is passed through
                        // without bq_format_to_duckdb conversion — confirm BigQuery
                        // date format codes are valid DuckDB STRFTIME codes here.
                        let cast_date = Expression::Cast(Box::new(Cast {
                            this: date_expr,
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_date, format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATE".to_string(),
                        vec![format, date_expr],
                    )))),
                }
            }
35252
            // FORMAT_DATETIME(format, datetime) -> target-specific
            "FORMAT_DATETIME" if args.len() == 2 => {
                let format = args.remove(0);
                let dt_expr = args.remove(0);

                if matches!(target, DialectType::BigQuery) {
                    // BQ->BQ: normalize %H:%M:%S to %T, %x to %D
                    let norm_format = Self::bq_format_normalize_bq(&format);
                    // Also strip DATETIME keyword from typed literals:
                    // a Timestamp literal is rewritten as CAST('...' AS DATETIME).
                    let norm_dt = match dt_expr {
                        Expression::Literal(lit)
                            if matches!(lit.as_ref(), Literal::Timestamp(_)) =>
                        {
                            let Literal::Timestamp(s) = lit.as_ref() else {
                                unreachable!()
                            };
                            Expression::Cast(Box::new(Cast {
                                this: Expression::Literal(Box::new(Literal::String(s.clone()))),
                                to: DataType::Custom {
                                    name: "DATETIME".to_string(),
                                },
                                trailing_comments: vec![],
                                double_colon_syntax: false,
                                format: None,
                                default: None,
                                inferred_type: None,
                            }))
                        }
                        other => other,
                    };
                    return Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![norm_format, norm_dt],
                    ))));
                }

                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(dt AS TIMESTAMP), duckdb_format)
                        let cast_dt = Self::ensure_cast_timestamp(dt_expr);
                        let duck_format = Self::bq_format_to_duckdb(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_dt, duck_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_DATETIME".to_string(),
                        vec![format, dt_expr],
                    )))),
                }
            }
35305
            // FORMAT_TIMESTAMP(format, ts) -> target-specific
            "FORMAT_TIMESTAMP" if args.len() == 2 => {
                let format = args.remove(0);
                let ts_expr = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // STRFTIME(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), format)
                        // NOTE(review): the format string is not run through
                        // bq_format_to_duckdb here (the FORMAT_DATETIME DuckDB
                        // branch does convert) — confirm this is intentional.
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "STRFTIME".to_string(),
                            vec![cast_ts, format],
                        ))))
                    }
                    DialectType::Snowflake => {
                        // TO_CHAR(CAST(CAST(ts AS TIMESTAMPTZ) AS TIMESTAMP), snowflake_format)
                        let cast_tstz = Self::ensure_cast_timestamptz(ts_expr);
                        let cast_ts = Expression::Cast(Box::new(Cast {
                            this: cast_tstz,
                            to: DataType::Timestamp {
                                timezone: false,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        let sf_format = Self::bq_format_to_snowflake(&format);
                        Ok(Expression::Function(Box::new(Function::new(
                            "TO_CHAR".to_string(),
                            vec![cast_ts, sf_format],
                        ))))
                    }
                    // Other targets: pass through with the original argument order.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "FORMAT_TIMESTAMP".to_string(),
                        vec![format, ts_expr],
                    )))),
                }
            }
35358
            // UNIX_DATE(date) -> DATE_DIFF('DAY', '1970-01-01', date) for DuckDB
            // (days since the Unix epoch).
            "UNIX_DATE" if args.len() == 1 => {
                let date = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST('1970-01-01' AS DATE): the Unix epoch.
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01".to_string(),
                            ))),
                            to: DataType::Date,
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        // DATE_DIFF('DAY', epoch, date) but date might be DATE '...' literal
                        // Need to convert DATE literal to CAST
                        let norm_date = Self::date_literal_to_cast(date);
                        Ok(Expression::Function(Box::new(Function::new(
                            "DATE_DIFF".to_string(),
                            vec![
                                Expression::Literal(Box::new(Literal::String("DAY".to_string()))),
                                epoch,
                                norm_date,
                            ],
                        ))))
                    }
                    // Other targets: pass through unchanged.
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_DATE".to_string(),
                        vec![date],
                    )))),
                }
            }
35393
            // UNIX_SECONDS(ts) -> target-specific
            //
            // NOTE(review): UNREACHABLE — an earlier arm in this match has the
            // identical pattern and guard (`"UNIX_SECONDS" if args.len() == 1`),
            // so control never reaches this arm. The earlier arm uses
            // ensure_cast_timestamptz and an Expression::TimestampDiff node,
            // whereas this one uses ts_literal_to_cast_tz and a plain
            // TIMESTAMPDIFF function call. Decide which variant is correct and
            // delete the other.
            "UNIX_SECONDS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // CAST(EPOCH(CAST(ts AS TIMESTAMPTZ)) AS BIGINT)
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        let epoch = Expression::Function(Box::new(Function::new(
                            "EPOCH".to_string(),
                            vec![norm_ts],
                        )));
                        Ok(Expression::Cast(Box::new(Cast {
                            this: epoch,
                            to: DataType::BigInt { length: None },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        })))
                    }
                    DialectType::Snowflake => {
                        // TIMESTAMPDIFF(SECONDS, CAST('1970-01-01 00:00:00+00' AS TIMESTAMPTZ), ts)
                        let epoch = Expression::Cast(Box::new(Cast {
                            this: Expression::Literal(Box::new(Literal::String(
                                "1970-01-01 00:00:00+00".to_string(),
                            ))),
                            to: DataType::Timestamp {
                                timezone: true,
                                precision: None,
                            },
                            trailing_comments: vec![],
                            double_colon_syntax: false,
                            format: None,
                            default: None,
                            inferred_type: None,
                        }));
                        Ok(Expression::Function(Box::new(Function::new(
                            "TIMESTAMPDIFF".to_string(),
                            vec![
                                Expression::Identifier(Identifier::new("SECONDS".to_string())),
                                epoch,
                                ts,
                            ],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_SECONDS".to_string(),
                        vec![ts],
                    )))),
                }
            }
35446
            // UNIX_MILLIS(ts) -> target-specific
            // NOTE(review): UNREACHABLE — an identical pattern+guard arm appears
            // earlier in this match (using ensure_cast_timestamptz); this duplicate
            // (using ts_literal_to_cast_tz) never runs. Reconcile and delete one.
            "UNIX_MILLIS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_MS(CAST(ts AS TIMESTAMPTZ))
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_MS".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MILLIS".to_string(),
                        vec![ts],
                    )))),
                }
            }
35464
            // UNIX_MICROS(ts) -> target-specific
            // NOTE(review): UNREACHABLE — an identical pattern+guard arm appears
            // earlier in this match (using ensure_cast_timestamptz); this duplicate
            // (using ts_literal_to_cast_tz) never runs. Reconcile and delete one.
            "UNIX_MICROS" if args.len() == 1 => {
                let ts = args.remove(0);
                match target {
                    DialectType::DuckDB => {
                        // EPOCH_US(CAST(ts AS TIMESTAMPTZ))
                        let norm_ts = Self::ts_literal_to_cast_tz(ts);
                        Ok(Expression::Function(Box::new(Function::new(
                            "EPOCH_US".to_string(),
                            vec![norm_ts],
                        ))))
                    }
                    _ => Ok(Expression::Function(Box::new(Function::new(
                        "UNIX_MICROS".to_string(),
                        vec![ts],
                    )))),
                }
            }
35482
35483 // INSTR(str, substr) -> target-specific
35484 "INSTR" => {
35485 if matches!(target, DialectType::BigQuery) {
35486 // BQ->BQ: keep as INSTR
35487 Ok(Expression::Function(Box::new(Function::new(
35488 "INSTR".to_string(),
35489 args,
35490 ))))
35491 } else if matches!(target, DialectType::Snowflake) && args.len() == 2 {
35492 // Snowflake: CHARINDEX(substr, str) - swap args
35493 let str_expr = args.remove(0);
35494 let substr = args.remove(0);
35495 Ok(Expression::Function(Box::new(Function::new(
35496 "CHARINDEX".to_string(),
35497 vec![substr, str_expr],
35498 ))))
35499 } else {
35500 // Keep as INSTR for other targets
35501 Ok(Expression::Function(Box::new(Function::new(
35502 "INSTR".to_string(),
35503 args,
35504 ))))
35505 }
35506 }
35507
35508 // CURRENT_TIMESTAMP / CURRENT_DATE handling - parens normalization and timezone
35509 "CURRENT_TIMESTAMP" | "CURRENT_DATE" | "CURRENT_DATETIME" | "CURRENT_TIME" => {
35510 if matches!(target, DialectType::BigQuery) {
35511 // BQ->BQ: always output with parens (function form), keep any timezone arg
35512 Ok(Expression::Function(Box::new(Function::new(name, args))))
35513 } else if name == "CURRENT_DATE" && args.len() == 1 {
35514 // CURRENT_DATE('UTC') - has timezone arg
35515 let tz_arg = args.remove(0);
35516 match target {
35517 DialectType::DuckDB => {
35518 // CAST(CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS DATE)
35519 let ct = Expression::CurrentTimestamp(
35520 crate::expressions::CurrentTimestamp {
35521 precision: None,
35522 sysdate: false,
35523 },
35524 );
35525 let at_tz =
35526 Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
35527 this: ct,
35528 zone: tz_arg,
35529 }));
35530 Ok(Expression::Cast(Box::new(Cast {
35531 this: at_tz,
35532 to: DataType::Date,
35533 trailing_comments: vec![],
35534 double_colon_syntax: false,
35535 format: None,
35536 default: None,
35537 inferred_type: None,
35538 })))
35539 }
35540 DialectType::Snowflake => {
35541 // CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS DATE)
35542 let ct = Expression::Function(Box::new(Function::new(
35543 "CURRENT_TIMESTAMP".to_string(),
35544 vec![],
35545 )));
35546 let convert = Expression::Function(Box::new(Function::new(
35547 "CONVERT_TIMEZONE".to_string(),
35548 vec![tz_arg, ct],
35549 )));
35550 Ok(Expression::Cast(Box::new(Cast {
35551 this: convert,
35552 to: DataType::Date,
35553 trailing_comments: vec![],
35554 double_colon_syntax: false,
35555 format: None,
35556 default: None,
35557 inferred_type: None,
35558 })))
35559 }
35560 _ => {
35561 // PostgreSQL, MySQL, etc.: CURRENT_DATE AT TIME ZONE 'UTC'
35562 let cd = Expression::CurrentDate(crate::expressions::CurrentDate);
35563 Ok(Expression::AtTimeZone(Box::new(
35564 crate::expressions::AtTimeZone {
35565 this: cd,
35566 zone: tz_arg,
35567 },
35568 )))
35569 }
35570 }
35571 } else if (name == "CURRENT_TIMESTAMP"
35572 || name == "CURRENT_TIME"
35573 || name == "CURRENT_DATE")
35574 && args.is_empty()
35575 && matches!(
35576 target,
35577 DialectType::PostgreSQL
35578 | DialectType::DuckDB
35579 | DialectType::Presto
35580 | DialectType::Trino
35581 )
35582 {
35583 // These targets want no-parens CURRENT_TIMESTAMP / CURRENT_DATE / CURRENT_TIME
35584 if name == "CURRENT_TIMESTAMP" {
35585 Ok(Expression::CurrentTimestamp(
35586 crate::expressions::CurrentTimestamp {
35587 precision: None,
35588 sysdate: false,
35589 },
35590 ))
35591 } else if name == "CURRENT_DATE" {
35592 Ok(Expression::CurrentDate(crate::expressions::CurrentDate))
35593 } else {
35594 // CURRENT_TIME
35595 Ok(Expression::CurrentTime(crate::expressions::CurrentTime {
35596 precision: None,
35597 }))
35598 }
35599 } else {
35600 // All other targets: keep as function (with parens)
35601 Ok(Expression::Function(Box::new(Function::new(name, args))))
35602 }
35603 }
35604
35605 // JSON_QUERY(json, path) -> target-specific
35606 "JSON_QUERY" if args.len() == 2 => {
35607 match target {
35608 DialectType::DuckDB | DialectType::SQLite => {
35609 // json -> path syntax
35610 let json_expr = args.remove(0);
35611 let path = args.remove(0);
35612 Ok(Expression::JsonExtract(Box::new(
35613 crate::expressions::JsonExtractFunc {
35614 this: json_expr,
35615 path,
35616 returning: None,
35617 arrow_syntax: true,
35618 hash_arrow_syntax: false,
35619 wrapper_option: None,
35620 quotes_option: None,
35621 on_scalar_string: false,
35622 on_error: None,
35623 },
35624 )))
35625 }
35626 DialectType::Spark | DialectType::Databricks | DialectType::Hive => {
35627 Ok(Expression::Function(Box::new(Function::new(
35628 "GET_JSON_OBJECT".to_string(),
35629 args,
35630 ))))
35631 }
35632 DialectType::PostgreSQL | DialectType::Redshift => Ok(Expression::Function(
35633 Box::new(Function::new("JSON_EXTRACT_PATH".to_string(), args)),
35634 )),
35635 _ => Ok(Expression::Function(Box::new(Function::new(
35636 "JSON_QUERY".to_string(),
35637 args,
35638 )))),
35639 }
35640 }
35641
35642 // JSON_VALUE_ARRAY(json, path) -> target-specific
35643 "JSON_VALUE_ARRAY" if args.len() == 2 => {
35644 match target {
35645 DialectType::DuckDB => {
35646 // CAST(json -> path AS TEXT[])
35647 let json_expr = args.remove(0);
35648 let path = args.remove(0);
35649 let arrow = Expression::JsonExtract(Box::new(
35650 crate::expressions::JsonExtractFunc {
35651 this: json_expr,
35652 path,
35653 returning: None,
35654 arrow_syntax: true,
35655 hash_arrow_syntax: false,
35656 wrapper_option: None,
35657 quotes_option: None,
35658 on_scalar_string: false,
35659 on_error: None,
35660 },
35661 ));
35662 Ok(Expression::Cast(Box::new(Cast {
35663 this: arrow,
35664 to: DataType::Array {
35665 element_type: Box::new(DataType::Text),
35666 dimension: None,
35667 },
35668 trailing_comments: vec![],
35669 double_colon_syntax: false,
35670 format: None,
35671 default: None,
35672 inferred_type: None,
35673 })))
35674 }
35675 DialectType::Snowflake => {
35676 let json_expr = args.remove(0);
35677 let path_expr = args.remove(0);
35678 // Convert JSON path from $.path to just path
35679 let sf_path = if let Expression::Literal(ref lit) = path_expr {
35680 if let Literal::String(ref s) = lit.as_ref() {
35681 let trimmed = s.trim_start_matches('$').trim_start_matches('.');
35682 Expression::Literal(Box::new(Literal::String(trimmed.to_string())))
35683 } else {
35684 path_expr.clone()
35685 }
35686 } else {
35687 path_expr
35688 };
35689 let parse_json = Expression::Function(Box::new(Function::new(
35690 "PARSE_JSON".to_string(),
35691 vec![json_expr],
35692 )));
35693 let get_path = Expression::Function(Box::new(Function::new(
35694 "GET_PATH".to_string(),
35695 vec![parse_json, sf_path],
35696 )));
35697 // TRANSFORM(get_path, x -> CAST(x AS VARCHAR))
35698 let cast_expr = Expression::Cast(Box::new(Cast {
35699 this: Expression::Identifier(Identifier::new("x")),
35700 to: DataType::VarChar {
35701 length: None,
35702 parenthesized_length: false,
35703 },
35704 trailing_comments: vec![],
35705 double_colon_syntax: false,
35706 format: None,
35707 default: None,
35708 inferred_type: None,
35709 }));
35710 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
35711 parameters: vec![Identifier::new("x")],
35712 body: cast_expr,
35713 colon: false,
35714 parameter_types: vec![],
35715 }));
35716 Ok(Expression::Function(Box::new(Function::new(
35717 "TRANSFORM".to_string(),
35718 vec![get_path, lambda],
35719 ))))
35720 }
35721 _ => Ok(Expression::Function(Box::new(Function::new(
35722 "JSON_VALUE_ARRAY".to_string(),
35723 args,
35724 )))),
35725 }
35726 }
35727
35728 // BigQuery REGEXP_EXTRACT(val, regex[, position[, occurrence]]) -> target dialects
35729 // BigQuery's 3rd arg is "position" (starting char index), 4th is "occurrence" (which match to return)
35730 // This is different from Hive/Spark where 3rd arg is "group_index"
35731 "REGEXP_EXTRACT" if matches!(source, DialectType::BigQuery) => {
35732 match target {
35733 DialectType::DuckDB
35734 | DialectType::Presto
35735 | DialectType::Trino
35736 | DialectType::Athena => {
35737 if args.len() == 2 {
35738 // REGEXP_EXTRACT(val, regex) -> REGEXP_EXTRACT(val, regex, 1)
35739 args.push(Expression::number(1));
35740 Ok(Expression::Function(Box::new(Function::new(
35741 "REGEXP_EXTRACT".to_string(),
35742 args,
35743 ))))
35744 } else if args.len() == 3 {
35745 let val = args.remove(0);
35746 let regex = args.remove(0);
35747 let position = args.remove(0);
35748 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35749 if is_pos_1 {
35750 Ok(Expression::Function(Box::new(Function::new(
35751 "REGEXP_EXTRACT".to_string(),
35752 vec![val, regex, Expression::number(1)],
35753 ))))
35754 } else {
35755 let substring_expr = Expression::Function(Box::new(Function::new(
35756 "SUBSTRING".to_string(),
35757 vec![val, position],
35758 )));
35759 let nullif_expr = Expression::Function(Box::new(Function::new(
35760 "NULLIF".to_string(),
35761 vec![
35762 substring_expr,
35763 Expression::Literal(Box::new(Literal::String(
35764 String::new(),
35765 ))),
35766 ],
35767 )));
35768 Ok(Expression::Function(Box::new(Function::new(
35769 "REGEXP_EXTRACT".to_string(),
35770 vec![nullif_expr, regex, Expression::number(1)],
35771 ))))
35772 }
35773 } else if args.len() == 4 {
35774 let val = args.remove(0);
35775 let regex = args.remove(0);
35776 let position = args.remove(0);
35777 let occurrence = args.remove(0);
35778 let is_pos_1 = matches!(&position, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35779 let is_occ_1 = matches!(&occurrence, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(n) if n == "1"));
35780 if is_pos_1 && is_occ_1 {
35781 Ok(Expression::Function(Box::new(Function::new(
35782 "REGEXP_EXTRACT".to_string(),
35783 vec![val, regex, Expression::number(1)],
35784 ))))
35785 } else {
35786 let subject = if is_pos_1 {
35787 val
35788 } else {
35789 let substring_expr = Expression::Function(Box::new(
35790 Function::new("SUBSTRING".to_string(), vec![val, position]),
35791 ));
35792 Expression::Function(Box::new(Function::new(
35793 "NULLIF".to_string(),
35794 vec![
35795 substring_expr,
35796 Expression::Literal(Box::new(Literal::String(
35797 String::new(),
35798 ))),
35799 ],
35800 )))
35801 };
35802 let extract_all = Expression::Function(Box::new(Function::new(
35803 "REGEXP_EXTRACT_ALL".to_string(),
35804 vec![subject, regex, Expression::number(1)],
35805 )));
35806 Ok(Expression::Function(Box::new(Function::new(
35807 "ARRAY_EXTRACT".to_string(),
35808 vec![extract_all, occurrence],
35809 ))))
35810 }
35811 } else {
35812 Ok(Expression::Function(Box::new(Function {
35813 name: f.name,
35814 args,
35815 distinct: f.distinct,
35816 trailing_comments: f.trailing_comments,
35817 use_bracket_syntax: f.use_bracket_syntax,
35818 no_parens: f.no_parens,
35819 quoted: f.quoted,
35820 span: None,
35821 inferred_type: None,
35822 })))
35823 }
35824 }
35825 DialectType::Snowflake => {
35826 // BigQuery REGEXP_EXTRACT -> Snowflake REGEXP_SUBSTR
35827 Ok(Expression::Function(Box::new(Function::new(
35828 "REGEXP_SUBSTR".to_string(),
35829 args,
35830 ))))
35831 }
35832 _ => {
35833 // For other targets (Hive/Spark/BigQuery): pass through as-is
35834 // BigQuery's default group behavior matches Hive/Spark for 2-arg case
35835 Ok(Expression::Function(Box::new(Function {
35836 name: f.name,
35837 args,
35838 distinct: f.distinct,
35839 trailing_comments: f.trailing_comments,
35840 use_bracket_syntax: f.use_bracket_syntax,
35841 no_parens: f.no_parens,
35842 quoted: f.quoted,
35843 span: None,
35844 inferred_type: None,
35845 })))
35846 }
35847 }
35848 }
35849
35850 // BigQuery STRUCT(args) -> target-specific struct expression
35851 "STRUCT" => {
35852 // Convert Function args to Struct fields
35853 let mut fields: Vec<(Option<String>, Expression)> = Vec::new();
35854 for (i, arg) in args.into_iter().enumerate() {
35855 match arg {
35856 Expression::Alias(a) => {
35857 // Named field: expr AS name
35858 fields.push((Some(a.alias.name.clone()), a.this));
35859 }
35860 other => {
35861 // Unnamed field: for Spark/Hive, keep as None
35862 // For Snowflake, auto-name as _N
35863 // For DuckDB, use column name for column refs, _N for others
35864 if matches!(target, DialectType::Snowflake) {
35865 fields.push((Some(format!("_{}", i)), other));
35866 } else if matches!(target, DialectType::DuckDB) {
35867 let auto_name = match &other {
35868 Expression::Column(col) => col.name.name.clone(),
35869 _ => format!("_{}", i),
35870 };
35871 fields.push((Some(auto_name), other));
35872 } else {
35873 fields.push((None, other));
35874 }
35875 }
35876 }
35877 }
35878
35879 match target {
35880 DialectType::Snowflake => {
35881 // OBJECT_CONSTRUCT('name', value, ...)
35882 let mut oc_args = Vec::new();
35883 for (name, val) in &fields {
35884 if let Some(n) = name {
35885 oc_args.push(Expression::Literal(Box::new(Literal::String(
35886 n.clone(),
35887 ))));
35888 oc_args.push(val.clone());
35889 } else {
35890 oc_args.push(val.clone());
35891 }
35892 }
35893 Ok(Expression::Function(Box::new(Function::new(
35894 "OBJECT_CONSTRUCT".to_string(),
35895 oc_args,
35896 ))))
35897 }
35898 DialectType::DuckDB => {
35899 // {'name': value, ...}
35900 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
35901 fields,
35902 })))
35903 }
35904 DialectType::Hive => {
35905 // STRUCT(val1, val2, ...) - strip aliases
35906 let hive_fields: Vec<(Option<String>, Expression)> =
35907 fields.into_iter().map(|(_, v)| (None, v)).collect();
35908 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
35909 fields: hive_fields,
35910 })))
35911 }
35912 DialectType::Spark | DialectType::Databricks => {
35913 // Use Expression::Struct to bypass Spark target transform auto-naming
35914 Ok(Expression::Struct(Box::new(crate::expressions::Struct {
35915 fields,
35916 })))
35917 }
35918 DialectType::Presto | DialectType::Trino | DialectType::Athena => {
35919 // Check if all fields are named AND all have inferable types - if so, wrap in CAST(ROW(...) AS ROW(name TYPE, ...))
35920 let all_named =
35921 !fields.is_empty() && fields.iter().all(|(name, _)| name.is_some());
35922 let all_types_inferable = all_named
35923 && fields
35924 .iter()
35925 .all(|(_, val)| Self::can_infer_presto_type(val));
35926 let row_args: Vec<Expression> =
35927 fields.iter().map(|(_, v)| v.clone()).collect();
35928 let row_expr = Expression::Function(Box::new(Function::new(
35929 "ROW".to_string(),
35930 row_args,
35931 )));
35932 if all_named && all_types_inferable {
35933 // Build ROW type with inferred types
35934 let mut row_type_fields = Vec::new();
35935 for (name, val) in &fields {
35936 if let Some(n) = name {
35937 let type_str = Self::infer_sql_type_for_presto(val);
35938 row_type_fields.push(crate::expressions::StructField::new(
35939 n.clone(),
35940 crate::expressions::DataType::Custom { name: type_str },
35941 ));
35942 }
35943 }
35944 let row_type = crate::expressions::DataType::Struct {
35945 fields: row_type_fields,
35946 nested: true,
35947 };
35948 Ok(Expression::Cast(Box::new(Cast {
35949 this: row_expr,
35950 to: row_type,
35951 trailing_comments: Vec::new(),
35952 double_colon_syntax: false,
35953 format: None,
35954 default: None,
35955 inferred_type: None,
35956 })))
35957 } else {
35958 Ok(row_expr)
35959 }
35960 }
35961 _ => {
35962 // Default: keep as STRUCT function with original args
35963 let mut new_args = Vec::new();
35964 for (name, val) in fields {
35965 if let Some(n) = name {
35966 new_args.push(Expression::Alias(Box::new(
35967 crate::expressions::Alias::new(val, Identifier::new(n)),
35968 )));
35969 } else {
35970 new_args.push(val);
35971 }
35972 }
35973 Ok(Expression::Function(Box::new(Function::new(
35974 "STRUCT".to_string(),
35975 new_args,
35976 ))))
35977 }
35978 }
35979 }
35980
35981 // ROUND(x, n, 'ROUND_HALF_EVEN') -> ROUND_EVEN(x, n) for DuckDB
35982 "ROUND" if args.len() == 3 => {
35983 let x = args.remove(0);
35984 let n = args.remove(0);
35985 let mode = args.remove(0);
35986 // Check if mode is 'ROUND_HALF_EVEN'
35987 let is_half_even = matches!(&mode, Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(s) if s.eq_ignore_ascii_case("ROUND_HALF_EVEN")));
35988 if is_half_even && matches!(target, DialectType::DuckDB) {
35989 Ok(Expression::Function(Box::new(Function::new(
35990 "ROUND_EVEN".to_string(),
35991 vec![x, n],
35992 ))))
35993 } else {
35994 // Pass through with all args
35995 Ok(Expression::Function(Box::new(Function::new(
35996 "ROUND".to_string(),
35997 vec![x, n, mode],
35998 ))))
35999 }
36000 }
36001
36002 // MAKE_INTERVAL(year, month, named_args...) -> INTERVAL string for Snowflake/DuckDB
36003 "MAKE_INTERVAL" => {
36004 // MAKE_INTERVAL(1, 2, minute => 5, day => 3)
36005 // The positional args are: year, month
36006 // Named args are: day =>, minute =>, etc.
36007 // For Snowflake: INTERVAL '1 year, 2 month, 5 minute, 3 day'
36008 // For DuckDB: INTERVAL '1 year 2 month 5 minute 3 day'
36009 // For BigQuery->BigQuery: reorder named args (day before minute)
36010 if matches!(target, DialectType::Snowflake | DialectType::DuckDB) {
36011 let mut parts: Vec<(String, String)> = Vec::new();
36012 let mut pos_idx = 0;
36013 let pos_units = ["year", "month"];
36014 for arg in &args {
36015 if let Expression::NamedArgument(na) = arg {
36016 // Named arg like minute => 5
36017 let unit = na.name.name.clone();
36018 if let Expression::Literal(lit) = &na.value {
36019 if let Literal::Number(n) = lit.as_ref() {
36020 parts.push((unit, n.clone()));
36021 }
36022 }
36023 } else if pos_idx < pos_units.len() {
36024 if let Expression::Literal(lit) = arg {
36025 if let Literal::Number(n) = lit.as_ref() {
36026 parts.push((pos_units[pos_idx].to_string(), n.clone()));
36027 }
36028 }
36029 pos_idx += 1;
36030 }
36031 }
36032 // Don't sort - preserve original argument order
36033 let separator = if matches!(target, DialectType::Snowflake) {
36034 ", "
36035 } else {
36036 " "
36037 };
36038 let interval_str = parts
36039 .iter()
36040 .map(|(u, v)| format!("{} {}", v, u))
36041 .collect::<Vec<_>>()
36042 .join(separator);
36043 Ok(Expression::Interval(Box::new(
36044 crate::expressions::Interval {
36045 this: Some(Expression::Literal(Box::new(Literal::String(
36046 interval_str,
36047 )))),
36048 unit: None,
36049 },
36050 )))
36051 } else if matches!(target, DialectType::BigQuery) {
36052 // BigQuery->BigQuery: reorder named args (day, minute, etc.)
36053 let mut positional = Vec::new();
36054 let mut named: Vec<(
36055 String,
36056 Expression,
36057 crate::expressions::NamedArgSeparator,
36058 )> = Vec::new();
36059 let _pos_units = ["year", "month"];
36060 let mut _pos_idx = 0;
36061 for arg in args {
36062 if let Expression::NamedArgument(na) = arg {
36063 named.push((na.name.name.clone(), na.value, na.separator));
36064 } else {
36065 positional.push(arg);
36066 _pos_idx += 1;
36067 }
36068 }
36069 // Sort named args by: day, hour, minute, second
36070 let unit_order = |u: &str| -> usize {
36071 match u.to_ascii_lowercase().as_str() {
36072 "day" => 0,
36073 "hour" => 1,
36074 "minute" => 2,
36075 "second" => 3,
36076 _ => 4,
36077 }
36078 };
36079 named.sort_by_key(|(u, _, _)| unit_order(u));
36080 let mut result_args = positional;
36081 for (name, value, sep) in named {
36082 result_args.push(Expression::NamedArgument(Box::new(
36083 crate::expressions::NamedArgument {
36084 name: Identifier::new(&name),
36085 value,
36086 separator: sep,
36087 },
36088 )));
36089 }
36090 Ok(Expression::Function(Box::new(Function::new(
36091 "MAKE_INTERVAL".to_string(),
36092 result_args,
36093 ))))
36094 } else {
36095 Ok(Expression::Function(Box::new(Function::new(
36096 "MAKE_INTERVAL".to_string(),
36097 args,
36098 ))))
36099 }
36100 }
36101
36102 // ARRAY_TO_STRING(array, sep, null_text) -> ARRAY_TO_STRING(LIST_TRANSFORM(array, x -> COALESCE(x, null_text)), sep) for DuckDB
36103 "ARRAY_TO_STRING" if args.len() == 3 => {
36104 let arr = args.remove(0);
36105 let sep = args.remove(0);
36106 let null_text = args.remove(0);
36107 match target {
36108 DialectType::DuckDB => {
36109 // LIST_TRANSFORM(array, x -> COALESCE(x, null_text))
36110 let _lambda_param =
36111 Expression::Identifier(crate::expressions::Identifier::new("x"));
36112 let coalesce =
36113 Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
36114 original_name: None,
36115 expressions: vec![
36116 Expression::Identifier(crate::expressions::Identifier::new(
36117 "x",
36118 )),
36119 null_text,
36120 ],
36121 inferred_type: None,
36122 }));
36123 let lambda = Expression::Lambda(Box::new(crate::expressions::LambdaExpr {
36124 parameters: vec![crate::expressions::Identifier::new("x")],
36125 body: coalesce,
36126 colon: false,
36127 parameter_types: vec![],
36128 }));
36129 let list_transform = Expression::Function(Box::new(Function::new(
36130 "LIST_TRANSFORM".to_string(),
36131 vec![arr, lambda],
36132 )));
36133 Ok(Expression::Function(Box::new(Function::new(
36134 "ARRAY_TO_STRING".to_string(),
36135 vec![list_transform, sep],
36136 ))))
36137 }
36138 _ => Ok(Expression::Function(Box::new(Function::new(
36139 "ARRAY_TO_STRING".to_string(),
36140 vec![arr, sep, null_text],
36141 )))),
36142 }
36143 }
36144
36145 // LENGTH(x) -> CASE TYPEOF(x) ... for DuckDB
36146 "LENGTH" if args.len() == 1 => {
36147 let arg = args.remove(0);
36148 match target {
36149 DialectType::DuckDB => {
36150 // CASE TYPEOF(foo) WHEN 'BLOB' THEN OCTET_LENGTH(CAST(foo AS BLOB)) ELSE LENGTH(CAST(foo AS TEXT)) END
36151 let typeof_func = Expression::Function(Box::new(Function::new(
36152 "TYPEOF".to_string(),
36153 vec![arg.clone()],
36154 )));
36155 let blob_cast = Expression::Cast(Box::new(Cast {
36156 this: arg.clone(),
36157 to: DataType::VarBinary { length: None },
36158 trailing_comments: vec![],
36159 double_colon_syntax: false,
36160 format: None,
36161 default: None,
36162 inferred_type: None,
36163 }));
36164 let octet_length = Expression::Function(Box::new(Function::new(
36165 "OCTET_LENGTH".to_string(),
36166 vec![blob_cast],
36167 )));
36168 let text_cast = Expression::Cast(Box::new(Cast {
36169 this: arg,
36170 to: DataType::Text,
36171 trailing_comments: vec![],
36172 double_colon_syntax: false,
36173 format: None,
36174 default: None,
36175 inferred_type: None,
36176 }));
36177 let length_text = Expression::Function(Box::new(Function::new(
36178 "LENGTH".to_string(),
36179 vec![text_cast],
36180 )));
36181 Ok(Expression::Case(Box::new(crate::expressions::Case {
36182 operand: Some(typeof_func),
36183 whens: vec![(
36184 Expression::Literal(Box::new(Literal::String("BLOB".to_string()))),
36185 octet_length,
36186 )],
36187 else_: Some(length_text),
36188 comments: Vec::new(),
36189 inferred_type: None,
36190 })))
36191 }
36192 _ => Ok(Expression::Function(Box::new(Function::new(
36193 "LENGTH".to_string(),
36194 vec![arg],
36195 )))),
36196 }
36197 }
36198
36199 // PERCENTILE_CONT(x, fraction RESPECT NULLS) -> QUANTILE_CONT(x, fraction) for DuckDB
36200 "PERCENTILE_CONT" if args.len() >= 2 && matches!(source, DialectType::BigQuery) => {
36201 // BigQuery PERCENTILE_CONT(x, fraction [RESPECT|IGNORE NULLS]) OVER ()
36202 // The args should be [x, fraction] with the null handling stripped
36203 // For DuckDB: QUANTILE_CONT(x, fraction)
36204 // For Spark: PERCENTILE_CONT(x, fraction) RESPECT NULLS (handled at window level)
36205 match target {
36206 DialectType::DuckDB => {
36207 // Strip down to just 2 args, rename to QUANTILE_CONT
36208 let x = args[0].clone();
36209 let frac = args[1].clone();
36210 Ok(Expression::Function(Box::new(Function::new(
36211 "QUANTILE_CONT".to_string(),
36212 vec![x, frac],
36213 ))))
36214 }
36215 _ => Ok(Expression::Function(Box::new(Function::new(
36216 "PERCENTILE_CONT".to_string(),
36217 args,
36218 )))),
36219 }
36220 }
36221
36222 // All others: pass through
36223 _ => Ok(Expression::Function(Box::new(Function {
36224 name: f.name,
36225 args,
36226 distinct: f.distinct,
36227 trailing_comments: f.trailing_comments,
36228 use_bracket_syntax: f.use_bracket_syntax,
36229 no_parens: f.no_parens,
36230 quoted: f.quoted,
36231 span: None,
36232 inferred_type: None,
36233 }))),
36234 }
36235 }
36236
36237 /// Check if we can reliably infer the SQL type for Presto/Trino ROW CAST.
36238 /// Returns false for column references and other non-literal expressions where the type is unknown.
36239 fn can_infer_presto_type(expr: &Expression) -> bool {
36240 match expr {
36241 Expression::Literal(_) => true,
36242 Expression::Boolean(_) => true,
36243 Expression::Array(_) | Expression::ArrayFunc(_) => true,
36244 Expression::Struct(_) | Expression::StructFunc(_) => true,
36245 Expression::Function(f) => {
36246 f.name.eq_ignore_ascii_case("STRUCT")
36247 || f.name.eq_ignore_ascii_case("ROW")
36248 || f.name.eq_ignore_ascii_case("CURRENT_DATE")
36249 || f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36250 || f.name.eq_ignore_ascii_case("NOW")
36251 }
36252 Expression::Cast(_) => true,
36253 Expression::Neg(inner) => Self::can_infer_presto_type(&inner.this),
36254 _ => false,
36255 }
36256 }
36257
36258 /// Infer SQL type name for a Presto/Trino ROW CAST from a literal expression
36259 fn infer_sql_type_for_presto(expr: &Expression) -> String {
36260 use crate::expressions::Literal;
36261 match expr {
36262 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36263 "VARCHAR".to_string()
36264 }
36265 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36266 let Literal::Number(n) = lit.as_ref() else {
36267 unreachable!()
36268 };
36269 if n.contains('.') {
36270 "DOUBLE".to_string()
36271 } else {
36272 "INTEGER".to_string()
36273 }
36274 }
36275 Expression::Boolean(_) => "BOOLEAN".to_string(),
36276 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36277 "DATE".to_string()
36278 }
36279 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36280 "TIMESTAMP".to_string()
36281 }
36282 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36283 "TIMESTAMP".to_string()
36284 }
36285 Expression::Array(_) | Expression::ArrayFunc(_) => "ARRAY(VARCHAR)".to_string(),
36286 Expression::Struct(_) | Expression::StructFunc(_) => "ROW".to_string(),
36287 Expression::Function(f) => {
36288 if f.name.eq_ignore_ascii_case("STRUCT") || f.name.eq_ignore_ascii_case("ROW") {
36289 "ROW".to_string()
36290 } else if f.name.eq_ignore_ascii_case("CURRENT_DATE") {
36291 "DATE".to_string()
36292 } else if f.name.eq_ignore_ascii_case("CURRENT_TIMESTAMP")
36293 || f.name.eq_ignore_ascii_case("NOW")
36294 {
36295 "TIMESTAMP".to_string()
36296 } else {
36297 "VARCHAR".to_string()
36298 }
36299 }
36300 Expression::Cast(c) => {
36301 // If already cast, use the target type
36302 Self::data_type_to_presto_string(&c.to)
36303 }
36304 _ => "VARCHAR".to_string(),
36305 }
36306 }
36307
36308 /// Convert a DataType to its Presto/Trino string representation for ROW type
36309 fn data_type_to_presto_string(dt: &crate::expressions::DataType) -> String {
36310 use crate::expressions::DataType;
36311 match dt {
36312 DataType::VarChar { .. } | DataType::Text | DataType::String { .. } => {
36313 "VARCHAR".to_string()
36314 }
36315 DataType::Int { .. }
36316 | DataType::BigInt { .. }
36317 | DataType::SmallInt { .. }
36318 | DataType::TinyInt { .. } => "INTEGER".to_string(),
36319 DataType::Float { .. } | DataType::Double { .. } => "DOUBLE".to_string(),
36320 DataType::Boolean => "BOOLEAN".to_string(),
36321 DataType::Date => "DATE".to_string(),
36322 DataType::Timestamp { .. } => "TIMESTAMP".to_string(),
36323 DataType::Struct { fields, .. } => {
36324 let field_strs: Vec<String> = fields
36325 .iter()
36326 .map(|f| {
36327 format!(
36328 "{} {}",
36329 f.name,
36330 Self::data_type_to_presto_string(&f.data_type)
36331 )
36332 })
36333 .collect();
36334 format!("ROW({})", field_strs.join(", "))
36335 }
36336 DataType::Array { element_type, .. } => {
36337 format!("ARRAY({})", Self::data_type_to_presto_string(element_type))
36338 }
36339 DataType::Custom { name } => {
36340 // Pass through custom type names (e.g., "INTEGER", "VARCHAR" from earlier inference)
36341 name.clone()
36342 }
36343 _ => "VARCHAR".to_string(),
36344 }
36345 }
36346
36347 /// Convert IntervalUnit to string
36348 fn interval_unit_to_string(unit: &crate::expressions::IntervalUnit) -> &'static str {
36349 match unit {
36350 crate::expressions::IntervalUnit::Year => "YEAR",
36351 crate::expressions::IntervalUnit::Quarter => "QUARTER",
36352 crate::expressions::IntervalUnit::Month => "MONTH",
36353 crate::expressions::IntervalUnit::Week => "WEEK",
36354 crate::expressions::IntervalUnit::Day => "DAY",
36355 crate::expressions::IntervalUnit::Hour => "HOUR",
36356 crate::expressions::IntervalUnit::Minute => "MINUTE",
36357 crate::expressions::IntervalUnit::Second => "SECOND",
36358 crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
36359 crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
36360 crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
36361 }
36362 }
36363
36364 /// Extract unit string from an expression (uppercased)
36365 fn get_unit_str_static(expr: &Expression) -> String {
36366 use crate::expressions::Literal;
36367 match expr {
36368 Expression::Identifier(id) => id.name.to_ascii_uppercase(),
36369 Expression::Var(v) => v.this.to_ascii_uppercase(),
36370 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36371 let Literal::String(s) = lit.as_ref() else {
36372 unreachable!()
36373 };
36374 s.to_ascii_uppercase()
36375 }
36376 Expression::Column(col) => col.name.name.to_ascii_uppercase(),
36377 Expression::Function(f) => {
36378 let base = f.name.to_ascii_uppercase();
36379 if !f.args.is_empty() {
36380 let inner = Self::get_unit_str_static(&f.args[0]);
36381 format!("{}({})", base, inner)
36382 } else {
36383 base
36384 }
36385 }
36386 _ => "DAY".to_string(),
36387 }
36388 }
36389
36390 /// Parse unit string to IntervalUnit
36391 fn parse_interval_unit_static(s: &str) -> crate::expressions::IntervalUnit {
36392 match s {
36393 "YEAR" | "YY" | "YYYY" => crate::expressions::IntervalUnit::Year,
36394 "QUARTER" | "QQ" | "Q" => crate::expressions::IntervalUnit::Quarter,
36395 "MONTH" | "MM" | "M" => crate::expressions::IntervalUnit::Month,
36396 "WEEK" | "WK" | "WW" | "ISOWEEK" => crate::expressions::IntervalUnit::Week,
36397 "DAY" | "DD" | "D" | "DY" => crate::expressions::IntervalUnit::Day,
36398 "HOUR" | "HH" => crate::expressions::IntervalUnit::Hour,
36399 "MINUTE" | "MI" | "N" => crate::expressions::IntervalUnit::Minute,
36400 "SECOND" | "SS" | "S" => crate::expressions::IntervalUnit::Second,
36401 "MILLISECOND" | "MS" => crate::expressions::IntervalUnit::Millisecond,
36402 "MICROSECOND" | "MCS" | "US" => crate::expressions::IntervalUnit::Microsecond,
36403 _ if s.starts_with("WEEK(") => crate::expressions::IntervalUnit::Week,
36404 _ => crate::expressions::IntervalUnit::Day,
36405 }
36406 }
36407
36408 /// Convert expression to simple string for interval building
36409 fn expr_to_string_static(expr: &Expression) -> String {
36410 use crate::expressions::Literal;
36411 match expr {
36412 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36413 let Literal::Number(s) = lit.as_ref() else {
36414 unreachable!()
36415 };
36416 s.clone()
36417 }
36418 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36419 let Literal::String(s) = lit.as_ref() else {
36420 unreachable!()
36421 };
36422 s.clone()
36423 }
36424 Expression::Identifier(id) => id.name.clone(),
36425 Expression::Neg(f) => format!("-{}", Self::expr_to_string_static(&f.this)),
36426 _ => "1".to_string(),
36427 }
36428 }
36429
36430 /// Extract a simple string representation from a literal expression
36431 fn expr_to_string(expr: &Expression) -> String {
36432 use crate::expressions::Literal;
36433 match expr {
36434 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
36435 let Literal::Number(s) = lit.as_ref() else {
36436 unreachable!()
36437 };
36438 s.clone()
36439 }
36440 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
36441 let Literal::String(s) = lit.as_ref() else {
36442 unreachable!()
36443 };
36444 s.clone()
36445 }
36446 Expression::Neg(f) => format!("-{}", Self::expr_to_string(&f.this)),
36447 Expression::Identifier(id) => id.name.clone(),
36448 _ => "1".to_string(),
36449 }
36450 }
36451
    /// Quote an interval value expression as a string literal if it's a number (or negated number)
    ///
    /// `42` becomes `'42'` and `-42` becomes `'-42'`; an existing string
    /// literal is returned unchanged. Any other expression is cloned as-is.
    fn quote_interval_val(expr: &Expression) -> Expression {
        use crate::expressions::Literal;
        match expr {
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Number(_)) => {
                let Literal::Number(n) = lit.as_ref() else {
                    unreachable!()
                };
                Expression::Literal(Box::new(Literal::String(n.clone())))
            }
            // Already a string literal: nothing to quote.
            Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => expr.clone(),
            Expression::Neg(inner) => {
                if let Expression::Literal(lit) = &inner.this {
                    if let Literal::Number(n) = lit.as_ref() {
                        Expression::Literal(Box::new(Literal::String(format!("-{}", n))))
                    } else {
                        // NOTE(review): this returns only the operand of the
                        // negation for a negated non-number literal, silently
                        // dropping the minus sign — confirm this is intended
                        // rather than `expr.clone()`.
                        inner.this.clone()
                    }
                } else {
                    expr.clone()
                }
            }
            _ => expr.clone(),
        }
    }
36477
36478 /// Check if a timestamp string contains timezone info (offset like +02:00, or named timezone)
36479 fn timestamp_string_has_timezone(ts: &str) -> bool {
36480 let trimmed = ts.trim();
36481 // Check for numeric timezone offsets: +N, -N, +NN:NN, -NN:NN at end
36482 if let Some(last_space) = trimmed.rfind(' ') {
36483 let suffix = &trimmed[last_space + 1..];
36484 if (suffix.starts_with('+') || suffix.starts_with('-')) && suffix.len() > 1 {
36485 let rest = &suffix[1..];
36486 if rest.chars().all(|c| c.is_ascii_digit() || c == ':') {
36487 return true;
36488 }
36489 }
36490 }
36491 // Check for named timezone abbreviations
36492 let ts_lower = trimmed.to_ascii_lowercase();
36493 let tz_abbrevs = [" utc", " gmt", " cet", " est", " pst", " cst", " mst"];
36494 for abbrev in &tz_abbrevs {
36495 if ts_lower.ends_with(abbrev) {
36496 return true;
36497 }
36498 }
36499 false
36500 }
36501
36502 /// Maybe CAST timestamp literal to TIMESTAMPTZ for Snowflake
36503 fn maybe_cast_ts_to_tz(expr: Expression, func_name: &str) -> Expression {
36504 use crate::expressions::{Cast, DataType, Literal};
36505 match expr {
36506 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36507 let Literal::Timestamp(s) = lit.as_ref() else {
36508 unreachable!()
36509 };
36510 let tz = func_name.starts_with("TIMESTAMP");
36511 Expression::Cast(Box::new(Cast {
36512 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36513 to: if tz {
36514 DataType::Timestamp {
36515 timezone: true,
36516 precision: None,
36517 }
36518 } else {
36519 DataType::Timestamp {
36520 timezone: false,
36521 precision: None,
36522 }
36523 },
36524 trailing_comments: vec![],
36525 double_colon_syntax: false,
36526 format: None,
36527 default: None,
36528 inferred_type: None,
36529 }))
36530 }
36531 other => other,
36532 }
36533 }
36534
36535 /// Maybe CAST timestamp literal to TIMESTAMP (no tz)
36536 fn maybe_cast_ts(expr: Expression) -> Expression {
36537 use crate::expressions::{Cast, DataType, Literal};
36538 match expr {
36539 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36540 let Literal::Timestamp(s) = lit.as_ref() else {
36541 unreachable!()
36542 };
36543 Expression::Cast(Box::new(Cast {
36544 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36545 to: DataType::Timestamp {
36546 timezone: false,
36547 precision: None,
36548 },
36549 trailing_comments: vec![],
36550 double_colon_syntax: false,
36551 format: None,
36552 default: None,
36553 inferred_type: None,
36554 }))
36555 }
36556 other => other,
36557 }
36558 }
36559
36560 /// Convert DATE 'x' literal to CAST('x' AS DATE)
36561 fn date_literal_to_cast(expr: Expression) -> Expression {
36562 use crate::expressions::{Cast, DataType, Literal};
36563 match expr {
36564 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36565 let Literal::Date(s) = lit.as_ref() else {
36566 unreachable!()
36567 };
36568 Expression::Cast(Box::new(Cast {
36569 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36570 to: DataType::Date,
36571 trailing_comments: vec![],
36572 double_colon_syntax: false,
36573 format: None,
36574 default: None,
36575 inferred_type: None,
36576 }))
36577 }
36578 other => other,
36579 }
36580 }
36581
36582 /// Ensure an expression that should be a date is CAST(... AS DATE).
36583 /// Handles both DATE literals and string literals that look like dates.
36584 fn ensure_cast_date(expr: Expression) -> Expression {
36585 use crate::expressions::{Cast, DataType, Literal};
36586 match expr {
36587 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Date(_)) => {
36588 let Literal::Date(s) = lit.as_ref() else {
36589 unreachable!()
36590 };
36591 Expression::Cast(Box::new(Cast {
36592 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36593 to: DataType::Date,
36594 trailing_comments: vec![],
36595 double_colon_syntax: false,
36596 format: None,
36597 default: None,
36598 inferred_type: None,
36599 }))
36600 }
36601 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
36602 // String literal that should be a date -> CAST('s' AS DATE)
36603 Expression::Cast(Box::new(Cast {
36604 this: expr,
36605 to: DataType::Date,
36606 trailing_comments: vec![],
36607 double_colon_syntax: false,
36608 format: None,
36609 default: None,
36610 inferred_type: None,
36611 }))
36612 }
36613 // Already a CAST or other expression -> leave as-is
36614 other => other,
36615 }
36616 }
36617
36618 /// Force CAST(expr AS DATE) for any expression (not just literals)
36619 /// Skips if the expression is already a CAST to DATE
36620 fn force_cast_date(expr: Expression) -> Expression {
36621 use crate::expressions::{Cast, DataType};
36622 // If it's already a CAST to DATE, don't double-wrap
36623 if let Expression::Cast(ref c) = expr {
36624 if matches!(c.to, DataType::Date) {
36625 return expr;
36626 }
36627 }
36628 Expression::Cast(Box::new(Cast {
36629 this: expr,
36630 to: DataType::Date,
36631 trailing_comments: vec![],
36632 double_colon_syntax: false,
36633 format: None,
36634 default: None,
36635 inferred_type: None,
36636 }))
36637 }
36638
36639 /// Internal TO_DATE function that won't be converted to CAST by the Snowflake handler.
36640 /// Uses the name `_POLYGLOT_TO_DATE` which is not recognized by the TO_DATE -> CAST logic.
36641 /// The Snowflake DATEDIFF handler converts these back to TO_DATE.
36642 const PRESERVED_TO_DATE: &'static str = "_POLYGLOT_TO_DATE";
36643
36644 fn ensure_to_date_preserved(expr: Expression) -> Expression {
36645 use crate::expressions::{Function, Literal};
36646 if matches!(expr, Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(_)))
36647 {
36648 Expression::Function(Box::new(Function::new(
36649 Self::PRESERVED_TO_DATE.to_string(),
36650 vec![expr],
36651 )))
36652 } else {
36653 expr
36654 }
36655 }
36656
36657 /// TRY_CAST(expr AS DATE) - used for DuckDB when TO_DATE is unwrapped
36658 fn try_cast_date(expr: Expression) -> Expression {
36659 use crate::expressions::{Cast, DataType};
36660 Expression::TryCast(Box::new(Cast {
36661 this: expr,
36662 to: DataType::Date,
36663 trailing_comments: vec![],
36664 double_colon_syntax: false,
36665 format: None,
36666 default: None,
36667 inferred_type: None,
36668 }))
36669 }
36670
36671 /// CAST(CAST(expr AS TIMESTAMP) AS DATE) - used when Hive string dates need to be cast
36672 fn double_cast_timestamp_date(expr: Expression) -> Expression {
36673 use crate::expressions::{Cast, DataType};
36674 let inner = Expression::Cast(Box::new(Cast {
36675 this: expr,
36676 to: DataType::Timestamp {
36677 timezone: false,
36678 precision: None,
36679 },
36680 trailing_comments: vec![],
36681 double_colon_syntax: false,
36682 format: None,
36683 default: None,
36684 inferred_type: None,
36685 }));
36686 Expression::Cast(Box::new(Cast {
36687 this: inner,
36688 to: DataType::Date,
36689 trailing_comments: vec![],
36690 double_colon_syntax: false,
36691 format: None,
36692 default: None,
36693 inferred_type: None,
36694 }))
36695 }
36696
36697 /// CAST(CAST(expr AS DATETIME) AS DATE) - BigQuery variant
36698 fn double_cast_datetime_date(expr: Expression) -> Expression {
36699 use crate::expressions::{Cast, DataType};
36700 let inner = Expression::Cast(Box::new(Cast {
36701 this: expr,
36702 to: DataType::Custom {
36703 name: "DATETIME".to_string(),
36704 },
36705 trailing_comments: vec![],
36706 double_colon_syntax: false,
36707 format: None,
36708 default: None,
36709 inferred_type: None,
36710 }));
36711 Expression::Cast(Box::new(Cast {
36712 this: inner,
36713 to: DataType::Date,
36714 trailing_comments: vec![],
36715 double_colon_syntax: false,
36716 format: None,
36717 default: None,
36718 inferred_type: None,
36719 }))
36720 }
36721
36722 /// CAST(CAST(expr AS DATETIME2) AS DATE) - TSQL variant
36723 fn double_cast_datetime2_date(expr: Expression) -> Expression {
36724 use crate::expressions::{Cast, DataType};
36725 let inner = Expression::Cast(Box::new(Cast {
36726 this: expr,
36727 to: DataType::Custom {
36728 name: "DATETIME2".to_string(),
36729 },
36730 trailing_comments: vec![],
36731 double_colon_syntax: false,
36732 format: None,
36733 default: None,
36734 inferred_type: None,
36735 }));
36736 Expression::Cast(Box::new(Cast {
36737 this: inner,
36738 to: DataType::Date,
36739 trailing_comments: vec![],
36740 double_colon_syntax: false,
36741 format: None,
36742 default: None,
36743 inferred_type: None,
36744 }))
36745 }
36746
36747 /// Convert Hive/Java-style date format strings to C-style (strftime) format
36748 /// e.g., "yyyy-MM-dd'T'HH" -> "%Y-%m-%d'T'%H"
36749 fn hive_format_to_c_format(fmt: &str) -> String {
36750 let mut result = String::new();
36751 let chars: Vec<char> = fmt.chars().collect();
36752 let mut i = 0;
36753 while i < chars.len() {
36754 match chars[i] {
36755 'y' => {
36756 let mut count = 0;
36757 while i < chars.len() && chars[i] == 'y' {
36758 count += 1;
36759 i += 1;
36760 }
36761 if count >= 4 {
36762 result.push_str("%Y");
36763 } else if count == 2 {
36764 result.push_str("%y");
36765 } else {
36766 result.push_str("%Y");
36767 }
36768 }
36769 'M' => {
36770 let mut count = 0;
36771 while i < chars.len() && chars[i] == 'M' {
36772 count += 1;
36773 i += 1;
36774 }
36775 if count >= 3 {
36776 result.push_str("%b");
36777 } else if count == 2 {
36778 result.push_str("%m");
36779 } else {
36780 result.push_str("%m");
36781 }
36782 }
36783 'd' => {
36784 let mut _count = 0;
36785 while i < chars.len() && chars[i] == 'd' {
36786 _count += 1;
36787 i += 1;
36788 }
36789 result.push_str("%d");
36790 }
36791 'H' => {
36792 let mut _count = 0;
36793 while i < chars.len() && chars[i] == 'H' {
36794 _count += 1;
36795 i += 1;
36796 }
36797 result.push_str("%H");
36798 }
36799 'h' => {
36800 let mut _count = 0;
36801 while i < chars.len() && chars[i] == 'h' {
36802 _count += 1;
36803 i += 1;
36804 }
36805 result.push_str("%I");
36806 }
36807 'm' => {
36808 let mut _count = 0;
36809 while i < chars.len() && chars[i] == 'm' {
36810 _count += 1;
36811 i += 1;
36812 }
36813 result.push_str("%M");
36814 }
36815 's' => {
36816 let mut _count = 0;
36817 while i < chars.len() && chars[i] == 's' {
36818 _count += 1;
36819 i += 1;
36820 }
36821 result.push_str("%S");
36822 }
36823 'S' => {
36824 // Fractional seconds - skip
36825 while i < chars.len() && chars[i] == 'S' {
36826 i += 1;
36827 }
36828 result.push_str("%f");
36829 }
36830 'a' => {
36831 // AM/PM
36832 while i < chars.len() && chars[i] == 'a' {
36833 i += 1;
36834 }
36835 result.push_str("%p");
36836 }
36837 'E' => {
36838 let mut count = 0;
36839 while i < chars.len() && chars[i] == 'E' {
36840 count += 1;
36841 i += 1;
36842 }
36843 if count >= 4 {
36844 result.push_str("%A");
36845 } else {
36846 result.push_str("%a");
36847 }
36848 }
36849 '\'' => {
36850 // Quoted literal text - pass through the quotes and content
36851 result.push('\'');
36852 i += 1;
36853 while i < chars.len() && chars[i] != '\'' {
36854 result.push(chars[i]);
36855 i += 1;
36856 }
36857 if i < chars.len() {
36858 result.push('\'');
36859 i += 1;
36860 }
36861 }
36862 c => {
36863 result.push(c);
36864 i += 1;
36865 }
36866 }
36867 }
36868 result
36869 }
36870
36871 /// Convert Hive/Java format to Presto format (uses %T for HH:mm:ss)
36872 fn hive_format_to_presto_format(fmt: &str) -> String {
36873 let c_fmt = Self::hive_format_to_c_format(fmt);
36874 // Presto uses %T for HH:MM:SS
36875 c_fmt.replace("%H:%M:%S", "%T")
36876 }
36877
36878 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMP)
36879 fn ensure_cast_timestamp(expr: Expression) -> Expression {
36880 use crate::expressions::{Cast, DataType, Literal};
36881 match expr {
36882 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36883 let Literal::Timestamp(s) = lit.as_ref() else {
36884 unreachable!()
36885 };
36886 Expression::Cast(Box::new(Cast {
36887 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36888 to: DataType::Timestamp {
36889 timezone: false,
36890 precision: None,
36891 },
36892 trailing_comments: vec![],
36893 double_colon_syntax: false,
36894 format: None,
36895 default: None,
36896 inferred_type: None,
36897 }))
36898 }
36899 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
36900 Expression::Cast(Box::new(Cast {
36901 this: expr,
36902 to: DataType::Timestamp {
36903 timezone: false,
36904 precision: None,
36905 },
36906 trailing_comments: vec![],
36907 double_colon_syntax: false,
36908 format: None,
36909 default: None,
36910 inferred_type: None,
36911 }))
36912 }
36913 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36914 let Literal::Datetime(s) = lit.as_ref() else {
36915 unreachable!()
36916 };
36917 Expression::Cast(Box::new(Cast {
36918 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36919 to: DataType::Timestamp {
36920 timezone: false,
36921 precision: None,
36922 },
36923 trailing_comments: vec![],
36924 double_colon_syntax: false,
36925 format: None,
36926 default: None,
36927 inferred_type: None,
36928 }))
36929 }
36930 other => other,
36931 }
36932 }
36933
36934 /// Force CAST to TIMESTAMP for any expression (not just literals)
36935 /// Used when transpiling from Redshift/TSQL where DATEDIFF/DATEADD args need explicit timestamp cast
36936 fn force_cast_timestamp(expr: Expression) -> Expression {
36937 use crate::expressions::{Cast, DataType};
36938 // Don't double-wrap if already a CAST to TIMESTAMP
36939 if let Expression::Cast(ref c) = expr {
36940 if matches!(c.to, DataType::Timestamp { .. }) {
36941 return expr;
36942 }
36943 }
36944 Expression::Cast(Box::new(Cast {
36945 this: expr,
36946 to: DataType::Timestamp {
36947 timezone: false,
36948 precision: None,
36949 },
36950 trailing_comments: vec![],
36951 double_colon_syntax: false,
36952 format: None,
36953 default: None,
36954 inferred_type: None,
36955 }))
36956 }
36957
36958 /// Ensure a timestamp-like expression for DuckDB with CAST(... AS TIMESTAMPTZ)
36959 fn ensure_cast_timestamptz(expr: Expression) -> Expression {
36960 use crate::expressions::{Cast, DataType, Literal};
36961 match expr {
36962 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
36963 let Literal::Timestamp(s) = lit.as_ref() else {
36964 unreachable!()
36965 };
36966 Expression::Cast(Box::new(Cast {
36967 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36968 to: DataType::Timestamp {
36969 timezone: true,
36970 precision: None,
36971 },
36972 trailing_comments: vec![],
36973 double_colon_syntax: false,
36974 format: None,
36975 default: None,
36976 inferred_type: None,
36977 }))
36978 }
36979 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
36980 Expression::Cast(Box::new(Cast {
36981 this: expr,
36982 to: DataType::Timestamp {
36983 timezone: true,
36984 precision: None,
36985 },
36986 trailing_comments: vec![],
36987 double_colon_syntax: false,
36988 format: None,
36989 default: None,
36990 inferred_type: None,
36991 }))
36992 }
36993 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Datetime(_)) => {
36994 let Literal::Datetime(s) = lit.as_ref() else {
36995 unreachable!()
36996 };
36997 Expression::Cast(Box::new(Cast {
36998 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
36999 to: DataType::Timestamp {
37000 timezone: true,
37001 precision: None,
37002 },
37003 trailing_comments: vec![],
37004 double_colon_syntax: false,
37005 format: None,
37006 default: None,
37007 inferred_type: None,
37008 }))
37009 }
37010 other => other,
37011 }
37012 }
37013
37014 /// Ensure expression is CAST to DATETIME (for BigQuery)
37015 fn ensure_cast_datetime(expr: Expression) -> Expression {
37016 use crate::expressions::{Cast, DataType, Literal};
37017 match expr {
37018 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37019 Expression::Cast(Box::new(Cast {
37020 this: expr,
37021 to: DataType::Custom {
37022 name: "DATETIME".to_string(),
37023 },
37024 trailing_comments: vec![],
37025 double_colon_syntax: false,
37026 format: None,
37027 default: None,
37028 inferred_type: None,
37029 }))
37030 }
37031 other => other,
37032 }
37033 }
37034
37035 /// Force CAST expression to DATETIME (for BigQuery) - always wraps unless already DATETIME
37036 fn force_cast_datetime(expr: Expression) -> Expression {
37037 use crate::expressions::{Cast, DataType};
37038 if let Expression::Cast(ref c) = expr {
37039 if let DataType::Custom { ref name } = c.to {
37040 if name.eq_ignore_ascii_case("DATETIME") {
37041 return expr;
37042 }
37043 }
37044 }
37045 Expression::Cast(Box::new(Cast {
37046 this: expr,
37047 to: DataType::Custom {
37048 name: "DATETIME".to_string(),
37049 },
37050 trailing_comments: vec![],
37051 double_colon_syntax: false,
37052 format: None,
37053 default: None,
37054 inferred_type: None,
37055 }))
37056 }
37057
37058 /// Ensure expression is CAST to DATETIME2 (for TSQL)
37059 fn ensure_cast_datetime2(expr: Expression) -> Expression {
37060 use crate::expressions::{Cast, DataType, Literal};
37061 match expr {
37062 Expression::Literal(ref lit) if matches!(lit.as_ref(), Literal::String(ref _s)) => {
37063 Expression::Cast(Box::new(Cast {
37064 this: expr,
37065 to: DataType::Custom {
37066 name: "DATETIME2".to_string(),
37067 },
37068 trailing_comments: vec![],
37069 double_colon_syntax: false,
37070 format: None,
37071 default: None,
37072 inferred_type: None,
37073 }))
37074 }
37075 other => other,
37076 }
37077 }
37078
37079 /// Convert TIMESTAMP 'x' literal to CAST('x' AS TIMESTAMPTZ) for DuckDB
37080 fn ts_literal_to_cast_tz(expr: Expression) -> Expression {
37081 use crate::expressions::{Cast, DataType, Literal};
37082 match expr {
37083 Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Timestamp(_)) => {
37084 let Literal::Timestamp(s) = lit.as_ref() else {
37085 unreachable!()
37086 };
37087 Expression::Cast(Box::new(Cast {
37088 this: Expression::Literal(Box::new(Literal::String(s.clone()))),
37089 to: DataType::Timestamp {
37090 timezone: true,
37091 precision: None,
37092 },
37093 trailing_comments: vec![],
37094 double_colon_syntax: false,
37095 format: None,
37096 default: None,
37097 inferred_type: None,
37098 }))
37099 }
37100 other => other,
37101 }
37102 }
37103
37104 /// Convert BigQuery format string to Snowflake format string
37105 fn bq_format_to_snowflake(format_expr: &Expression) -> Expression {
37106 use crate::expressions::Literal;
37107 if let Expression::Literal(lit) = format_expr {
37108 if let Literal::String(s) = lit.as_ref() {
37109 let sf = s
37110 .replace("%Y", "yyyy")
37111 .replace("%m", "mm")
37112 .replace("%d", "DD")
37113 .replace("%H", "HH24")
37114 .replace("%M", "MI")
37115 .replace("%S", "SS")
37116 .replace("%b", "mon")
37117 .replace("%B", "Month")
37118 .replace("%e", "FMDD");
37119 Expression::Literal(Box::new(Literal::String(sf)))
37120 } else {
37121 format_expr.clone()
37122 }
37123 } else {
37124 format_expr.clone()
37125 }
37126 }
37127
37128 /// Convert BigQuery format string to DuckDB format string
37129 fn bq_format_to_duckdb(format_expr: &Expression) -> Expression {
37130 use crate::expressions::Literal;
37131 if let Expression::Literal(lit) = format_expr {
37132 if let Literal::String(s) = lit.as_ref() {
37133 let duck = s
37134 .replace("%T", "%H:%M:%S")
37135 .replace("%F", "%Y-%m-%d")
37136 .replace("%D", "%m/%d/%y")
37137 .replace("%x", "%m/%d/%y")
37138 .replace("%c", "%a %b %-d %H:%M:%S %Y")
37139 .replace("%e", "%-d")
37140 .replace("%E6S", "%S.%f");
37141 Expression::Literal(Box::new(Literal::String(duck)))
37142 } else {
37143 format_expr.clone()
37144 }
37145 } else {
37146 format_expr.clone()
37147 }
37148 }
37149
37150 /// Convert BigQuery CAST FORMAT elements (like YYYY, MM, DD) to strftime (like %Y, %m, %d)
37151 fn bq_cast_format_to_strftime(format_expr: &Expression) -> Expression {
37152 use crate::expressions::Literal;
37153 if let Expression::Literal(lit) = format_expr {
37154 if let Literal::String(s) = lit.as_ref() {
37155 // Replace format elements from longest to shortest to avoid partial matches
37156 let result = s
37157 .replace("YYYYMMDD", "%Y%m%d")
37158 .replace("YYYY", "%Y")
37159 .replace("YY", "%y")
37160 .replace("MONTH", "%B")
37161 .replace("MON", "%b")
37162 .replace("MM", "%m")
37163 .replace("DD", "%d")
37164 .replace("HH24", "%H")
37165 .replace("HH12", "%I")
37166 .replace("HH", "%I")
37167 .replace("MI", "%M")
37168 .replace("SSTZH", "%S%z")
37169 .replace("SS", "%S")
37170 .replace("TZH", "%z");
37171 Expression::Literal(Box::new(Literal::String(result)))
37172 } else {
37173 format_expr.clone()
37174 }
37175 } else {
37176 format_expr.clone()
37177 }
37178 }
37179
37180 /// Normalize BigQuery format strings for BQ->BQ output
37181 fn bq_format_normalize_bq(format_expr: &Expression) -> Expression {
37182 use crate::expressions::Literal;
37183 if let Expression::Literal(lit) = format_expr {
37184 if let Literal::String(s) = lit.as_ref() {
37185 let norm = s.replace("%H:%M:%S", "%T").replace("%x", "%D");
37186 Expression::Literal(Box::new(Literal::String(norm)))
37187 } else {
37188 format_expr.clone()
37189 }
37190 } else {
37191 format_expr.clone()
37192 }
37193 }
37194}
37195
37196#[cfg(test)]
37197mod tests {
37198 use super::*;
37199
    #[test]
    fn test_dialect_type_from_str() {
        // Canonical names and aliases ("postgres"/"postgresql") both parse
        // into the corresponding DialectType variant.
        assert_eq!(
            "postgres".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!(
            "postgresql".parse::<DialectType>().unwrap(),
            DialectType::PostgreSQL
        );
        assert_eq!("mysql".parse::<DialectType>().unwrap(), DialectType::MySQL);
        assert_eq!(
            "bigquery".parse::<DialectType>().unwrap(),
            DialectType::BigQuery
        );
    }
37216
37217 #[test]
37218 fn test_basic_transpile() {
37219 let dialect = Dialect::get(DialectType::Generic);
37220 let result = dialect
37221 .transpile("SELECT 1", DialectType::PostgreSQL)
37222 .unwrap();
37223 assert_eq!(result.len(), 1);
37224 assert_eq!(result[0], "SELECT 1");
37225 }
37226
    #[test]
    fn test_function_transformation_mysql() {
        // NVL should be transformed to IFNULL in MySQL
        // (the Generic source dialect performs the function mapping).
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("SELECT NVL(a, b)", DialectType::MySQL)
            .unwrap();
        assert_eq!(result[0], "SELECT IFNULL(a, b)");
    }
37236
    #[test]
    fn test_get_path_duckdb() {
        // NOTE(review): exploratory test — prints intermediate results but
        // asserts nothing; consider pinning expected output once the
        // colon-path / GET_PATH behavior is settled.
        // Test: step by step
        let snowflake = Dialect::get(DialectType::Snowflake);

        // Step 1: Parse and check what Snowflake produces as intermediate
        let result_sf_sf = snowflake
            .transpile(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("Snowflake->Snowflake colon: {}", result_sf_sf[0]);

        // Step 2: DuckDB target
        let result_sf_dk = snowflake
            .transpile(
                "SELECT PARSE_JSON('{\"fruit\":\"banana\"}'):fruit",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB colon: {}", result_sf_dk[0]);

        // Step 3: GET_PATH directly
        let result_gp = snowflake
            .transpile(
                "SELECT GET_PATH(PARSE_JSON('{\"fruit\":\"banana\"}'), 'fruit')",
                DialectType::DuckDB,
            )
            .unwrap();
        eprintln!("Snowflake->DuckDB explicit GET_PATH: {}", result_gp[0]);
    }
37269
    #[test]
    fn test_function_transformation_postgres() {
        // IFNULL should be transformed to COALESCE in PostgreSQL
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("SELECT IFNULL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");

        // NVL should also be transformed to COALESCE
        let result = dialect
            .transpile("SELECT NVL(a, b)", DialectType::PostgreSQL)
            .unwrap();
        assert_eq!(result[0], "SELECT COALESCE(a, b)");
    }
37285
    #[test]
    fn test_hive_cast_to_trycast() {
        // Hive CAST should become TRY_CAST for targets that support it
        let hive = Dialect::get(DialectType::Hive);
        let result = hive
            .transpile("CAST(1 AS INT)", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INT)");

        // Presto additionally renders INT under its INTEGER spelling.
        let result = hive
            .transpile("CAST(1 AS INT)", DialectType::Presto)
            .unwrap();
        assert_eq!(result[0], "TRY_CAST(1 AS INTEGER)");
    }
37300
    #[test]
    fn test_hive_array_identity() {
        // Hive ARRAY<DATE> should preserve angle bracket syntax
        // — checked both through transpile() and through the separate
        // parse -> transform -> generate path, since the two can diverge.
        let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
        let hive = Dialect::get(DialectType::Hive);

        // Test via transpile (this works)
        let result = hive.transpile(sql, DialectType::Hive).unwrap();
        eprintln!("Hive ARRAY via transpile: {}", result[0]);
        assert!(
            result[0].contains("ARRAY<DATE>"),
            "transpile: Expected ARRAY<DATE>, got: {}",
            result[0]
        );

        // Test via parse -> transform -> generate (identity test path)
        let ast = hive.parse(sql).unwrap();
        let transformed = hive.transform(ast[0].clone()).unwrap();
        let output = hive.generate(&transformed).unwrap();
        eprintln!("Hive ARRAY via identity path: {}", output);
        assert!(
            output.contains("ARRAY<DATE>"),
            "identity path: Expected ARRAY<DATE>, got: {}",
            output
        );
    }
37327
    #[test]
    fn test_starrocks_delete_between_expansion() {
        // StarRocks doesn't support BETWEEN in DELETE statements
        // — it must be expanded there, but left intact in SELECT.
        let dialect = Dialect::get(DialectType::Generic);

        // BETWEEN should be expanded to >= AND <= in DELETE
        let result = dialect
            .transpile(
                "DELETE FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a >= b AND a <= c");

        // NOT BETWEEN should be expanded to < OR > in DELETE
        let result = dialect
            .transpile(
                "DELETE FROM t WHERE a NOT BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert_eq!(result[0], "DELETE FROM t WHERE a < b OR a > c");

        // BETWEEN in SELECT should NOT be expanded (StarRocks supports it there)
        let result = dialect
            .transpile(
                "SELECT * FROM t WHERE a BETWEEN b AND c",
                DialectType::StarRocks,
            )
            .unwrap();
        assert!(
            result[0].contains("BETWEEN"),
            "BETWEEN should be preserved in SELECT"
        );
    }
37363
    #[test]
    fn test_snowflake_ltrim_rtrim_parse() {
        // Regression: nested LTRIM(RTRIM(...)) must parse and transpile
        // to DuckDB without error.
        let sf = Dialect::get(DialectType::Snowflake);
        let sql = "SELECT LTRIM(RTRIM(col)) FROM t1";
        let result = sf.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("LTRIM/RTRIM result: {}", r[0]),
            Err(e) => eprintln!("LTRIM/RTRIM error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of LTRIM(RTRIM(col)), got error: {:?}",
            result.err()
        );
    }
37379
    #[test]
    fn test_duckdb_count_if_parse() {
        // Regression: a bare COUNT_IF(x) expression must parse in the
        // DuckDB dialect without error.
        let duck = Dialect::get(DialectType::DuckDB);
        let sql = "COUNT_IF(x)";
        let result = duck.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("COUNT_IF result: {}", r[0]),
            Err(e) => eprintln!("COUNT_IF error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful parse of COUNT_IF(x), got error: {:?}",
            result.err()
        );
    }
37395
    #[test]
    fn test_tsql_cast_tinyint_parse() {
        // Regression: TSQL TINYINT inside CAST must transpile to DuckDB
        // without error.
        let tsql = Dialect::get(DialectType::TSQL);
        let sql = "CAST(X AS TINYINT)";
        let result = tsql.transpile(sql, DialectType::DuckDB);
        match &result {
            Ok(r) => eprintln!("TSQL CAST TINYINT result: {}", r[0]),
            Err(e) => eprintln!("TSQL CAST TINYINT error: {}", e),
        }
        assert!(
            result.is_ok(),
            "Expected successful transpile, got error: {:?}",
            result.err()
        );
    }
37411
37412 #[test]
37413 fn test_pg_hash_bitwise_xor() {
37414 let dialect = Dialect::get(DialectType::PostgreSQL);
37415 let result = dialect.transpile("x # y", DialectType::PostgreSQL).unwrap();
37416 assert_eq!(result[0], "x # y");
37417 }
37418
    #[test]
    fn test_pg_array_to_duckdb() {
        // Postgres ARRAY[...] literals become DuckDB bracket lists, while
        // the `@>` containment operator is kept verbatim.
        let dialect = Dialect::get(DialectType::PostgreSQL);
        let result = dialect
            .transpile("SELECT ARRAY[1, 2, 3] @> ARRAY[1, 2]", DialectType::DuckDB)
            .unwrap();
        assert_eq!(result[0], "SELECT [1, 2, 3] @> [1, 2]");
    }
37427
    #[test]
    fn test_array_remove_bigquery() {
        // ARRAY_REMOVE is expanded for BigQuery into an
        // ARRAY(SELECT ... FROM UNNEST(...) WHERE ...) filter form.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile("ARRAY_REMOVE(the_array, target)", DialectType::BigQuery)
            .unwrap();
        assert_eq!(
            result[0],
            "ARRAY(SELECT _u FROM UNNEST(the_array) AS _u WHERE _u <> target)"
        );
    }
37439
    #[test]
    fn test_map_clickhouse_case() {
        // NOTE(review): exploratory test — prints the parsed AST and the
        // ClickHouse output but asserts nothing beyond "does not error";
        // consider pinning the expected rendering.
        let dialect = Dialect::get(DialectType::Generic);
        let parsed = dialect
            .parse("CAST(MAP('a', '1') AS MAP(TEXT, TEXT))")
            .unwrap();
        eprintln!("MAP parsed: {:?}", parsed);
        let result = dialect
            .transpile(
                "CAST(MAP('a', '1') AS MAP(TEXT, TEXT))",
                DialectType::ClickHouse,
            )
            .unwrap();
        eprintln!("MAP result: {}", result[0]);
    }
37455
    #[test]
    fn test_generate_date_array_presto() {
        // GENERATE_DATE_ARRAY lowers to Presto's SEQUENCE with explicit
        // CASTs and the WEEK interval rewritten as (1 * INTERVAL '7' DAY).
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Presto,
        ).unwrap();
        eprintln!("GDA -> Presto: {}", result[0]);
        assert_eq!(result[0], "SELECT * FROM UNNEST(SEQUENCE(CAST('2020-01-01' AS DATE), CAST('2020-02-01' AS DATE), (1 * INTERVAL '7' DAY)))");
    }
37466
    #[test]
    fn test_generate_date_array_postgres() {
        // NOTE(review): exploratory — only checks the transpile succeeds and
        // prints the result; no expected output is asserted.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::PostgreSQL,
        ).unwrap();
        eprintln!("GDA -> PostgreSQL: {}", result[0]);
    }
37476
    #[test]
    fn test_generate_date_array_snowflake() {
        // NOTE(review): exploratory — only checks the transpile succeeds and
        // prints the result; no expected output is asserted.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect
            .transpile(
                "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
                DialectType::Snowflake,
            )
            .unwrap();
        eprintln!("GDA -> Snowflake: {}", result[0]);
    }
37488
    #[test]
    fn test_array_length_generate_date_array_snowflake() {
        // NOTE(review): exploratory — only checks the transpile succeeds and
        // prints the result; no expected output is asserted.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT ARRAY_LENGTH(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Snowflake,
        ).unwrap();
        eprintln!("ARRAY_LENGTH(GDA) -> Snowflake: {}", result[0]);
    }
37498
    #[test]
    fn test_generate_date_array_mysql() {
        // NOTE(review): exploratory — only checks the transpile succeeds and
        // prints the result; no expected output is asserted.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::MySQL,
        ).unwrap();
        eprintln!("GDA -> MySQL: {}", result[0]);
    }
37508
    #[test]
    fn test_generate_date_array_redshift() {
        // NOTE(review): exploratory — only checks the transpile succeeds and
        // prints the result; no expected output is asserted.
        let dialect = Dialect::get(DialectType::Generic);
        let result = dialect.transpile(
            "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
            DialectType::Redshift,
        ).unwrap();
        eprintln!("GDA -> Redshift: {}", result[0]);
    }
37518
37519 #[test]
37520 fn test_generate_date_array_tsql() {
37521 let dialect = Dialect::get(DialectType::Generic);
37522 let result = dialect.transpile(
37523 "SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))",
37524 DialectType::TSQL,
37525 ).unwrap();
37526 eprintln!("GDA -> TSQL: {}", result[0]);
37527 }
37528
37529 #[test]
37530 fn test_struct_colon_syntax() {
37531 let dialect = Dialect::get(DialectType::Generic);
37532 // Test without colon first
37533 let result = dialect.transpile(
37534 "CAST((1, 2, 3, 4) AS STRUCT<a TINYINT, b SMALLINT, c INT, d BIGINT>)",
37535 DialectType::ClickHouse,
37536 );
37537 match result {
37538 Ok(r) => eprintln!("STRUCT no colon -> ClickHouse: {}", r[0]),
37539 Err(e) => eprintln!("STRUCT no colon error: {}", e),
37540 }
37541 // Now test with colon
37542 let result = dialect.transpile(
37543 "CAST((1, 2, 3, 4) AS STRUCT<a: TINYINT, b: SMALLINT, c: INT, d: BIGINT>)",
37544 DialectType::ClickHouse,
37545 );
37546 match result {
37547 Ok(r) => eprintln!("STRUCT colon -> ClickHouse: {}", r[0]),
37548 Err(e) => eprintln!("STRUCT colon error: {}", e),
37549 }
37550 }
37551
37552 #[test]
37553 fn test_generate_date_array_cte_wrapped_mysql() {
37554 let dialect = Dialect::get(DialectType::Generic);
37555 let result = dialect.transpile(
37556 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
37557 DialectType::MySQL,
37558 ).unwrap();
37559 eprintln!("GDA CTE -> MySQL: {}", result[0]);
37560 }
37561
37562 #[test]
37563 fn test_generate_date_array_cte_wrapped_tsql() {
37564 let dialect = Dialect::get(DialectType::Generic);
37565 let result = dialect.transpile(
37566 "WITH dates AS (SELECT * FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-02-01', INTERVAL 1 WEEK))) SELECT * FROM dates",
37567 DialectType::TSQL,
37568 ).unwrap();
37569 eprintln!("GDA CTE -> TSQL: {}", result[0]);
37570 }
37571
37572 #[test]
37573 fn test_decode_literal_no_null_check() {
37574 // Oracle DECODE with all literals should produce simple equality, no IS NULL
37575 let dialect = Dialect::get(DialectType::Oracle);
37576 let result = dialect
37577 .transpile("SELECT decode(1,2,3,4)", DialectType::DuckDB)
37578 .unwrap();
37579 assert_eq!(
37580 result[0], "SELECT CASE WHEN 1 = 2 THEN 3 ELSE 4 END",
37581 "Literal DECODE should not have IS NULL checks"
37582 );
37583 }
37584
37585 #[test]
37586 fn test_decode_column_vs_literal_no_null_check() {
37587 // Oracle DECODE with column vs literal should use simple equality (like sqlglot)
37588 let dialect = Dialect::get(DialectType::Oracle);
37589 let result = dialect
37590 .transpile("SELECT decode(col, 2, 3, 4) FROM t", DialectType::DuckDB)
37591 .unwrap();
37592 assert_eq!(
37593 result[0], "SELECT CASE WHEN col = 2 THEN 3 ELSE 4 END FROM t",
37594 "Column vs literal DECODE should not have IS NULL checks"
37595 );
37596 }
37597
37598 #[test]
37599 fn test_decode_column_vs_column_keeps_null_check() {
37600 // Oracle DECODE with column vs column should keep null-safe comparison
37601 let dialect = Dialect::get(DialectType::Oracle);
37602 let result = dialect
37603 .transpile("SELECT decode(col, col2, 3, 4) FROM t", DialectType::DuckDB)
37604 .unwrap();
37605 assert!(
37606 result[0].contains("IS NULL"),
37607 "Column vs column DECODE should have IS NULL checks, got: {}",
37608 result[0]
37609 );
37610 }
37611
37612 #[test]
37613 fn test_decode_null_search() {
37614 // Oracle DECODE with NULL search should use IS NULL
37615 let dialect = Dialect::get(DialectType::Oracle);
37616 let result = dialect
37617 .transpile("SELECT decode(col, NULL, 3, 4) FROM t", DialectType::DuckDB)
37618 .unwrap();
37619 assert_eq!(
37620 result[0],
37621 "SELECT CASE WHEN col IS NULL THEN 3 ELSE 4 END FROM t",
37622 );
37623 }
37624
37625 // =========================================================================
37626 // REGEXP function transpilation tests
37627 // =========================================================================
37628
37629 #[test]
37630 fn test_regexp_substr_snowflake_to_duckdb_2arg() {
37631 let dialect = Dialect::get(DialectType::Snowflake);
37632 let result = dialect
37633 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern')", DialectType::DuckDB)
37634 .unwrap();
37635 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
37636 }
37637
37638 #[test]
37639 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos1() {
37640 let dialect = Dialect::get(DialectType::Snowflake);
37641 let result = dialect
37642 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 1)", DialectType::DuckDB)
37643 .unwrap();
37644 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
37645 }
37646
37647 #[test]
37648 fn test_regexp_substr_snowflake_to_duckdb_3arg_pos_gt1() {
37649 let dialect = Dialect::get(DialectType::Snowflake);
37650 let result = dialect
37651 .transpile("SELECT REGEXP_SUBSTR(s, 'pattern', 3)", DialectType::DuckDB)
37652 .unwrap();
37653 assert_eq!(
37654 result[0],
37655 "SELECT REGEXP_EXTRACT(NULLIF(SUBSTRING(s, 3), ''), 'pattern')"
37656 );
37657 }
37658
37659 #[test]
37660 fn test_regexp_substr_snowflake_to_duckdb_4arg_occ_gt1() {
37661 let dialect = Dialect::get(DialectType::Snowflake);
37662 let result = dialect
37663 .transpile(
37664 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 3)",
37665 DialectType::DuckDB,
37666 )
37667 .unwrap();
37668 assert_eq!(
37669 result[0],
37670 "SELECT ARRAY_EXTRACT(REGEXP_EXTRACT_ALL(s, 'pattern'), 3)"
37671 );
37672 }
37673
37674 #[test]
37675 fn test_regexp_substr_snowflake_to_duckdb_5arg_e_flag() {
37676 let dialect = Dialect::get(DialectType::Snowflake);
37677 let result = dialect
37678 .transpile(
37679 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')",
37680 DialectType::DuckDB,
37681 )
37682 .unwrap();
37683 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
37684 }
37685
37686 #[test]
37687 fn test_regexp_substr_snowflake_to_duckdb_6arg_group0() {
37688 let dialect = Dialect::get(DialectType::Snowflake);
37689 let result = dialect
37690 .transpile(
37691 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
37692 DialectType::DuckDB,
37693 )
37694 .unwrap();
37695 assert_eq!(result[0], "SELECT REGEXP_EXTRACT(s, 'pattern')");
37696 }
37697
37698 #[test]
37699 fn test_regexp_substr_snowflake_identity_strip_group0() {
37700 let dialect = Dialect::get(DialectType::Snowflake);
37701 let result = dialect
37702 .transpile(
37703 "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e', 0)",
37704 DialectType::Snowflake,
37705 )
37706 .unwrap();
37707 assert_eq!(result[0], "SELECT REGEXP_SUBSTR(s, 'pattern', 1, 1, 'e')");
37708 }
37709
37710 #[test]
37711 fn test_regexp_substr_all_snowflake_to_duckdb_2arg() {
37712 let dialect = Dialect::get(DialectType::Snowflake);
37713 let result = dialect
37714 .transpile(
37715 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')",
37716 DialectType::DuckDB,
37717 )
37718 .unwrap();
37719 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
37720 }
37721
37722 #[test]
37723 fn test_regexp_substr_all_snowflake_to_duckdb_3arg_pos_gt1() {
37724 let dialect = Dialect::get(DialectType::Snowflake);
37725 let result = dialect
37726 .transpile(
37727 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 3)",
37728 DialectType::DuckDB,
37729 )
37730 .unwrap();
37731 assert_eq!(
37732 result[0],
37733 "SELECT REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')"
37734 );
37735 }
37736
37737 #[test]
37738 fn test_regexp_substr_all_snowflake_to_duckdb_5arg_e_flag() {
37739 let dialect = Dialect::get(DialectType::Snowflake);
37740 let result = dialect
37741 .transpile(
37742 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')",
37743 DialectType::DuckDB,
37744 )
37745 .unwrap();
37746 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
37747 }
37748
37749 #[test]
37750 fn test_regexp_substr_all_snowflake_to_duckdb_6arg_group0() {
37751 let dialect = Dialect::get(DialectType::Snowflake);
37752 let result = dialect
37753 .transpile(
37754 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
37755 DialectType::DuckDB,
37756 )
37757 .unwrap();
37758 assert_eq!(result[0], "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')");
37759 }
37760
37761 #[test]
37762 fn test_regexp_substr_all_snowflake_identity_strip_group0() {
37763 let dialect = Dialect::get(DialectType::Snowflake);
37764 let result = dialect
37765 .transpile(
37766 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e', 0)",
37767 DialectType::Snowflake,
37768 )
37769 .unwrap();
37770 assert_eq!(
37771 result[0],
37772 "SELECT REGEXP_SUBSTR_ALL(s, 'pattern', 1, 1, 'e')"
37773 );
37774 }
37775
37776 #[test]
37777 fn test_regexp_count_snowflake_to_duckdb_2arg() {
37778 let dialect = Dialect::get(DialectType::Snowflake);
37779 let result = dialect
37780 .transpile("SELECT REGEXP_COUNT(s, 'pattern')", DialectType::DuckDB)
37781 .unwrap();
37782 assert_eq!(
37783 result[0],
37784 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(s, 'pattern')) END"
37785 );
37786 }
37787
37788 #[test]
37789 fn test_regexp_count_snowflake_to_duckdb_3arg() {
37790 let dialect = Dialect::get(DialectType::Snowflake);
37791 let result = dialect
37792 .transpile("SELECT REGEXP_COUNT(s, 'pattern', 3)", DialectType::DuckDB)
37793 .unwrap();
37794 assert_eq!(
37795 result[0],
37796 "SELECT CASE WHEN 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 3), 'pattern')) END"
37797 );
37798 }
37799
37800 #[test]
37801 fn test_regexp_count_snowflake_to_duckdb_4arg_flags() {
37802 let dialect = Dialect::get(DialectType::Snowflake);
37803 let result = dialect
37804 .transpile(
37805 "SELECT REGEXP_COUNT(s, 'pattern', 1, 'i')",
37806 DialectType::DuckDB,
37807 )
37808 .unwrap();
37809 assert_eq!(
37810 result[0],
37811 "SELECT CASE WHEN '(?i)' || 'pattern' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING(s, 1), '(?i)' || 'pattern')) END"
37812 );
37813 }
37814
37815 #[test]
37816 fn test_regexp_count_snowflake_to_duckdb_4arg_flags_literal_string() {
37817 let dialect = Dialect::get(DialectType::Snowflake);
37818 let result = dialect
37819 .transpile(
37820 "SELECT REGEXP_COUNT('Hello World', 'L', 1, 'im')",
37821 DialectType::DuckDB,
37822 )
37823 .unwrap();
37824 assert_eq!(
37825 result[0],
37826 "SELECT CASE WHEN '(?im)' || 'L' = '' THEN 0 ELSE LENGTH(REGEXP_EXTRACT_ALL(SUBSTRING('Hello World', 1), '(?im)' || 'L')) END"
37827 );
37828 }
37829
37830 #[test]
37831 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos1_occ1() {
37832 let dialect = Dialect::get(DialectType::Snowflake);
37833 let result = dialect
37834 .transpile(
37835 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 1, 1)",
37836 DialectType::DuckDB,
37837 )
37838 .unwrap();
37839 assert_eq!(result[0], "SELECT REGEXP_REPLACE(s, 'pattern', 'repl')");
37840 }
37841
37842 #[test]
37843 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ0() {
37844 let dialect = Dialect::get(DialectType::Snowflake);
37845 let result = dialect
37846 .transpile(
37847 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 0)",
37848 DialectType::DuckDB,
37849 )
37850 .unwrap();
37851 assert_eq!(
37852 result[0],
37853 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl', 'g')"
37854 );
37855 }
37856
37857 #[test]
37858 fn test_regexp_replace_snowflake_to_duckdb_5arg_pos_gt1_occ1() {
37859 let dialect = Dialect::get(DialectType::Snowflake);
37860 let result = dialect
37861 .transpile(
37862 "SELECT REGEXP_REPLACE(s, 'pattern', 'repl', 3, 1)",
37863 DialectType::DuckDB,
37864 )
37865 .unwrap();
37866 assert_eq!(
37867 result[0],
37868 "SELECT SUBSTRING(s, 1, 2) || REGEXP_REPLACE(SUBSTRING(s, 3), 'pattern', 'repl')"
37869 );
37870 }
37871
37872 #[test]
37873 fn test_rlike_snowflake_to_duckdb_2arg() {
37874 let dialect = Dialect::get(DialectType::Snowflake);
37875 let result = dialect
37876 .transpile("SELECT RLIKE(a, b)", DialectType::DuckDB)
37877 .unwrap();
37878 assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b)");
37879 }
37880
37881 #[test]
37882 fn test_rlike_snowflake_to_duckdb_3arg_flags() {
37883 let dialect = Dialect::get(DialectType::Snowflake);
37884 let result = dialect
37885 .transpile("SELECT RLIKE(a, b, 'i')", DialectType::DuckDB)
37886 .unwrap();
37887 assert_eq!(result[0], "SELECT REGEXP_FULL_MATCH(a, b, 'i')");
37888 }
37889
37890 #[test]
37891 fn test_regexp_extract_all_bigquery_to_snowflake_no_capture() {
37892 let dialect = Dialect::get(DialectType::BigQuery);
37893 let result = dialect
37894 .transpile(
37895 "SELECT REGEXP_EXTRACT_ALL(s, 'pattern')",
37896 DialectType::Snowflake,
37897 )
37898 .unwrap();
37899 assert_eq!(result[0], "SELECT REGEXP_SUBSTR_ALL(s, 'pattern')");
37900 }
37901
37902 #[test]
37903 fn test_regexp_extract_all_bigquery_to_snowflake_with_capture() {
37904 let dialect = Dialect::get(DialectType::BigQuery);
37905 let result = dialect
37906 .transpile(
37907 "SELECT REGEXP_EXTRACT_ALL(s, '(a)[0-9]')",
37908 DialectType::Snowflake,
37909 )
37910 .unwrap();
37911 assert_eq!(
37912 result[0],
37913 "SELECT REGEXP_SUBSTR_ALL(s, '(a)[0-9]', 1, 1, 'c', 1)"
37914 );
37915 }
37916
37917 #[test]
37918 fn test_regexp_instr_snowflake_to_duckdb_2arg() {
37919 let dialect = Dialect::get(DialectType::Snowflake);
37920 let result = dialect
37921 .transpile("SELECT REGEXP_INSTR(s, 'pattern')", DialectType::DuckDB)
37922 .unwrap();
37923 assert!(
37924 result[0].contains("CASE WHEN"),
37925 "Expected CASE WHEN in result: {}",
37926 result[0]
37927 );
37928 assert!(
37929 result[0].contains("LIST_SUM"),
37930 "Expected LIST_SUM in result: {}",
37931 result[0]
37932 );
37933 }
37934
37935 #[test]
37936 fn test_array_except_generic_to_duckdb() {
37937 let dialect = Dialect::get(DialectType::Generic);
37938 let result = dialect
37939 .transpile(
37940 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
37941 DialectType::DuckDB,
37942 )
37943 .unwrap();
37944 eprintln!("ARRAY_EXCEPT Generic->DuckDB: {}", result[0]);
37945 assert!(
37946 result[0].contains("CASE WHEN"),
37947 "Expected CASE WHEN: {}",
37948 result[0]
37949 );
37950 assert!(
37951 result[0].contains("LIST_FILTER"),
37952 "Expected LIST_FILTER: {}",
37953 result[0]
37954 );
37955 assert!(
37956 result[0].contains("LIST_DISTINCT"),
37957 "Expected LIST_DISTINCT: {}",
37958 result[0]
37959 );
37960 assert!(
37961 result[0].contains("IS NOT DISTINCT FROM"),
37962 "Expected IS NOT DISTINCT FROM: {}",
37963 result[0]
37964 );
37965 assert!(result[0].contains("= 0"), "Expected = 0 filter: {}", result[0]);
37966 }
37967
37968 #[test]
37969 fn test_array_except_generic_to_snowflake() {
37970 let dialect = Dialect::get(DialectType::Generic);
37971 let result = dialect
37972 .transpile(
37973 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
37974 DialectType::Snowflake,
37975 )
37976 .unwrap();
37977 eprintln!("ARRAY_EXCEPT Generic->Snowflake: {}", result[0]);
37978 assert_eq!(result[0], "SELECT ARRAY_EXCEPT([1, 2, 3], [2])");
37979 }
37980
37981 #[test]
37982 fn test_array_except_generic_to_presto() {
37983 let dialect = Dialect::get(DialectType::Generic);
37984 let result = dialect
37985 .transpile(
37986 "SELECT ARRAY_EXCEPT(ARRAY(1, 2, 3), ARRAY(2))",
37987 DialectType::Presto,
37988 )
37989 .unwrap();
37990 eprintln!("ARRAY_EXCEPT Generic->Presto: {}", result[0]);
37991 assert_eq!(result[0], "SELECT ARRAY_EXCEPT(ARRAY[1, 2, 3], ARRAY[2])");
37992 }
37993
37994 #[test]
37995 fn test_array_except_snowflake_to_duckdb() {
37996 let dialect = Dialect::get(DialectType::Snowflake);
37997 let result = dialect
37998 .transpile("SELECT ARRAY_EXCEPT([1, 2, 3], [2])", DialectType::DuckDB)
37999 .unwrap();
38000 eprintln!("ARRAY_EXCEPT Snowflake->DuckDB: {}", result[0]);
38001 assert!(
38002 result[0].contains("CASE WHEN"),
38003 "Expected CASE WHEN: {}",
38004 result[0]
38005 );
38006 assert!(
38007 result[0].contains("LIST_TRANSFORM"),
38008 "Expected LIST_TRANSFORM: {}",
38009 result[0]
38010 );
38011 }
38012
38013 #[test]
38014 fn test_array_contains_snowflake_to_snowflake() {
38015 let dialect = Dialect::get(DialectType::Snowflake);
38016 let result = dialect
38017 .transpile(
38018 "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
38019 DialectType::Snowflake,
38020 )
38021 .unwrap();
38022 eprintln!("ARRAY_CONTAINS Snowflake->Snowflake: {}", result[0]);
38023 assert_eq!(result[0], "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])");
38024 }
38025
38026 #[test]
38027 fn test_array_contains_snowflake_to_duckdb() {
38028 let dialect = Dialect::get(DialectType::Snowflake);
38029 let result = dialect
38030 .transpile(
38031 "SELECT ARRAY_CONTAINS(x, [1, NULL, 3])",
38032 DialectType::DuckDB,
38033 )
38034 .unwrap();
38035 eprintln!("ARRAY_CONTAINS Snowflake->DuckDB: {}", result[0]);
38036 assert!(
38037 result[0].contains("CASE WHEN"),
38038 "Expected CASE WHEN: {}",
38039 result[0]
38040 );
38041 assert!(
38042 result[0].contains("NULLIF"),
38043 "Expected NULLIF: {}",
38044 result[0]
38045 );
38046 assert!(
38047 result[0].contains("ARRAY_CONTAINS"),
38048 "Expected ARRAY_CONTAINS: {}",
38049 result[0]
38050 );
38051 }
38052
38053 #[test]
38054 fn test_array_distinct_snowflake_to_duckdb() {
38055 let dialect = Dialect::get(DialectType::Snowflake);
38056 let result = dialect
38057 .transpile(
38058 "SELECT ARRAY_DISTINCT([1, 2, 2, 3, 1])",
38059 DialectType::DuckDB,
38060 )
38061 .unwrap();
38062 eprintln!("ARRAY_DISTINCT Snowflake->DuckDB: {}", result[0]);
38063 assert!(
38064 result[0].contains("CASE WHEN"),
38065 "Expected CASE WHEN: {}",
38066 result[0]
38067 );
38068 assert!(
38069 result[0].contains("LIST_DISTINCT"),
38070 "Expected LIST_DISTINCT: {}",
38071 result[0]
38072 );
38073 assert!(
38074 result[0].contains("LIST_APPEND"),
38075 "Expected LIST_APPEND: {}",
38076 result[0]
38077 );
38078 assert!(
38079 result[0].contains("LIST_FILTER"),
38080 "Expected LIST_FILTER: {}",
38081 result[0]
38082 );
38083 }
38084}